//===-- driver/config.d - LDC config file parsing -----------------*- D -*-===//
//
//                         LDC – the LLVM D compiler
//
// This file is distributed under the BSD-style LDC license. See the LICENSE
// file for details.
//
//===----------------------------------------------------------------------===//
//
// Parsing engine for the LDC config file (ldc2.conf).
//
//===----------------------------------------------------------------------===//
module driver.config;

import core.stdc.ctype;
import core.stdc.stdio;
import core.stdc.string;

/// Common base of every node produced by the config parser: a name plus a tag
/// telling which concrete subclass the node is.
class Setting
{
    /// Kind of value a setting holds.
    enum Type
    {
        scalar,
        array,
        group,
    }

    /// Stores the setting's name and its kind.
    this(string name, Type type)
    {
        _name = name;
        _type = type;
    }

    /// Name the setting was declared with.
    @property string name() const
    {
        return _name;
    }

    /// Kind tag passed to the constructor.
    @property Type type() const
    {
        return _type;
    }

    private string _name;
    private Type _type;
}

/// A setting whose value is a single string.
class ScalarSetting : Setting
{
    /// Creates a `Type.scalar` setting holding `val`.
    this(string name, string val)
    {
        super(name, Type.scalar);
        _val = val;
    }

    /// The string value.
    @property string val() const
    {
        return _val;
    }

    private string _val;
}

/// A setting whose value is a list of strings.
class ArraySetting : Setting
{
    /// Creates a `Type.array` setting holding `vals`.
    this(string name, string[] vals)
    {
        super(name, Type.array);
        _vals = vals;
    }

    /// The list elements, in the order they were given.
    @property const(string)[] vals() const
    {
        return _vals;
    }

    private string[] _vals;
}

/// A setting whose value is a list of nested settings.
class GroupSetting : Setting
{
    /// Creates a `Type.group` setting holding `children`.
    this(string name, Setting[] children)
    {
        super(name, Type.group);
        _children = children;
    }

    /// The nested settings, in the order they were given.
    @property const(Setting)[] children() const
    {
        return _children;
    }

    private Setting[] _children;
}

/// Reads the file `filename`, drops a leading UTF-8 byte order mark if there
/// is one, and parses the remainder as a config file.
///
/// Returns: the top-level settings, in file order.
Setting[] parseConfigFile(const(char)* filename)
{
    import dmd.globals : Loc;
    import dmd.utils;

    auto data = readFile(Loc.initial, filename).extractData();

    // skip UTF-8 BOM
    if (data.length >= 3 && data[0 .. 3] == "\xEF\xBB\xBF")
        data = data[3 .. $];

    auto text = cast(string) data;
    auto path = cast(string) filename.toDString;

    auto parser = Parser(text, path);
    return parser.parseConfig();
}

private:

/+
What follows is a recursive descent parser that reads the following
EBNF grammar.
It is a subset of the libconfig grammar (http://www.hyperrealm.com/libconfig).
config  =   { ows , setting } , ows ;
setting =   (name | string) , (":" | "=") , value , [";" | ","] ;
name    =   alpha , { alpha | digit | "_" | "-" } ;
value   =   string | array | group ;
array   =   "[" , ows ,
                [ string , { ows , "," , ows , string } , [ ows , "," ] , ows ] ,
            "]" ;
group   =   "{" , ows , { setting , ows } , "}" ;
string  =   ( quotstr , { ows , quotstr } ) |
            ( btstr , { ows, btstr } ) ;
quotstr =   '"' , { ? any char but '"', '\n' and '\r' ? | escseq } , '"' ;
escseq  =   "\" , ( "\" | '"' | "r" | "n" | "t" ) ;
btstr   =   '`' , { ? any char but '`' ? } , '`' ;
alpha   =   ? any char between "a" and "z" included
                    or between "A" and "Z" included ? ;
digit   =   ? any char between "0" and "9" included ? ;
ows     =   [ ws ] ; (* optional white space *)
ws      =   ? white space (space, tab, line feed ...) ? ;


Array elements are separated by commas; a comma after the last element is
optional.

Single line comments are also supported. They start with "//" and span until
line feed.
The "//" sequence is however allowed within strings and doesn't need to be
escaped.
White space is significant only within strings.
Physical line feeds are not allowed within double-quoted strings (backtick
strings may contain them). To span a double-quoted string over multiple
lines, use concatenation ("hello " "world" == "hello world").
The following escape sequences are allowed in double-quoted strings:
  - \\
  - \"
  - \r
  - \n
  - \t
+/

/// Kinds of lexical token produced by `Parser.getTok`.
enum Token
{
    name,
    assign,         // ':' or '='
    str,
    lbrace,         // '{'
    rbrace,         // '}'
    lbracket,       // '['
    rbracket,       // ']'
    semicolon,      // ';'
    comma,          // ','
    unknown,
    eof,
}

/// Returns the description of `tok` that is embedded in error messages.
string humanReadableToken(in Token tok)
{
    final switch(tok)
    {
    case Token.name:        return `"name"`;
    case Token.assign:      return `':' or '='`;
    case Token.str:         return `"string"`;
    case Token.lbrace:      return `'{'`;
    case Token.rbrace:      return `'}'`;
    case Token.lbracket:    return `'['`;
    case Token.rbracket:    return `']'`;
    case Token.semicolon:   return `';'`;
    case Token.comma:       return `','`;
    case Token.unknown:     return `"unknown token"`;
    case Token.eof:         return `"end of file"`;
    }
}

/// Combined lexer and recursive descent parser for the config grammar.
///
/// The lexer keeps one character of look-ahead in `lastChar`; the parser can
/// additionally push back a single token with `ungetTok`.
struct Parser
{
    string filename;        /// only used as a prefix of error messages
    string content;         /// the text being parsed
    int index;              /// position of the next unread char in `content`
    int lineNum = 1;        /// 1-based count of '\n' consumed so far, plus one
    char lastChar = ' ';    /// look-ahead char; starts as white space so the
                            /// first getTok() call reads from `content`

    /// A token that was pushed back with `ungetTok`.
    static struct Ahead
    {
        Token tok;
        string s;
    }
    Ahead ahead;            /// storage for the pushed-back token
    Ahead* aheadp;          /// points to `ahead` while a token is pushed back

    /// Prepares parsing of `content`; `filename` only appears in errors.
    this(string content, string filename = null)
    {
        this.filename = filename;
        this.content = content;
    }

    /// Throws an `Exception` whose message names the config file, the current
    /// line number and `msg`.
    void error(in string msg)
    {
        // Only the line number goes through snprintf: a 32-bit int needs at
        // most 11 chars, so this buffer can never be too small. Building the
        // rest by concatenation keeps long file names or messages intact and
        // avoids handing size_t lengths to a "%.*s" specifier, which expects
        // an int.
        char[16] lineBuf;
        const lineLen = snprintf(lineBuf.ptr, lineBuf.length, "%d", lineNum);
        assert(lineLen > 0 && lineLen < lineBuf.length);

        throw new Exception("Error while reading config file: " ~ filename ~
                            "\nline " ~ lineBuf[0 .. lineLen].idup ~ ": " ~ msg);
    }

    /// Returns the next char of `content` and advances, or '\0' once the end
    /// has been reached. Every '\n' returned increments `lineNum`.
    char getChar()
    {
        if (index == content.length)
            return '\0';

        const c = content[index++];
        if (c == '\n')
            ++lineNum;
        return c;
    }

    /// Returns the next token.
    ///
    /// `outStr` receives the identifier for `Token.name`, the unescaped and
    /// concatenated text for `Token.str`, and the offending char for
    /// `Token.unknown`; it is left empty for every other token.
    ///
    /// Throws: `Exception` (through `error`) on a malformed string literal.
    Token getTok(out string outStr)
    {
        // a pushed-back token takes precedence over the input
        if (aheadp)
        {
            immutable tok = aheadp.tok;
            outStr = aheadp.s;
            aheadp = null;
            return tok;
        }

        while (isspace(lastChar))
        {
            lastChar = getChar();
        }

        if (lastChar == '/')
        {
            lastChar = getChar();
            if (lastChar != '/')
            {
                outStr = "/";
                return Token.unknown;
            }

            // single line comment: drop everything up to the line feed (or
            // the end of input), then lex whatever follows
            do
            {
                lastChar = getChar();
            }
            while (lastChar != '\n' && lastChar != '\0');
            return getTok(outStr);
        }

        if (isalpha(lastChar))
        {
            string name;
            do
            {
                name ~= lastChar;
                lastChar = getChar();
            }
            while (isalnum(lastChar) || lastChar == '_' || lastChar == '-');
            outStr = name;
            return Token.name;
        }

        switch (lastChar)
        {
        case ':':
        case '=':
            lastChar = getChar();
            return Token.assign;
        case ';':
            lastChar = getChar();
            return Token.semicolon;
        case ',':
            lastChar = getChar();
            return Token.comma;
        case '{':
            lastChar = getChar();
            return Token.lbrace;
        case '}':
            lastChar = getChar();
            return Token.rbrace;
        case '[':
            lastChar = getChar();
            return Token.lbracket;
        case ']':
            lastChar = getChar();
            return Token.rbracket;
        case '\0':
            return Token.eof;
        default:
            break;
        }

        if (lastChar == '"')
        {
            string str;
            // each iteration consumes one literal; adjacent literals that are
            // separated by white space only are concatenated
            while (lastChar == '"')
            {
                while (1)
                {
                    lastChar = getChar();
                    if (lastChar == '"') break;
                    if (lastChar == '\n' || lastChar == '\r')
                    {
                        // getChar() has already counted a consumed '\n'; step
                        // back so that the error names the line the literal
                        // is actually on
                        if (lastChar == '\n')
                            --lineNum;
                        error("Unexpected end of line in string literal");
                    }
                    else if (lastChar == '\0')
                    {
                        error("Unexpected end of file in string literal");
                    }
                    if (lastChar == '\\')
                    {
                        lastChar = getChar();
                        switch(lastChar)
                        {
                        case '\\':
                        case '"':
                            // the escaped char stands for itself
                            break;
                        case 'r':
                            lastChar = '\r';
                            break;
                        case 'n':
                            lastChar = '\n';
                            break;
                        case 't':
                            lastChar = '\t';
                            break;
                        default:
                            error("Unexpected escape sequence: \\" ~ lastChar);
                            break;
                        }
                    }
                    str ~= lastChar;
                }
                lastChar = getChar();
                while (isspace(lastChar)) lastChar = getChar();
            }

            outStr = str;
            return Token.str;
        }

        if (lastChar == '`')
        {
            string str;
            // backtick literals are taken verbatim (no escapes, line feeds
            // allowed) and are concatenated like double-quoted ones
            while (lastChar == '`')
            {
                while (1)
                {
                    lastChar = getChar();
                    if (lastChar == '`') break;
                    if (lastChar == '\0')
                    {
                        error("Unexpected end of file in string literal");
                    }
                    str ~= lastChar;
                }
                lastChar = getChar();
                while (isspace(lastChar)) lastChar = getChar();
            }

            outStr = str;
            return Token.str;
        }

        outStr = [lastChar];
        lastChar = getChar();
        return Token.unknown;
    }

    /// Pushes a token back so that the next `getTok` call returns it again.
    /// Only one token can be pushed back at a time.
    void ungetTok(in Token tok, in string s)
    {
        assert(!aheadp, "can only have one look ahead");
        ahead.tok = tok;
        ahead.s = s;
        aheadp = &ahead;
    }

    /// Throws (through `error`) a message saying that `expected` was wanted
    /// but `tok` was found; a non-empty `s` is appended in parentheses.
    void unexpectedTokenError(in Token tok, in Token expected, string s)
    {
        s = s.length ? " ("~s~")" : "";
        error("Was expecting token " ~ humanReadableToken(expected) ~
              ". Got " ~ humanReadableToken(tok) ~ s ~ " instead.");
    }

    /// Reads the next token, which must be `expected`, and returns its text.
    ///
    /// Throws: `Exception` if another token is found.
    string accept(in Token expected)
    {
        string s;
        immutable tok = getTok(s);
        if (tok != expected)
        {
            unexpectedTokenError(tok, expected, s);
        }
        return s;
    }

    /// Parses settings until the end of input and returns them in order.
    Setting[] parseConfig()
    {
        Setting[] res;
        while (1)
        {
            {
                // peek: stop at the end of input, otherwise hand the token
                // back to parseSetting()
                string s;
                auto t = getTok(s);
                if (t == Token.eof)
                {
                    break;
                }
                ungetTok(t, s);
            }
            res ~= parseSetting();
        }
        return res;
    }

    /// Parses one `name (":" | "=") value` setting and swallows the optional
    /// ';' or ',' that may follow it.
    Setting parseSetting()
    {
        string name;
        auto t = getTok(name);
        if (t != Token.name && t != Token.str)
        {
            unexpectedTokenError(t, Token.name, name);
            assert(false);
        }

        accept(Token.assign);

        Setting res = parseValue(name);

        string s;
        t = getTok(s);
        if (t != Token.semicolon && t != Token.comma)
        {
            ungetTok(t, s);
        }

        return res;
    }

    /// Parses a string, array or group value and wraps it in the matching
    /// `Setting` subclass, named `name`.
    ///
    /// Throws: `Exception` if the input does not start with a value.
    Setting parseValue(string name)
    {
        string s;
        auto t = getTok(s);
        if (t == Token.str)
        {
            return new ScalarSetting(name, s);
        }
        else if (t == Token.lbracket)
        {
            string[] arrVal;
            while (1)
            {
                // get string or rbracket
                t = getTok(s);
                switch(t)
                {
                case Token.str:
                    arrVal ~= s;
                    break;
                case Token.rbracket:
                    return new ArraySetting(name, arrVal);
                default:
                    unexpectedTokenError(t, Token.str, s);
                    assert(false);
                }

                // get comma or rbracket
                t = getTok(s);
                switch(t)
                {
                case Token.comma:
                    break;
                case Token.rbracket:
                    return new ArraySetting(name, arrVal);
                default:
                    unexpectedTokenError(t, Token.comma, s);
                    assert(false);
                }
            }
        }
        else if (t == Token.lbrace)
        {
            Setting[] grpVal;
            while (1)
            {
                t = getTok(s);
                if (t == Token.rbrace)
                {
                    return new GroupSetting(name, grpVal);
                }
                ungetTok(t, s);
                grpVal ~= parseSetting();
            }
        }
        error("Was expecting value.");
        assert(false);
    }
}

unittest
{
    static void testScalar(string input, string expected)
    {
        auto setting = Parser(input).parseValue(null);
        assert(setting.type == Setting.Type.scalar);
        assert((cast(ScalarSetting) setting).val == expected);
    }

    testScalar(`""`, "");
    testScalar(`"abc\r\ndef\t\"quoted/\\123\""`,
                "abc\r\ndef\t\"quoted/\\123\"");
    testScalar(`"concatenated" " multiline"
                " strings"`, "concatenated multiline strings");
    testScalar("`abc\n\\ //comment \"`",
                "abc\n\\ //comment \"");
    testScalar(`"Üņïčöđë"`, "Üņïčöđë");
}

unittest
{
    static void testArray(string input, string[] expected)
    {
        auto setting = Parser(input).parseValue(null);
        assert(setting.type == Setting.Type.array);
        assert((cast(ArraySetting) setting).vals == expected);
    }

    testArray(`[]`, []);
    testArray(`[ "a" ]`, [ "a" ]);
    testArray(`[ "a", ]`, [ "a" ]);
    testArray(`[ "a", "b" ]`, [ "a", "b" ]);
    testArray(`[
        // comment
        "a",
        // comment
        "b"
    ]`, [ "a", "b" ]);
}

unittest
{
    enum input =
`// comment

// comment
group-1_2: {};
// comment
"86(_64)?-.*linux\\.?":
{
    // comment
    scalar = "abc";
    // comment
    Array_1-2 = [ "a" ];
};
`;

    auto settings = Parser(input).parseConfig();
    assert(settings.length == 2);

    assert(settings[0].name == "group-1_2");
    assert(settings[0].type == Setting.Type.group);
    assert((cast(GroupSetting) settings[0]).children == []);

    assert(settings[1].name == "86(_64)?-.*linux\\.?");
    assert(settings[1].type == Setting.Type.group);
    auto group2 = cast(GroupSetting) settings[1];
    assert(group2.children.length == 2);

    assert(group2.children[0].name == "scalar");
    assert(group2.children[0].type == Setting.Type.scalar);
    assert((cast(ScalarSetting) group2.children[0]).val == "abc");

    assert(group2.children[1].name == "Array_1-2");
    assert(group2.children[1].type == Setting.Type.array);
    assert((cast(ArraySetting) group2.children[1]).vals == [ "a" ]);
}

unittest
{
    // returns the message of the exception thrown while parsing `input`
    static string errorOf(string input, string filename = null)
    {
        try
        {
            Parser(input, filename).parseConfig();
        }
        catch (Exception e)
        {
            return e.msg;
        }
        assert(false, "expected a parse error");
    }

    // an unterminated string literal is reported on the line it is on
    assert(errorOf("a = \"b\n\";", "ldc2.conf") ==
           "Error while reading config file: ldc2.conf\n" ~
           "line 1: Unexpected end of line in string literal");

    // line numbers keep counting across preceding lines
    assert(errorOf("// comment\na = [ \"b\", c ];") ==
           "Error while reading config file: \n" ~
           "line 2: Was expecting token \"string\". " ~
           "Got \"name\" (c) instead.");

    // a message longer than any fixed-size buffer is neither truncated nor
    // does it trigger a range violation
    auto longName = new char[](4096);
    longName[] = 'x';
    enum tail = "\nline 1: Was expecting token ':' or '='. " ~
                "Got \"end of file\" instead.";
    const msg = errorOf("a", longName.idup);
    assert(msg.length > longName.length);
    assert(msg[$ - tail.length .. $] == tail);
}