ldc/driver/config.d
Martin Kinkelin 231d7c1570 Use dmd.root.file for reading ldc2.conf
Thereby preventing to use fopen() on Windows, which expects a narrow
string in the current code page, not necessarily UTF-8.
2019-08-08 22:38:38 +02:00

592 lines
14 KiB
D
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//===-- driver/config.d - LDC config file parsing -----------------*- D -*-===//
//
// LDC the LLVM D compiler
//
// This file is distributed under the BSD-style LDC license. See the LICENSE
// file for details.
//
//===----------------------------------------------------------------------===//
//
// Parsing engine for the LDC config file (ldc2.conf).
//
//===----------------------------------------------------------------------===//
module driver.config;
import core.stdc.ctype;
import core.stdc.stdio;
import core.stdc.string;
/// Base class of every entry of a parsed config file: a name plus a tag
/// telling which kind of value the entry carries.
class Setting
{
    /// The kind of value held by a setting.
    enum Type
    {
        scalar,
        array,
        group,
    }

    private string _name;
    private Type _type;

    /// Creates a setting called `name` whose value is of kind `type`.
    this(string name, Type type)
    {
        this._name = name;
        this._type = type;
    }

    /// The name given to the setting at construction.
    string name() const @property
    {
        return this._name;
    }

    /// The kind of value given to the setting at construction.
    Type type() const @property
    {
        return this._type;
    }
}
/// A setting whose value is a single string.
class ScalarSetting : Setting
{
    private string _val;

    /// Creates a scalar setting called `name` with the value `val`.
    this(string name, string val)
    {
        super(name, Type.scalar);
        this._val = val;
    }

    /// The string value given at construction.
    string val() const @property
    {
        return this._val;
    }
}
/// A setting whose value is a list of strings.
class ArraySetting : Setting
{
    private string[] _vals;

    /// Creates an array setting called `name` holding `vals`.
    /// The slice is stored as is, not copied.
    this(string name, string[] vals)
    {
        super(name, Type.array);
        this._vals = vals;
    }

    /// The strings given at construction, in the same order.
    const(string)[] vals() const @property
    {
        return this._vals;
    }
}
/// A setting whose value is a nested list of settings.
class GroupSetting : Setting
{
    private Setting[] _children;

    /// Creates a group setting called `name` containing `children`.
    /// The slice is stored as is, not copied.
    this(string name, Setting[] children)
    {
        super(name, Type.group);
        this._children = children;
    }

    /// The settings given at construction, in the same order.
    const(Setting)[] children() const @property
    {
        return this._children;
    }
}
/// Reads the config file `filename` and parses its content.
///
/// Params:
///   filename = path of the config file, as a C string
/// Returns: the top-level settings, in the order in which they were parsed
/// Throws: Exception if the content cannot be parsed
Setting[] parseConfigFile(const(char)* filename)
{
    import dmd.globals : Loc;
    import dmd.utils;

    auto data = readFile(Loc.initial, filename).extractData();

    // skip UTF-8 BOM
    const startsWithBom = data.length >= 3 && data[0 .. 3] == "\xEF\xBB\xBF";
    if (startsWithBom)
        data = data[3 .. $];

    string path = cast(string) filename.toDString;
    return Parser(cast(string) data, path).parseConfig();
}
private:
/+
What follows is a recursive descent parser that reads the following
EBNF grammar.
It is a subset of the libconfig grammar (http://www.hyperrealm.com/libconfig).
config = { ows , setting } , ows ;
setting = (name | string) , (":" | "=") , value , [";" | ","] ;
name = alpha , { alpha | digit | "_" | "-" } ;
value = string | array | group ;
array = "[" , ows ,
[ string , ows , { "," , ows , string , ows } , [ "," , ows ] ] ,
"]" ;
group = "{" , ows , { setting , ows } , "}" ;
string = ( quotstr , { ows , quotstr } ) |
( btstr , { ows, btstr } ) ;
quotstr = '"' , { ? any char but '"', '\n' and '\r' ? | escseq } , '"' ;
escseq = "\" , ( "\" | '"' | "r" | "n" | "t" ) ;
btstr = '`' , { ? any char but '`' ? } , '`' ;
alpha = ? any char between "a" and "z" included
or between "A" and "Z" included ? ;
digit = ? any char between "0" and "9" included ? ;
ows = [ ws ] ; (* optional white space *)
ws = ? white space (space, tab, line feed ...) ? ;
Single line comments are also supported. They start with "//" and span until
line feed.
The "//" sequence is however allowed within strings and doesn't need to be
escaped.
White space is significant only within strings.
Physical line feeds are not allowed within strings. To span a string over
multiple lines, use concatenation ("hello " "world" == "hello world").
The following escape sequences are allowed in strings:
- \\
- \"
- \r
- \n
- \t
+/
// Kinds of token produced by the lexer (Parser.getTok).
enum Token
{
// setting name: a letter followed by letters, digits, '_' or '-'
name,
assign, // ':' or '='
// string literal, written "..." or `...`
str,
lbrace, // '{'
rbrace, // '}'
lbracket, // '['
rbracket, // ']'
semicolon, // ';'
comma, // ','
// input that does not form any of the tokens above
unknown,
// end of input
eof,
}
/// Returns the text that stands for `tok` in parser error messages.
string humanReadableToken(in Token tok)
{
    string text;
    final switch (tok)
    {
    case Token.name:
        text = `"name"`;
        break;
    case Token.assign:
        text = `':' or '='`;
        break;
    case Token.str:
        text = `"string"`;
        break;
    case Token.lbrace:
        text = `'{'`;
        break;
    case Token.rbrace:
        text = `'}'`;
        break;
    case Token.lbracket:
        text = `'['`;
        break;
    case Token.rbracket:
        text = `']'`;
        break;
    case Token.semicolon:
        text = `';'`;
        break;
    case Token.comma:
        text = `','`;
        break;
    case Token.unknown:
        text = `"unknown token"`;
        break;
    case Token.eof:
        text = `"end of file"`;
        break;
    }
    return text;
}
/// Lexer and recursive descent parser for the config file grammar.
///
/// A Parser walks `content` once, from start to end. Every syntax error is
/// reported by throwing an Exception built by error().
struct Parser
{
    /// File name shown in error messages; may be null.
    string filename;
    /// Complete text being parsed.
    string content;
    /// Offset into `content` of the next character getChar() returns.
    int index;
    /// Line counter, incremented by getChar() for each '\n' it consumes.
    int lineNum = 1;
    /// Most recently read character that has not been turned into a token
    /// yet. It starts out as a blank so that the first getTok() call begins
    /// by reading from `content`.
    char lastChar = ' ';

    /// A token together with its text, as stored by ungetTok().
    static struct Ahead
    {
        Token tok;
        string s;
    }

    /// Storage for the single pushed-back token.
    Ahead ahead;
    /// Points at `ahead` while a pushed-back token is pending, null
    /// otherwise. Because it points into the struct itself, a Parser must
    /// not be copied while a token is pushed back.
    Ahead* aheadp;

    /// Creates a parser for `content`; `filename` is only used in error
    /// messages.
    this(string content, string filename = null)
    {
        this.filename = filename;
        this.content = content;
    }

    /// Throws an Exception whose message contains the file name, the current
    /// line number and `msg`. The message is truncated if it does not fit
    /// the internal 1024-byte buffer.
    void error(in string msg)
    {
        enum fmt = "Error while reading config file: %.*s\nline %d: %.*s";
        char[1024] buf;

        // Never hand a null pointer to snprintf: the default file name is
        // null, and so is the `.ptr` of an empty message.
        const fnPtr = filename.length ? filename.ptr : "".ptr;
        const msgPtr = msg.length ? msg.ptr : "".ptr;

        // "%.*s" takes its precision as a C `int`; passing a size_t
        // `.length` through the variadic call is undefined behaviour.
        const written = snprintf(buf.ptr, buf.length, fmt,
                                 cast(int) filename.length, fnPtr,
                                 lineNum,
                                 cast(int) msg.length, msgPtr);

        // snprintf returns the length the complete message would have had
        // (>= buf.length when the output was truncated) or a negative value
        // on failure. Clamp to what was actually stored in `buf`.
        size_t len = 0;
        if (written > 0)
        {
            len = cast(size_t) written;
            if (len >= buf.length)
                len = buf.length - 1;
        }
        throw new Exception(buf[0 .. len].idup);
    }

    /// Returns the next character of `content` and advances past it, or
    /// '\0' once the end has been reached. Consuming a '\n' increments
    /// `lineNum`.
    char getChar()
    {
        if (index == content.length)
            return '\0';
        const c = content[index++];
        if (c == '\n')
            ++lineNum;
        return c;
    }

    /// Reads the next token.
    ///
    /// Params:
    ///   outStr = receives the text of a name or string token, or the
    ///            offending text of an unknown token
    /// Returns: the kind of token read; Token.eof once the input is exhausted
    /// Throws: Exception for an unterminated string literal or an
    ///         unsupported escape sequence
    Token getTok(out string outStr)
    {
        // A token handed back with ungetTok() takes precedence.
        if (aheadp)
        {
            immutable tok = aheadp.tok;
            outStr = aheadp.s;
            aheadp = null;
            return tok;
        }

        while (isspace(lastChar))
        {
            lastChar = getChar();
        }

        // "//" starts a comment that runs until the end of the line; a lone
        // '/' is not part of the grammar.
        if (lastChar == '/')
        {
            lastChar = getChar();
            if (lastChar != '/')
            {
                outStr = "/";
                return Token.unknown;
            }
            do
            {
                lastChar = getChar();
            }
            while (lastChar != '\n' && lastChar != '\0');
            // Lex whatever follows the comment.
            return getTok(outStr);
        }

        // name: a letter followed by letters, digits, '_' or '-'
        if (isalpha(lastChar))
        {
            string name;
            do
            {
                name ~= lastChar;
                lastChar = getChar();
            }
            while (isalnum(lastChar) || lastChar == '_' || lastChar == '-');
            outStr = name;
            return Token.name;
        }

        // single-character tokens and end of input
        switch (lastChar)
        {
        case ':':
        case '=':
            lastChar = getChar();
            return Token.assign;
        case ';':
            lastChar = getChar();
            return Token.semicolon;
        case ',':
            lastChar = getChar();
            return Token.comma;
        case '{':
            lastChar = getChar();
            return Token.lbrace;
        case '}':
            lastChar = getChar();
            return Token.rbrace;
        case '[':
            lastChar = getChar();
            return Token.lbracket;
        case ']':
            lastChar = getChar();
            return Token.rbracket;
        case '\0':
            return Token.eof;
        default:
            break;
        }

        // Double-quoted string. Literals separated only by white space are
        // concatenated into one token.
        if (lastChar == '"')
        {
            string str;
            while (lastChar == '"')
            {
                while (1)
                {
                    lastChar = getChar();
                    if (lastChar == '"') break;
                    if (lastChar == '\n' || lastChar == '\r')
                    {
                        // getChar() has already counted a consumed '\n';
                        // step back so that the error names the line that
                        // holds the unterminated literal.
                        if (lastChar == '\n')
                            --lineNum;
                        error("Unexpected end of line in string literal");
                    }
                    else if (lastChar == '\0')
                    {
                        error("Unexpected end of file in string literal");
                    }
                    if (lastChar == '\\')
                    {
                        // Replace the escape sequence by the character it
                        // stands for.
                        lastChar = getChar();
                        switch(lastChar)
                        {
                        case '\\':
                        case '"':
                            break;
                        case 'r':
                            lastChar = '\r';
                            break;
                        case 'n':
                            lastChar = '\n';
                            break;
                        case 't':
                            lastChar = '\t';
                            break;
                        default:
                            error("Unexpected escape sequence: \\" ~ lastChar);
                            break;
                        }
                    }
                    str ~= lastChar;
                }
                // Skip the closing quote and any white space, so that a
                // directly following literal is picked up by the outer loop.
                lastChar = getChar();
                while (isspace(lastChar)) lastChar = getChar();
            }
            outStr = str;
            return Token.str;
        }

        // Backtick string: taken verbatim, line feeds included, with the
        // same concatenation rule as above.
        if (lastChar == '`')
        {
            string str;
            while (lastChar == '`')
            {
                while (1)
                {
                    lastChar = getChar();
                    if (lastChar == '`') break;
                    if (lastChar == '\0')
                    {
                        error("Unexpected end of file in string literal");
                    }
                    str ~= lastChar;
                }
                lastChar = getChar();
                while (isspace(lastChar)) lastChar = getChar();
            }
            outStr = str;
            return Token.str;
        }

        // Anything else is reported as an unknown token made of that one
        // character.
        outStr = [lastChar];
        lastChar = getChar();
        return Token.unknown;
    }

    /// Hands a token back so that the next getTok() call returns it again.
    /// Only one token can be pending at a time.
    void ungetTok(in Token tok, in string s)
    {
        assert(!aheadp, "can only have one look ahead");
        ahead.tok = tok;
        ahead.s = s;
        aheadp = &ahead;
    }

    /// Reports, through error(), that `tok` (with text `s`) was read where
    /// `expected` was required.
    void unexpectedTokenError(in Token tok, in Token expected, string s)
    {
        s = s.length ? " ("~s~")" : "";
        error("Was expecting token " ~ humanReadableToken(expected) ~
              ". Got " ~ humanReadableToken(tok) ~ s ~ " instead.");
    }

    /// Reads the next token and returns its text.
    /// Throws: Exception if the token is not of kind `expected`
    string accept(in Token expected)
    {
        string s;
        immutable tok = getTok(s);
        if (tok != expected)
        {
            unexpectedTokenError(tok, expected, s);
        }
        return s;
    }

    /// Parses settings until the end of input and returns them in order.
    Setting[] parseConfig()
    {
        Setting[] res;
        while (1)
        {
            {
                // Peek at the next token to detect the end of input.
                string s;
                auto t = getTok(s);
                if (t == Token.eof)
                {
                    break;
                }
                ungetTok(t, s);
            }
            res ~= parseSetting();
        }
        return res;
    }

    /// Parses one setting: a name or string, an assignment token, a value
    /// and an optional ';' or ','.
    Setting parseSetting()
    {
        string name;
        auto t = getTok(name);
        if (t != Token.name && t != Token.str)
        {
            unexpectedTokenError(t, Token.name, name);
            assert(false);
        }
        accept(Token.assign);
        Setting res = parseValue(name);

        // The terminator is optional: consume it if present, otherwise hand
        // the token back.
        string s;
        t = getTok(s);
        if (t != Token.semicolon && t != Token.comma)
        {
            ungetTok(t, s);
        }
        return res;
    }

    /// Parses a value and wraps it in a setting called `name`.
    ///
    /// Returns: a ScalarSetting for a string, an ArraySetting for "[...]" or
    ///          a GroupSetting for "{...}"
    /// Throws: Exception if the input does not start with a string, '[' or
    ///         '{', or if the array or group is malformed
    Setting parseValue(string name)
    {
        string s;
        auto t = getTok(s);
        if (t == Token.str)
        {
            return new ScalarSetting(name, s);
        }
        else if (t == Token.lbracket)
        {
            string[] arrVal;
            while (1)
            {
                // get string or rbracket
                t = getTok(s);
                switch(t)
                {
                case Token.str:
                    arrVal ~= s;
                    break;
                case Token.rbracket:
                    return new ArraySetting(name, arrVal);
                default:
                    unexpectedTokenError(t, Token.str, s);
                    assert(false);
                }
                // get comma or rbracket
                t = getTok(s);
                switch(t)
                {
                case Token.comma:
                    break;
                case Token.rbracket:
                    return new ArraySetting(name, arrVal);
                default:
                    unexpectedTokenError(t, Token.comma, s);
                    assert(false);
                }
            }
        }
        else if (t == Token.lbrace)
        {
            Setting[] grpVal;
            while (1)
            {
                // Either the group ends here or another setting follows.
                t = getTok(s);
                if (t == Token.rbrace)
                {
                    return new GroupSetting(name, grpVal);
                }
                ungetTok(t, s);
                grpVal ~= parseSetting();
            }
        }
        error("Was expecting value.");
        assert(false);
    }
}
// Scalar values: quoted strings, backtick strings and concatenation.
// Continuation lines of multi-line literals are deliberately not indented,
// so that the literal text is unchanged.
unittest
{
    // Parses `source` as a value and checks that it is the scalar `want`.
    static void expectScalar(string source, string want)
    {
        auto parsed = Parser(source).parseValue(null);
        assert(parsed.type == Setting.Type.scalar);
        auto scalar = cast(ScalarSetting) parsed;
        assert(scalar.val == want);
    }

    // empty string
    expectScalar(`""`, "");
    // all supported escape sequences
    expectScalar(`"abc\r\ndef\t\"quoted/\\123\""`,
                 "abc\r\ndef\t\"quoted/\\123\"");
    // adjacent literals are concatenated, also across a line feed
    expectScalar(`"concatenated" " multiline"
" strings"`, "concatenated multiline strings");
    // backtick strings keep line feeds, backslashes, "//" and quotes as is
    expectScalar("`abc\n\\ //comment \"`",
                 "abc\n\\ //comment \"");
    // non-ASCII content
    expectScalar(`"Üņïčöđë"`, "Üņïčöđë");
}
// Array values: empty, one or two elements, trailing comma and comments.
// Continuation lines of the multi-line literal are deliberately not
// indented, so that the literal text is unchanged.
unittest
{
    // Parses `source` as a value and checks that it is the array `want`.
    static void expectArray(string source, string[] want)
    {
        auto parsed = Parser(source).parseValue(null);
        assert(parsed.type == Setting.Type.array);
        auto list = cast(ArraySetting) parsed;
        assert(list.vals == want);
    }

    expectArray(`[]`, []);
    expectArray(`[ "a" ]`, [ "a" ]);
    // a trailing comma is accepted
    expectArray(`[ "a", ]`, [ "a" ]);
    expectArray(`[ "a", "b" ]`, [ "a", "b" ]);
    // comments may appear between the elements
    expectArray(`[
// comment
"a",
// comment
"b"
]`, [ "a", "b" ]);
}
// A complete config: two top-level groups, comments, a quoted setting name
// and both assignment tokens. The lines of the literal are deliberately not
// indented, so that the literal text is unchanged.
unittest
{
    enum source =
`// comment
// comment
group-1_2: {};
// comment
"86(_64)?-.*linux\\.?":
{
// comment
scalar = "abc";
// comment
Array_1-2 = [ "a" ];
};
`;

    auto parsed = Parser(source).parseConfig();
    assert(parsed.length == 2);

    // first entry: an empty group with an unquoted name
    auto first = parsed[0];
    assert(first.name == "group-1_2");
    assert(first.type == Setting.Type.group);
    assert((cast(GroupSetting) first).children == []);

    // second entry: a group with a quoted name and two children
    auto second = parsed[1];
    assert(second.name == "86(_64)?-.*linux\\.?");
    assert(second.type == Setting.Type.group);
    auto secondGroup = cast(GroupSetting) second;
    assert(secondGroup.children.length == 2);

    auto scalarChild = secondGroup.children[0];
    assert(scalarChild.name == "scalar");
    assert(scalarChild.type == Setting.Type.scalar);
    assert((cast(ScalarSetting) scalarChild).val == "abc");

    auto arrayChild = secondGroup.children[1];
    assert(arrayChild.name == "Array_1-2");
    assert(arrayChild.type == Setting.Type.array);
    assert((cast(ArraySetting) arrayChild).vals == [ "a" ]);
}