mirror of
https://github.com/ldc-developers/ldc.git
synced 2025-05-06 19:06:02 +03:00

This avoids calling fopen() on Windows, which expects a narrow string in the current code page, not necessarily UTF-8.
592 lines
14 KiB
D
592 lines
14 KiB
D
//===-- driver/config.d - LDC config file parsing -----------------*- D -*-===//
|
||
//
|
||
// LDC – the LLVM D compiler
|
||
//
|
||
// This file is distributed under the BSD-style LDC license. See the LICENSE
|
||
// file for details.
|
||
//
|
||
//===----------------------------------------------------------------------===//
|
||
//
|
||
// Parsing engine for the LDC config file (ldc2.conf).
|
||
//
|
||
//===----------------------------------------------------------------------===//
|
||
module driver.config;
|
||
|
||
import core.stdc.ctype;
|
||
import core.stdc.stdio;
|
||
import core.stdc.string;
|
||
|
||
|
||
/// Common base of every config entry: a name plus a tag telling which
/// concrete subclass (scalar, array or group) carries the value.
class Setting
{
    /// Discriminates the kind of value held by the concrete subclass.
    enum Type
    {
        scalar,
        array,
        group,
    }

    private string _name;
    private Type _type;

    /// Records the name and the value kind passed in by the subclass.
    this(string name, Type type)
    {
        this._type = type;
        this._name = name;
    }

    /// The kind of value this setting holds.
    @property Type type() const
    {
        return this._type;
    }

    /// The name given at construction.
    @property string name() const
    {
        return this._name;
    }
}
|
||
|
||
|
||
/// A setting whose value is a single string.
class ScalarSetting : Setting
{
    private string _val;

    /// Creates a scalar setting called `name` holding `val`.
    this(string name, string val)
    {
        super(name, Type.scalar);
        this._val = val;
    }

    /// The string value.
    @property string val() const
    {
        return this._val;
    }
}
|
||
|
||
|
||
/// A setting whose value is a list of strings.
class ArraySetting : Setting
{
    private string[] _vals;

    /// Creates an array setting called `name` holding `vals`.
    this(string name, string[] vals)
    {
        super(name, Type.array);
        this._vals = vals;
    }

    /// The stored strings, exposed as a read-only slice.
    @property const(string)[] vals() const
    {
        return this._vals;
    }
}
|
||
|
||
/// A setting whose value is a list of nested settings.
class GroupSetting : Setting
{
    private Setting[] _children;

    /// Creates a group setting called `name` containing `children`.
    this(string name, Setting[] children)
    {
        super(name, Type.group);
        this._children = children;
    }

    /// The nested settings, exposed as a read-only slice.
    @property const(Setting)[] children() const
    {
        return this._children;
    }
}
|
||
|
||
|
||
/// Reads the config file `filename` and parses it into its top-level settings.
///
/// A leading UTF-8 byte order mark is skipped before parsing. Syntax errors
/// are reported through the Exception thrown by Parser.error.
Setting[] parseConfigFile(const(char)* filename)
{
    import dmd.globals : Loc;
    import dmd.utils;

    auto data = readFile(Loc.initial, filename).extractData();

    // Drop a UTF-8 BOM so the tokenizer never sees it.
    const hasBom = data.length >= 3 && data[0 .. 3] == "\xEF\xBB\xBF";
    if (hasBom)
    {
        data = data[3 .. $];
    }

    auto parser = Parser(cast(string) data, cast(string) filename.toDString);
    return parser.parseConfig();
}
|
||
|
||
|
||
private:
|
||
|
||
/+
|
||
|
||
What follows is a recursive descent parser that reads the following
|
||
EBNF grammar.
|
||
It is a subset of the libconfig grammar (http://www.hyperrealm.com/libconfig).
|
||
|
||
config = { ows , setting } , ows ;
|
||
setting = (name | string) , (":" | "=") , value , [";" | ","] ;
|
||
name = alpha , { alpha | digit | "_" | "-" } ;
|
||
value = string | array | group ;
|
||
array = "[" , ows ,
        [ string , ows , { "," , ows , string , ows } , [ "," , ows ] ] ,
        "]" ;
|
||
group = "{" , ows , { setting , ows } , "}" ;
|
||
string = ( quotstr , { ows , quotstr } ) |
|
||
( btstr , { ows, btstr } ) ;
|
||
quotstr = '"' , { ? any char but '"', '\n' and '\r' ? | escseq } , '"' ;
|
||
escseq = "\" , ( "\" | '"' | "r" | "n" | "t" ) ;
|
||
btstr = '`' , { ? any char but '`' ? } , '`' ;
|
||
alpha = ? any char between "a" and "z" included
|
||
or between "A" and "Z" included ? ;
|
||
digit = ? any char between "0" and "9" included ? ;
|
||
ows = [ ws ] ; (* optional white space *)
|
||
ws = ? white space (space, tab, line feed ...) ? ;
|
||
|
||
|
||
Single line comments are also supported. They start with "//" and span until
|
||
line feed.
|
||
The "//" sequence is however allowed within strings and doesn't need to be
|
||
escaped.
|
||
White space is significant only within strings.
|
||
Physical line feeds are not allowed within double-quoted strings (backtick
strings may contain them). To span a double-quoted string over multiple
lines, use concatenation ("hello " "world" == "hello world").
|
||
The following escape sequences are allowed in double-quoted strings:
|
||
- \\
|
||
- \"
|
||
- \r
|
||
- \n
|
||
- \t
|
||
|
||
+/
|
||
|
||
/// Kinds of lexical tokens produced by Parser.getTok.
enum Token
{
    /// a letter followed by letters, digits, '_' or '-'
    name,
    /// ':' or '='
    assign,
    /// double-quoted or backtick string literal
    str,
    /// '{'
    lbrace,
    /// '}'
    rbrace,
    /// '['
    lbracket,
    /// ']'
    rbracket,
    /// ';'
    semicolon,
    /// ','
    comma,
    /// any character that starts no valid token
    unknown,
    /// end of input
    eof,
}
|
||
|
||
/// Maps a token kind to the text used for it in parser error messages.
string humanReadableToken(in Token tok)
{
    string text;
    // `final switch` makes the compiler reject any Token member left unhandled.
    final switch (tok)
    {
    case Token.name:
        text = `"name"`;
        break;
    case Token.assign:
        text = `':' or '='`;
        break;
    case Token.str:
        text = `"string"`;
        break;
    case Token.lbrace:
        text = `'{'`;
        break;
    case Token.rbrace:
        text = `'}'`;
        break;
    case Token.lbracket:
        text = `'['`;
        break;
    case Token.rbracket:
        text = `']'`;
        break;
    case Token.semicolon:
        text = `';'`;
        break;
    case Token.comma:
        text = `','`;
        break;
    case Token.unknown:
        text = `"unknown token"`;
        break;
    case Token.eof:
        text = `"end of file"`;
        break;
    }
    return text;
}
|
||
|
||
struct Parser
|
||
{
|
||
string filename;
|
||
string content;
|
||
int index;
|
||
int lineNum = 1;
|
||
|
||
char lastChar = ' ';
|
||
|
||
/// One pushed-back token: its kind and the text that came with it.
static struct Ahead
{
    /// Kind of the pushed-back token.
    Token tok;

    /// Text returned alongside the token by getTok.
    string s;
}
|
||
Ahead ahead;
|
||
Ahead* aheadp;
|
||
|
||
/// Sets up a parser over `content`. Within this struct `filename` is only
/// used to label error messages.
this(string content, string filename = null)
{
    this.content = content;
    this.filename = filename;
}
|
||
|
||
/// Throws an Exception describing a parse error at the current line.
///
/// The message names the config file, the current `lineNum` and `msg`.
/// It is formatted into a fixed 1024-byte buffer, so overly long text is
/// truncated rather than rejected.
///
/// Params:
///     msg = description of what went wrong
/// Throws: Exception, always.
void error(in string msg)
{
    enum fmt = "Error while reading config file: %.*s\nline %d: %.*s";
    char[1024] buf;

    // "%.*s" consumes an `int` precision. A size_t length passed through C
    // varargs is a type mismatch on 64-bit targets, hence the casts.
    const written = snprintf(buf.ptr, buf.length, fmt,
                             cast(int) filename.length, filename.ptr,
                             lineNum,
                             cast(int) msg.length, msg.ptr);

    // snprintf returns the length the full text would have had (or a
    // negative value on failure), so clamp it to what the buffer holds.
    size_t len = 0;
    if (written > 0)
    {
        len = cast(size_t) written < buf.length ? cast(size_t) written
                                                : buf.length - 1;
    }

    throw new Exception(buf[0 .. len].idup);
}
|
||
|
||
/// Returns the next character of `content` and advances past it, or '\0'
/// once the input is exhausted. Each '\n' consumed bumps `lineNum`.
char getChar()
{
    if (index != content.length)
    {
        const ch = content[index];
        ++index;
        if (ch == '\n')
        {
            ++lineNum;
        }
        return ch;
    }
    return '\0';
}
|
||
|
||
/// Returns the next token and consumes it from the input.
///
/// For Token.name and Token.str, `outStr` receives the identifier or the
/// decoded string. For Token.unknown it receives the offending character.
/// A token pushed back with ungetTok is returned before reading further.
/// Malformed string literals are reported through error().
Token getTok(out string outStr)
{
    // A pushed-back token takes precedence over the character stream.
    if (aheadp)
    {
        immutable pending = aheadp.tok;
        outStr = aheadp.s;
        aheadp = null;
        return pending;
    }

    // Skip every run of white space and "//" comment preceding the token.
    for (;;)
    {
        while (isspace(lastChar))
        {
            lastChar = getChar();
        }

        if (lastChar != '/')
        {
            break;
        }

        lastChar = getChar();
        if (lastChar != '/')
        {
            // A lone '/' starts no valid token.
            outStr = "/";
            return Token.unknown;
        }

        // Discard the remainder of the comment line.
        do
        {
            lastChar = getChar();
        }
        while (lastChar != '\n' && lastChar != '\0');
    }

    // name: a letter followed by letters, digits, '_' or '-'
    if (isalpha(lastChar))
    {
        string ident;
        for (;;)
        {
            ident ~= lastChar;
            lastChar = getChar();
            if (!(isalnum(lastChar) || lastChar == '_' || lastChar == '-'))
            {
                break;
            }
        }
        outStr = ident;
        return Token.name;
    }

    // Single-character tokens and end of input.
    Token punct;
    bool isPunct = true;
    switch (lastChar)
    {
    case '\0':
        // End of input: lastChar is left untouched.
        return Token.eof;
    case ':':
    case '=':
        punct = Token.assign;
        break;
    case ';':
        punct = Token.semicolon;
        break;
    case ',':
        punct = Token.comma;
        break;
    case '{':
        punct = Token.lbrace;
        break;
    case '}':
        punct = Token.rbrace;
        break;
    case '[':
        punct = Token.lbracket;
        break;
    case ']':
        punct = Token.rbracket;
        break;
    default:
        isPunct = false;
        break;
    }
    if (isPunct)
    {
        lastChar = getChar();
        return punct;
    }

    // Double-quoted string: escapes are decoded, line breaks are rejected.
    if (lastChar == '"')
    {
        string text;
        // Adjacent literals separated only by white space are concatenated.
        do
        {
            for (;;)
            {
                lastChar = getChar();
                if (lastChar == '"')
                {
                    break;
                }

                if (lastChar == '\n' || lastChar == '\r')
                {
                    error("Unexpected end of line in string literal");
                }
                else if (lastChar == '\0')
                {
                    error("Unexpected end of file in string literal");
                }

                if (lastChar == '\\')
                {
                    // Replace lastChar with the character the escape denotes.
                    lastChar = getChar();
                    switch (lastChar)
                    {
                    case '\\':
                    case '"':
                        break;
                    case 'r':
                        lastChar = '\r';
                        break;
                    case 'n':
                        lastChar = '\n';
                        break;
                    case 't':
                        lastChar = '\t';
                        break;
                    default:
                        error("Unexpected escape sequence: \\" ~ lastChar);
                        break;
                    }
                }

                text ~= lastChar;
            }

            // Step past the closing quote and any white space after it.
            lastChar = getChar();
            while (isspace(lastChar))
            {
                lastChar = getChar();
            }
        }
        while (lastChar == '"');

        outStr = text;
        return Token.str;
    }

    // Backtick string: taken verbatim, no escapes, line breaks allowed.
    if (lastChar == '`')
    {
        string text;
        do
        {
            for (;;)
            {
                lastChar = getChar();
                if (lastChar == '`')
                {
                    break;
                }
                if (lastChar == '\0')
                {
                    error("Unexpected end of file in string literal");
                }
                text ~= lastChar;
            }

            // Step past the closing backtick and any white space after it.
            lastChar = getChar();
            while (isspace(lastChar))
            {
                lastChar = getChar();
            }
        }
        while (lastChar == '`');

        outStr = text;
        return Token.str;
    }

    // Anything else: hand the character back as an unknown token.
    outStr = [lastChar];
    lastChar = getChar();
    return Token.unknown;
}
|
||
|
||
/// Pushes one token back so that the next getTok call returns it again.
/// Only a single token of look-ahead is supported (asserted).
void ungetTok(in Token tok, in string s)
{
    assert(!aheadp, "can only have one look ahead");

    ahead.s = s;
    ahead.tok = tok;
    // A non-null aheadp is what tells getTok that `ahead` is populated.
    aheadp = &ahead;
}
|
||
|
||
/// Reports, via error(), that token `tok` was read where `expected` was
/// required. A non-empty token text `s` is appended in parentheses.
void unexpectedTokenError(in Token tok, in Token expected, string s)
{
    string detail;
    if (s.length)
    {
        detail = " (" ~ s ~ ")";
    }

    const message = "Was expecting token " ~ humanReadableToken(expected) ~
                    ". Got " ~ humanReadableToken(tok) ~ detail ~ " instead.";
    error(message);
}
|
||
|
||
/// Consumes the next token and returns its text. A token of any kind other
/// than `expected` is reported through unexpectedTokenError.
string accept(in Token expected)
{
    string text;
    immutable got = getTok(text);
    if (got == expected)
    {
        return text;
    }

    unexpectedTokenError(got, expected, text);
    return text;
}
|
||
|
||
/// Parses settings until end of input and returns them in file order.
Setting[] parseConfig()
{
    Setting[] settings;
    for (;;)
    {
        // Peek at the next token: stop at eof, otherwise hand it back so
        // parseSetting sees the setting from its first token.
        string text;
        const tok = getTok(text);
        if (tok == Token.eof)
        {
            break;
        }
        ungetTok(tok, text);

        settings ~= parseSetting();
    }
    return settings;
}
|
||
|
||
/// Parses one `key (":" | "=") value` setting, plus an optional trailing
/// ';' or ','. The key may be a name or a string.
Setting parseSetting()
{
    string key;
    const first = getTok(key);
    const isKey = first == Token.name || first == Token.str;
    if (!isKey)
    {
        unexpectedTokenError(first, Token.name, key);
        assert(false);
    }

    accept(Token.assign);

    Setting parsed = parseValue(key);

    // The terminator is optional: anything else belongs to what follows.
    string text;
    const next = getTok(text);
    const isTerminator = next == Token.semicolon || next == Token.comma;
    if (!isTerminator)
    {
        ungetTok(next, text);
    }

    return parsed;
}
|
||
|
||
/// Parses the value of the setting called `name` and returns the matching
/// ScalarSetting, ArraySetting or GroupSetting. Anything that is not a
/// string, '[' or '{' is reported through error().
Setting parseValue(string name)
{
    string text;
    auto tok = getTok(text);

    // string → scalar
    if (tok == Token.str)
    {
        return new ScalarSetting(name, text);
    }

    // '[' → array of strings, comma-separated, trailing comma allowed
    if (tok == Token.lbracket)
    {
        string[] items;
        for (;;)
        {
            // element or closing bracket
            tok = getTok(text);
            if (tok == Token.rbracket)
            {
                return new ArraySetting(name, items);
            }
            if (tok != Token.str)
            {
                unexpectedTokenError(tok, Token.str, text);
                assert(false);
            }
            items ~= text;

            // separator or closing bracket
            tok = getTok(text);
            if (tok == Token.rbracket)
            {
                return new ArraySetting(name, items);
            }
            if (tok != Token.comma)
            {
                unexpectedTokenError(tok, Token.comma, text);
                assert(false);
            }
        }
    }

    // '{' → group of nested settings up to the matching '}'
    if (tok == Token.lbrace)
    {
        Setting[] members;
        for (;;)
        {
            tok = getTok(text);
            if (tok == Token.rbrace)
            {
                return new GroupSetting(name, members);
            }
            // Not the end of the group: hand the token back to parseSetting.
            ungetTok(tok, text);
            members ~= parseSetting();
        }
    }

    error("Was expecting value.");
    assert(false);
}
|
||
}
|
||
|
||
// Scalar values: escapes, literal concatenation, backtick strings, UTF-8.
unittest
{
    static void checkScalar(string input, string expected)
    {
        auto parser = Parser(input);
        auto parsed = parser.parseValue(null);
        assert(parsed.type == Setting.Type.scalar);
        assert((cast(ScalarSetting) parsed).val == expected);
    }

    // empty literal
    checkScalar(`""`, "");
    // every supported escape sequence; a '/' inside a string is not a comment
    checkScalar(`"abc\r\ndef\t\"quoted/\\123\""`,
                "abc\r\ndef\t\"quoted/\\123\"");
    // adjacent literals are joined, even across a line break
    checkScalar(`"concatenated" " multiline"
" strings"`, "concatenated multiline strings");
    // backtick strings are verbatim: line feed, backslash, "//" and '"'
    checkScalar("`abc\n\\ //comment \"`",
                "abc\n\\ //comment \"");
    // non-ASCII bytes pass through unchanged
    checkScalar(`"Üņïčöđë"`, "Üņïčöđë");
}
|
||
|
||
// Array values: empty, single element, trailing comma, embedded comments.
unittest
{
    static void checkArray(string input, string[] expected)
    {
        auto parser = Parser(input);
        auto parsed = parser.parseValue(null);
        assert(parsed.type == Setting.Type.array);
        assert((cast(ArraySetting) parsed).vals == expected);
    }

    checkArray(`[]`, []);
    checkArray(`[ "a" ]`, [ "a" ]);
    // a trailing comma adds no element
    checkArray(`[ "a", ]`, [ "a" ]);
    checkArray(`[ "a", "b" ]`, [ "a", "b" ]);
    // comments between elements are skipped
    checkArray(`[
// comment
"a",
// comment
"b"
]`, [ "a", "b" ]);
}
|
||
|
||
// Whole config: two groups, one keyed by a name and one by a string.
unittest
{
    enum input =
`// comment

// comment
group-1_2: {};
// comment
"86(_64)?-.*linux\\.?":
{
// comment
scalar = "abc";
// comment
Array_1-2 = [ "a" ];
};
`;

    auto parser = Parser(input);
    auto parsed = parser.parseConfig();
    assert(parsed.length == 2);

    // first setting: empty group keyed by a name containing '-' and '_'
    assert(parsed[0].name == "group-1_2");
    assert(parsed[0].type == Setting.Type.group);
    assert((cast(GroupSetting) parsed[0]).children == []);

    // second setting: group keyed by a string, with its escape decoded
    assert(parsed[1].name == "86(_64)?-.*linux\\.?");
    assert(parsed[1].type == Setting.Type.group);
    auto second = cast(GroupSetting) parsed[1];
    assert(second.children.length == 2);

    assert(second.children[0].name == "scalar");
    assert(second.children[0].type == Setting.Type.scalar);
    assert((cast(ScalarSetting) second.children[0]).val == "abc");

    assert(second.children[1].name == "Array_1-2");
    assert(second.children[1].type == Setting.Type.array);
    assert((cast(ArraySetting) second.children[1]).vals == [ "a" ]);
}
|