mirror of https://github.com/buggins/dlangide.git
3002 lines
97 KiB
D
3002 lines
97 KiB
D
module ddc.lexer.tokenizer;
|
||
|
||
import ddc.lexer.textsource;
|
||
import ddc.lexer.exceptions;
|
||
|
||
import std.stdio;
|
||
import std.datetime;
|
||
import std.conv;
|
||
import std.utf;
|
||
import std.math;
|
||
|
||
enum TokenType : ubyte {
|
||
EOF,
|
||
//EOL,
|
||
WHITESPACE,
|
||
COMMENT,
|
||
IDENTIFIER,
|
||
STRING,
|
||
CHARACTER,
|
||
INTEGER,
|
||
FLOAT,
|
||
KEYWORD,
|
||
OP,
|
||
INVALID
|
||
}
|
||
|
||
// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _
|
||
// max code is 0xd7ff
|
||
//1728
|
||
const uint[1728] UNIVERSAL_ALPHA_FLAGS = [
|
||
0x00000000,0x00000000,0x87fffffe,0x07fffffe,0x00000000,0x04a00400,0xff7fffff,0xff7fffff,// 0000-00ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xfc3fffff,// 0100-01ff
|
||
0x00ffffff,0x00000000,0xffff0000,0xffffffff,0xffffffff,0xe9ff01ff,0x00030003,0x0000001f,// 0200-02ff
|
||
0x00000000,0x00000000,0x00000000,0x04000000,0xffffd740,0xfffffffb,0x547f7fff,0x000ffffd,// 0300-03ff
|
||
0xffffdffe,0xffffffff,0xdffeffff,0xffffffff,0xffff0003,0xffffffff,0xffff199f,0x033fcfff,// 0400-04ff
|
||
0x00000000,0xfffe0000,0x027fffff,0xfffffffe,0x000000ff,0xbbff0000,0xffff0006,0x000707ff,// 0500-05ff
|
||
0x00000000,0x07fffffe,0x0007ffff,0xffff03ff,0xffffffff,0x7cffffff,0x1fff7fff,0x03ff3de0,// 0600-06ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0700-07ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 0800-08ff
|
||
0xffffffee,0xe3ffffff,0xff073fff,0x0000ffcf,0xfff99fee,0xc3c5fdff,0xb000399f,0x0003ffcf,// 0900-09ff
|
||
0xfff987e4,0xc36dfdff,0x5e003987,0x0010ffc0,0xfffbafee,0xe3edfdff,0x00013bbf,0x0000ffc1,// 0a00-0aff
|
||
0xfff99fee,0xe3cdfdff,0xb000398f,0x0000ffc3,0xd63dc7ec,0xc3bfc718,0x00003dc7,0x0000ff80,// 0b00-0bff
|
||
0xfffddfee,0xc3effdff,0x00003ddf,0x0000ffc3,0xfffddfec,0xc3effdff,0x40003ddf,0x0000ffc3,// 0c00-0cff
|
||
0xfffddfec,0xc3fffdff,0x00003dcf,0x0000ffc3,0x00000000,0x00000000,0x00000000,0x00000000,// 0d00-0dff
|
||
0xfffffffe,0x07ffffff,0x0fffffff,0x00000000,0xfef02596,0x3bff6cae,0x33ff3f5f,0x00000000,// 0e00-0eff
|
||
0x03000001,0xc2afffff,0xfffffeff,0xfffe03ff,0xfebf0fdf,0x02fe3fff,0x00000000,0x00000000,// 0f00-0fff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0xffffffff,0xffff003f,0x007fffff,// 1000-10ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1100-11ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1200-12ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1300-13ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1400-14ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1500-15ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1600-16ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1700-17ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1800-18ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1900-19ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1a00-1aff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1b00-1bff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1c00-1cff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 1d00-1dff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0fffffff,0xffffffff,0xffffffff,0x03ffffff,// 1e00-1eff
|
||
0x3f3fffff,0xffffffff,0xaaff3f3f,0x3fffffff,0xffffffff,0x5fdfffff,0x0fcf1fdc,0x1fdc1fff,// 1f00-1fff
|
||
0x00000000,0x80000000,0x00000001,0x80000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2000-20ff
|
||
0x3f2ffc84,0x01fbfd50,0x00000000,0xffffffff,0x00000007,0x00000000,0x00000000,0x00000000,// 2100-21ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2200-22ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2300-23ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2400-24ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2500-25ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2600-26ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2700-27ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2800-28ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2900-29ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2a00-2aff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2b00-2bff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2c00-2cff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2d00-2dff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2e00-2eff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 2f00-2fff
|
||
0x000000e0,0x000003fe,0xfffffffe,0xffffffff,0x180fffff,0xfffffffe,0xffffffff,0x187fffff,// 3000-30ff
|
||
0xffffffe0,0x00001fff,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3100-31ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3200-32ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3300-33ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3400-34ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3500-35ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3600-36ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3700-37ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3800-38ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3900-39ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3a00-3aff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3b00-3bff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3c00-3cff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3d00-3dff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3e00-3eff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 3f00-3fff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4000-40ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4100-41ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4200-42ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4300-43ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4400-44ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4500-45ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4600-46ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4700-47ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4800-48ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4900-49ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4a00-4aff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4b00-4bff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4c00-4cff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// 4d00-4dff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4e00-4eff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 4f00-4fff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5000-50ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5100-51ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5200-52ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5300-53ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5400-54ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5500-55ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5600-56ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5700-57ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5800-58ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5900-59ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5a00-5aff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5b00-5bff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5c00-5cff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5d00-5dff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5e00-5eff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 5f00-5fff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6000-60ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6100-61ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6200-62ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6300-63ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6400-64ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6500-65ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6600-66ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6700-67ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6800-68ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6900-69ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6a00-6aff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6b00-6bff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6c00-6cff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6d00-6dff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6e00-6eff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 6f00-6fff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7000-70ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7100-71ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7200-72ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7300-73ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7400-74ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7500-75ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7600-76ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7700-77ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7800-78ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7900-79ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7a00-7aff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7b00-7bff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7c00-7cff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7d00-7dff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7e00-7eff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 7f00-7fff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8000-80ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8100-81ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8200-82ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8300-83ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8400-84ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8500-85ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8600-86ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8700-87ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8800-88ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8900-89ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8a00-8aff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8b00-8bff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8c00-8cff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8d00-8dff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8e00-8eff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 8f00-8fff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9000-90ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9100-91ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9200-92ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9300-93ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9400-94ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9500-95ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9600-96ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9700-97ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9800-98ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9900-99ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9a00-9aff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9b00-9bff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9c00-9cff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9d00-9dff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// 9e00-9eff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000003f,0x00000000,0x00000000,// 9f00-9fff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a000-a0ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a100-a1ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a200-a2ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a300-a3ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a400-a4ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a500-a5ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a600-a6ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a700-a7ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a800-a8ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// a900-a9ff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// aa00-aaff
|
||
0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,// ab00-abff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ac00-acff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ad00-adff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ae00-aeff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// af00-afff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b000-b0ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b100-b1ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b200-b2ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b300-b3ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b400-b4ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b500-b5ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b600-b6ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b700-b7ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b800-b8ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// b900-b9ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ba00-baff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bb00-bbff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bc00-bcff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bd00-bdff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// be00-beff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// bf00-bfff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c000-c0ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c100-c1ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c200-c2ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c300-c3ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c400-c4ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c500-c5ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c600-c6ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c700-c7ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c800-c8ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// c900-c9ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ca00-caff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cb00-cbff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cc00-ccff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cd00-cdff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// ce00-ceff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// cf00-cfff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d000-d0ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d100-d1ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d200-d2ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d300-d3ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d400-d4ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d500-d5ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,// d600-d6ff
|
||
0xffffffff,0xffffffff,0xffffffff,0xffffffff,0xffffffff,0x0000000f,0x00000000,0x00000000// d700-d7ff
|
||
];
|
||
|
||
/// returns true if character is A..Z, a..z, _ or universal alpha
|
||
bool isUniversalAlpha(dchar ch) pure nothrow {
|
||
return (ch <= 0xd7ff && (UNIVERSAL_ALPHA_FLAGS[ch >> 5] & (1 << (ch & 31))));
|
||
}
|
||
|
||
/// character can present at the beginning of identifier
|
||
bool isIdentStartChar(dchar ch) pure nothrow {
|
||
return isUniversalAlpha(ch);
|
||
}
|
||
|
||
/// character can present in middle of identifier
|
||
bool isIdentMiddleChar(dchar ch) pure nothrow {
|
||
return (ch >= '0' && ch <='9') || isUniversalAlpha(ch);
|
||
}
|
||
|
||
immutable bool ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE = false;
|
||
static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
|
||
bool r(dchar ch, wchar v) pure nothrow {
|
||
return ch == v;
|
||
}
|
||
|
||
bool r(dchar ch, wchar v1, wchar v2) pure nothrow {
|
||
return ch >= v1 && ch <= v2;
|
||
}
|
||
|
||
bool isUniversalAlphaSlow(dchar c) pure nothrow {
|
||
return
|
||
// Latin: 00AA, 00BA, 00C0−00D6, 00D8−00F6, 00F8−01F5, 01FA−0217,
|
||
// 0250−02A8, 1E00−1E9B, 1EA0−1EF9, 207F
|
||
r(c, 0xAA) || r(c, 0x00BA) || r(c, 0x00C0,0x00D6) || r(c, 0x00D8,0x00F6) || r(c, 0x00F8,0x01F5) || r(c, 0x01FA,0x0217)
|
||
|| r(c, 0x0250,0x02A8) || r(c, 0x1E00,0x1E9B) || r(c, 0x1EA0,0x1EF9) || r(c, 0x207F)
|
||
//Greek: 0386, 0388−038A, 038C, 038E−03A1, 03A3−03CE, 03D0−03D6,
|
||
//03DA, 03DC, 03DE, 03E0, 03E2−03F3, 1F00−1F15, 1F18−1F1D,
|
||
//1F20−1F45, 1F48−1F4D, 1F50−1F57, 1F59, 1F5B, 1F5D,
|
||
//1F5F−1F7D, 1F80−1FB4, 1FB6−1FBC, 1FC2−1FC4, 1FC6−1FCC,
|
||
//1FD0−1FD3, 1FD6−1FDB, 1FE0−1FEC, 1FF2−1FF4, 1FF6−1FFC
|
||
|| r(c, 0x0386) || r(c, 0x0388,0x038A) || r(c, 0x038C) || r(c, 0x038E,0x03A1) || r(c, 0x03A3,0x03CE) || r(c, 0x03D0,0x03D6)
|
||
|| r(c, 0x03DA) || r(c, 0x03DC) || r(c, 0x03DE) || r(c, 0x03E0) || r(c, 0x03E2,0x03F3) || r(c, 0x1F00,0x1F15) || r(c, 0x1F18,0x1F1D)
|
||
|| r(c, 0x1F20,0x1F45) || r(c, 0x1F48,0x1F4D) || r(c, 0x1F50,0x1F57) || r(c, 0x1F59) || r(c, 0x1F5B) || r(c, 0x1F5D)
|
||
|| r(c, 0x1F5F,0x1F7D) || r(c, 0x1F80,0x1FB4) || r(c, 0x1FB6,0x1FBC) || r(c, 0x1FC2,0x1FC4) || r(c, 0x1FC6,0x1FCC)
|
||
|| r(c, 0x1FD0,0x1FD3) || r(c, 0x1FD6,0x1FDB) || r(c, 0x1FE0,0x1FEC) || r(c, 0x1FF2,0x1FF4) || r(c, 0x1FF6,0x1FFC)
|
||
//Cyrillic: 0401−040C, 040E−044F, 0451−045C, 045E−0481, 0490−04C4,
|
||
//04C7−04C8, 04CB−04CC, 04D0−04EB, 04EE−04F5, 04F8−04F9
|
||
|| r(c, 0x0401,0x040C) || r(c, 0x040E,0x044F) || r(c, 0x0451,0x045C) || r(c, 0x045E,0x0481) || r(c, 0x0490,0x04C4)
|
||
|| r(c, 0x04C7,0x04C8) || r(c, 0x04CB,0x04CC) || r(c, 0x04D0,0x04EB) || r(c, 0x04EE,0x04F5) || r(c, 0x04F8,0x04F9)
|
||
//Armenian: 0531−0556, 0561−0587
|
||
|| r(c, 0x0531,0x0556) || r(c, 0x0561,0x0587)
|
||
//Hebrew: 05B0−05B9, 05BB−05BD, 05BF, 05C1−05C2, 05D0−05EA,
|
||
//05F0−05F2
|
||
|| r(c, 0x05B0,0x05B9) || r(c, 0x05BB,0x05BD) || r(c, 0x05BF) || r(c, 0x05C1,0x05C2) || r(c, 0x05D0,0x05EA)
|
||
|| r(c, 0x05F0,0x05F2)
|
||
//Arabic: 0621−063A, 0640−0652, 0670−06B7, 06BA−06BE, 06C0−06CE,
|
||
//06D0−06DC, 06E5−06E8, 06EA−06ED
|
||
|| r(c, 0x0621,0x063A) || r(c, 0x0640,0x0652) || r(c, 0x0670,0x06B7) || r(c, 0x06BA,0x06BE) || r(c, 0x06C0,0x06CE)
|
||
|| r(c, 0x06D0,0x06DC) || r(c, 0x06E5,0x06E8) || r(c, 0x06EA,0x06ED)
|
||
//Devanagari: 0901−0903, 0905−0939, 093E−094D, 0950−0952, 0958−0963
|
||
|| r(c, 0x0901,0x0903) || r(c, 0x0905,0x0939) || r(c, 0x093E,0x094D) || r(c, 0x0950,0x0952) || r(c, 0x0958,0x0963)
|
||
//Bengali: 0981−0983, 0985−098C, 098F−0990, 0993−09A8, 09AA−09B0,
|
||
//09B2, 09B6−09B9, 09BE−09C4, 09C7−09C8, 09CB−09CD,
|
||
//09DC−09DD, 09DF−09E3, 09F0−09F1
|
||
|| r(c, 0x0981,0x0983) || r(c, 0x0985,0x098C) || r(c, 0x098F,0x0990) || r(c, 0x0993,0x09A8) || r(c, 0x09AA,0x09B0)
|
||
|| r(c, 0x09B2) || r(c, 0x09B6,0x09B9) || r(c, 0x09BE,0x09C4) || r(c, 0x09C7,0x09C8) || r(c, 0x09CB,0x09CD)
|
||
|| r(c, 0x09DC,0x09DD) || r(c, 0x09DF,0x09E3) || r(c, 0x09F0,0x09F1)
|
||
//Gurmukhi: 0A02, 0A05−0A0A, 0A0F−0A10, 0A13−0A28, 0A2A−0A30,
|
||
//0A32−0A33, 0A35−0A36, 0A38−0A39, 0A3E−0A42, 0A47−0A48,
|
||
//0A4B−0A4D, 0A59−0A5C, 0A5E, 0A74
|
||
|| r(c, 0x0A02) || r(c, 0x0A05,0x0A0A) || r(c, 0x0A0F,0x0A10) || r(c, 0x0A13,0x0A28) || r(c, 0x0A2A,0x0A30)
|
||
|| r(c, 0x0A32,0x0A33) || r(c, 0x0A35,0x0A36) || r(c, 0x0A38,0x0A39) || r(c, 0x0A3E,0x0A42) || r(c, 0x0A47,0x0A48)
|
||
|| r(c, 0x0A4B,0x0A4D) || r(c, 0x0A59,0x0A5C) || r(c, 0x0A5E) || r(c, 0x0A74)
|
||
//Gujarati: 0A81−0A83, 0A85−0A8B, 0A8D, 0A8F−0A91, 0A93−0AA8,
|
||
//0AAA−0AB0, 0AB2−0AB3, 0AB5−0AB9, 0ABD−0AC5,
|
||
//0AC7−0AC9, 0ACB−0ACD, 0AD0, 0AE0
|
||
|| r(c, 0x0A81,0x0A83) || r(c, 0x0A85,0x0A8B) || r(c, 0x0A8D) || r(c, 0x0A8F,0x0A91) || r(c, 0x0A93,0x0AA8)
|
||
|| r(c, 0x0AAA,0x0AB0) || r(c, 0x0AB2,0x0AB3) || r(c, 0x0AB5,0x0AB9) || r(c, 0x0ABD,0x0AC5)
|
||
|| r(c, 0x0AC7,0x0AC9) || r(c, 0x0ACB,0x0ACD) || r(c, 0x0AD0) || r(c, 0x0AE0)
|
||
// Oriya: 0B01−0B03, 0B05−0B0C, 0B0F−0B10, 0B13−0B28, 0B2A−0B30,
|
||
//0B32−0B33, 0B36−0B39, 0B3E−0B43, 0B47−0B48, 0B4B−0B4D,
|
||
//0B5C−0B5D, 0B5F−0B61
|
||
|| r(c, 0x0B01,0x0B03) || r(c, 0x0B05,0x0B0C) || r(c, 0x0B0F,0x0B10) || r(c, 0x0B13,0x0B28) || r(c, 0x0B2A,0x0B30)
|
||
|| r(c, 0x0B32,0x0B33) || r(c, 0x0B36,0x0B39) || r(c, 0x0B3E,0x0B43) || r(c, 0x0B47,0x0B48) || r(c, 0x0B4B,0x0B4D)
|
||
|| r(c, 0x0B5C,0x0B5D) || r(c, 0x0B5F,0x0B61)
|
||
//Tamil: 0B82−0B83, 0B85−0B8A, 0B8E−0B90, 0B92−0B95, 0B99−0B9A,
|
||
//0B9C, 0B9E−0B9F, 0BA3−0BA4, 0BA8−0BAA, 0BAE−0BB5,
|
||
//0BB7−0BB9, 0BBE−0BC2, 0BC6−0BC8, 0BCA−0BCD
|
||
|| r(c, 0x0B82,0x0B83) || r(c, 0x0B85,0x0B8A) || r(c, 0x0B8E,0x0B90) || r(c, 0x0B92,0x0B95) || r(c, 0x0B99,0x0B9A)
|
||
|| r(c, 0x0B9C) || r(c, 0x0B9E,0x0B9F) || r(c, 0x0BA3,0x0BA4) || r(c, 0x0BA8,0x0BAA) || r(c, 0x0BAE,0x0BB5)
|
||
|| r(c, 0x0BB7,0x0BB9) || r(c, 0x0BBE,0x0BC2) || r(c, 0x0BC6,0x0BC8) || r(c, 0x0BCA,0x0BCD)
|
||
//Telugu: 0C01−0C03, 0C05−0C0C, 0C0E−0C10, 0C12−0C28, 0C2A−0C33,
|
||
//0C35−0C39, 0C3E−0C44, 0C46−0C48, 0C4A−0C4D, 0C60−0C61
|
||
|| r(c, 0x0C01,0x0C03) || r(c, 0x0C05,0x0C0C) || r(c, 0x0C0E,0x0C10) || r(c, 0x0C12,0x0C28) || r(c, 0x0C2A,0x0C33)
|
||
|| r(c, 0x0C35,0x0C39) || r(c, 0x0C3E,0x0C44) || r(c, 0x0C46,0x0C48) || r(c, 0x0C4A,0x0C4D) || r(c, 0x0C60,0x0C61)
|
||
//Kannada: 0C82−0C83, 0C85−0C8C, 0C8E−0C90, 0C92−0CA8, 0CAA−0CB3,
|
||
//0CB5−0CB9, 0CBE−0CC4, 0CC6−0CC8, 0CCA−0CCD, 0CDE,
|
||
//0CE0−0CE1
|
||
|| r(c, 0x0C82,0x0C83) || r(c, 0x0C85,0x0C8C) || r(c, 0x0C8E,0x0C90) || r(c, 0x0C92,0x0CA8) || r(c, 0x0CAA,0x0CB3)
|
||
|| r(c, 0x0CB5,0x0CB9) || r(c, 0x0CBE,0x0CC4) || r(c, 0x0CC6,0x0CC8) || r(c, 0x0CCA,0x0CCD) || r(c, 0x0CDE)
|
||
|| r(c, 0x0CE0,0x0CE1)
|
||
//Malayalam: 0D02−0D03, 0D05−0D0C, 0D0E−0D10, 0D12−0D28, 0D2A−0D39,
|
||
//0D3E−0D43, 0D46−0D48, 0D4A−0D4D, 0D60−0D61
|
||
|| r(c, 0x0D02,0x0D03) || r(c, 0x0D05,0x0D0C) || r(c, 0x0D0E,0x0D10) || r(c, 0x0D12,0x0D28) || r(c, 0x0D2A,0x0D39)
|
||
|| r(c, 0xD3E,0x0D43) || r(c, 0x0D46,0x0D48) || r(c, 0x0D4A,0x0D4D) || r(c, 0x0D60,0x0D61)
|
||
//Thai: 0E01−0E3A, 0E40−0E5B
|
||
|| r(c, 0x0E01,0x0E3A) || r(c, 0x0E40,0x0E5B)
|
||
//Lao: 0E81−0E82, 0E84, 0E87−0E88, 0E8A, 0E8D, 0E94−0E97,
|
||
//0E99−0E9F, 0EA1−0EA3, 0EA5, 0EA7, 0EAA−0EAB,
|
||
//0EAD−0EAE, 0EB0−0EB9, 0EBB−0EBD, 0EC0−0EC4, 0EC6,
|
||
//0EC8−0ECD, 0EDC−0EDD
|
||
|| r(c, 0x0E81,0x0E82) || r(c, 0x0E84) || r(c, 0x0E87,0x0E88) || r(c, 0x0E8A) || r(c, 0x0E8D) || r(c, 0x0E94,0x0E97)
|
||
|| r(c, 0x0E99,0x0E9F) || r(c, 0x0EA1,0x0EA3) || r(c, 0x0EA5) || r(c, 0x0EA7) || r(c, 0x0EAA,0x0EAB)
|
||
|| r(c, 0x0EAD,0x0EAE) || r(c, 0x0EB0,0x0EB9) || r(c, 0x0EBB,0x0EBD) || r(c, 0x0EC0,0x0EC4) || r(c, 0x0EC6)
|
||
|| r(c, 0x0EC8,0x0ECD) || r(c, 0x0EDC,0x0EDD)
|
||
//Tibetan: 0F00, 0F18−0F19, 0F35, 0F37, 0F39, 0F3E−0F47, 0F49−0F69,
|
||
//0F71−0F84, 0F86−0F8B, 0F90−0F95, 0F97, 0F99−0FAD,
|
||
//0FB1−0FB7, 0FB9
|
||
|| r(c, 0x0F00) || r(c, 0x0F18,0x0F19) || r(c, 0x0F35) || r(c, 0x0F37) || r(c, 0x0F39) || r(c, 0x0F3E,0x0F47) || r(c, 0x0F49,0x0F69)
|
||
|| r(c, 0x0F71,0x0F84) || r(c, 0x0F86,0x0F8B) || r(c, 0x0F90,0x0F95) || r(c, 0x0F97) || r(c, 0x0F99,0x0FAD)
|
||
|| r(c, 0x0FB1,0x0FB7) || r(c, 0x0FB9)
|
||
//Georgian: 10A0−10C5, 10D0−10F6
|
||
|| r(c, 0x10A0,0x10C5) || r(c, 0x10D0,0x10F6)
|
||
//Hiragana: 3041−3093, 309B−309C
|
||
|| r(c, 0x3041,0x3093) || r(c, 0x309B,0x309C)
|
||
//Katakana: 30A1−30F6, 30FB−30FC
|
||
|| r(c, 0x30A1,0x30F6) || r(c, 0x30FB,0x30FC)
|
||
//Bopomofo: 3105−312C
|
||
|| r(c, 0x3105,0x312C)
|
||
//CJK Unified Ideographs: 4E00−9FA5
|
||
|| r(c, 0x4E00,0x9FA5)
|
||
//Hangul: AC00−D7A3
|
||
|| r(c, 0xAC00,0xD7A3)
|
||
//Digits: 0660−0669, 06F0−06F9, 0966−096F, 09E6−09EF, 0A66−0A6F,
|
||
//0AE6−0AEF, 0B66−0B6F, 0BE7−0BEF, 0C66−0C6F, 0CE6−0CEF,
|
||
//0D66−0D6F, 0E50−0E59, 0ED0−0ED9, 0F20−0F33
|
||
|| r(c, 0x0660,0x0669) || r(c, 0x06F0,0x06F9) || r(c, 0x0966,0x096F) || r(c, 0x09E6,0x09EF) || r(c, 0x0A66,0x0A6F)
|
||
|| r(c, 0x0AE6,0x0AEF) || r(c, 0x0B66,0x0B6F) || r(c, 0x0BE7,0x0BEF) || r(c, 0x0C66,0x0C6F) || r(c, 0x0CE6,0x0CEF)
|
||
|| r(c, 0x0D66,0x0D6F) || r(c, 0x0E50,0x0E59) || r(c, 0x0ED0,0x0ED9) || r(c, 0x0F20,0x0F33)
|
||
//Special characters: 00B5, 00B7, 02B0−02B8, 02BB, 02BD−02C1, 02D0−02D1,
|
||
//02E0−02E4, 037A, 0559, 093D, 0B3D, 1FBE, 203F−2040, 2102,
|
||
//2107, 210A−2113, 2115, 2118−211D, 2124, 2126, 2128, 212A−2131,
|
||
//2133−2138, 2160−2182, 3005−3007, 3021−3029
|
||
|| r(c, 0x00B5) || r(c, 0x00B7) || r(c, 0x02B0,0x02B8) || r(c, 0x02BB) || r(c, 0x02BD,0x02C1) || r(c, 0x02D0,0x02D1)
|
||
|| r(c, 0x2E0,0x02E4) || r(c, 0x037A) || r(c, 0x0559) || r(c, 0x093D) || r(c, 0x0B3D) || r(c, 0x1FBE) || r(c, 0x203F,0x2040) || r(c, 0x2102)
|
||
|| r(c, 0x2107) || r(c, 0x210A,0x2113) || r(c, 0x2115) || r(c, 0x2118,0x211D) || r(c, 0x2124) || r(c, 0x2126) || r(c, 0x2128) || r(c, 0x212A,0x2131)
|
||
|| r(c, 0x2133,0x2138) || r(c, 0x2160,0x2182) || r(c, 0x3005,0x3007) || r(c, 0x3021,0x3029)
|
||
;
|
||
}
|
||
|
||
}
|
||
|
||
unittest {
|
||
|
||
|
||
static if (ENABLE_DUMP_UNIVERSAL_ALPHA_TABLE) {
|
||
immutable uint itemsInRow = 8;
|
||
|
||
uint maxAlpha = 0;
|
||
for (uint i = 0; i < 0x10000; i++) {
|
||
uint ch = i;
|
||
if (isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
|
||
maxAlpha = i;
|
||
}
|
||
maxAlpha = (maxAlpha + itemsInRow * 32 - 1) / (itemsInRow * 32) * (itemsInRow * 32) - 1;
|
||
writeln("// table for fast checking of UniversalAlpha (as per ISO/IEC 9899:1999 Annex E) OR a..z OR A..Z OR _");
|
||
writefln("// max code is 0x%04x", maxAlpha);
|
||
writeln("immutable uint[", (maxAlpha + 1) / 32,"] UNIVERSAL_ALPHA_FLAGS = [");
|
||
for (uint i = 0; i <= maxAlpha; i += 32) {
|
||
if ((i / 32) % itemsInRow == 0)
|
||
write(" ");
|
||
uint flags = 0;
|
||
for (uint j = 0; j < 32; j++) {
|
||
uint ch = i + j;
|
||
bool flag = isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
|
||
if (flag)
|
||
flags |= (1 << j);
|
||
}
|
||
writef("0x%08x", flags);
|
||
if (i != maxAlpha / 32 * 32)
|
||
write(",");
|
||
if ((i / 32) % itemsInRow == itemsInRow - 1)
|
||
writefln("// %04x-%04x", i - itemsInRow * 32 + 1 + 31, i + 31);
|
||
}
|
||
writeln("];");
|
||
|
||
for (uint ch = 0; ch < 0x100000; ch++) {
|
||
bool flag = isUniversalAlphaSlow(ch) || ch == '_' || (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
|
||
bool flag2 = isUniversalAlpha(ch);
|
||
if (flag2 != flag) {
|
||
isUniversalAlpha(ch);
|
||
writefln("universalAlpha test failed for char %06x expeced %d actual %d", ch, flag ? 1 : 0, flag2 ? 1 : 0);
|
||
}
|
||
assert(flag2 == flag);
|
||
}
|
||
}
|
||
}
|
||
|
||
enum OpCode : ubyte {
|
||
NONE, // no op
|
||
DIV, // /
|
||
DIV_EQ, // /=
|
||
DOT, // .
|
||
DOT_DOT, // ..
|
||
DOT_DOT_DOT,// ...
|
||
AND, // &
|
||
AND_EQ, // &=
|
||
LOG_AND, // &&
|
||
OR, // |
|
||
OR_EQ, // |=
|
||
LOG_OR, // ||
|
||
MINUS, // -
|
||
MINUS_EQ, // -=
|
||
MINUS_MINUS,// --
|
||
PLUS, // +
|
||
PLUS_EQ, // +=
|
||
PLUS_PLUS, // ++
|
||
LT, // <
|
||
LT_EQ, // <=
|
||
SHL, // <<
|
||
SHL_EQ, // <<=
|
||
LT_GT, // <>
|
||
NE_EQ, // <>=
|
||
GT, // >
|
||
GT_EQ, // >=
|
||
SHR_EQ, // >>=
|
||
ASR_EQ, // >>>=
|
||
SHR, // >>
|
||
ASR, // >>>
|
||
NOT, // !
|
||
NOT_EQ, // !=
|
||
NOT_LT_GT, // !<>
|
||
NOT_LT_GT_EQ, // !<>=
|
||
NOT_LT, // !<
|
||
NOT_LT_EQ, // !<=
|
||
NOT_GT, // !>
|
||
NOT_GT_EQ, // !>=
|
||
PAR_OPEN, // (
|
||
PAR_CLOSE, // )
|
||
SQ_OPEN, // [
|
||
SQ_CLOSE, // ]
|
||
CURL_OPEN, // {
|
||
CURL_CLOSE, // }
|
||
QUEST, // ?
|
||
COMMA, // ,
|
||
SEMICOLON, // ;
|
||
COLON, // :
|
||
DOLLAR, // $
|
||
EQ, // =
|
||
QE_EQ, // ==
|
||
MUL, // *
|
||
MUL_EQ, // *=
|
||
MOD, // %
|
||
MOD_EQ, // %=
|
||
XOR, // ^
|
||
XOR_EQ, // ^=
|
||
LOG_XOR, // ^^
|
||
LOG_XOR_EQ, // ^^=
|
||
INV, // ~
|
||
INV_EQ, // ~=
|
||
AT, // @
|
||
EQ_GT, // =>
|
||
SHARP // #
|
||
};
|
||
|
||
immutable dstring[] OP_CODE_STRINGS = [
|
||
"",
|
||
"/",
|
||
"/=",
|
||
".",
|
||
"..",
|
||
"...",
|
||
"&",
|
||
"&=",
|
||
"&&",
|
||
"|",
|
||
"|=",
|
||
"||",
|
||
"-",
|
||
"-=",
|
||
"--",
|
||
"+",
|
||
"+=",
|
||
"++",
|
||
"<",
|
||
"<=",
|
||
"<<",
|
||
"<<=",
|
||
"<>",
|
||
"<>=",
|
||
">",
|
||
">=",
|
||
">>=",
|
||
">>>=",
|
||
">>",
|
||
">>>",
|
||
"!",
|
||
"!=",
|
||
"!<>",
|
||
"!<>=",
|
||
"!<",
|
||
"!<=",
|
||
"!>",
|
||
"!>=",
|
||
"(",
|
||
")",
|
||
"[",
|
||
"]",
|
||
"{",
|
||
"}",
|
||
"?",
|
||
",",
|
||
";",
|
||
":",
|
||
"$",
|
||
"=",
|
||
"==",
|
||
"*",
|
||
"*=",
|
||
"%",
|
||
"%=",
|
||
"^",
|
||
"^=",
|
||
"^^",
|
||
"^^=",
|
||
"~",
|
||
"~=",
|
||
"@",
|
||
"=>",
|
||
"#"
|
||
];
|
||
|
||
dstring getOpNameD(OpCode op) pure nothrow {
|
||
return OP_CODE_STRINGS[op];
|
||
};
|
||
|
||
enum Keyword : ubyte {
|
||
NONE,
|
||
ABSTRACT,
|
||
ALIAS,
|
||
ALIGN,
|
||
ASM,
|
||
ASSERT,
|
||
AUTO,
|
||
|
||
BODY,
|
||
BOOL,
|
||
BREAK,
|
||
BYTE,
|
||
|
||
CASE,
|
||
CAST,
|
||
CATCH,
|
||
CDOUBLE,
|
||
CENT,
|
||
CFLOAT,
|
||
CHAR,
|
||
CLASS,
|
||
CONST,
|
||
CONTINUE,
|
||
CREAL,
|
||
|
||
DCHAR,
|
||
DEBUG,
|
||
DEFAULT,
|
||
DELEGATE,
|
||
DELETE,
|
||
DEPRECATED,
|
||
DO,
|
||
DOUBLE,
|
||
|
||
ELSE,
|
||
ENUM,
|
||
EXPORT,
|
||
EXTERN,
|
||
|
||
FALSE,
|
||
FINAL,
|
||
FINALLY,
|
||
FLOAT,
|
||
FOR,
|
||
FOREACH,
|
||
FOREACH_REVERSE,
|
||
FUNCTION,
|
||
|
||
GOTO,
|
||
|
||
IDOUBLE,
|
||
IF,
|
||
IFLOAT,
|
||
IMMUTABLE,
|
||
IMPORT,
|
||
IN,
|
||
INOUT,
|
||
INT,
|
||
INTERFACE,
|
||
INVARIANT,
|
||
IREAL,
|
||
IS,
|
||
|
||
LAZY,
|
||
LONG,
|
||
|
||
MACRO,
|
||
MIXIN,
|
||
MODULE,
|
||
|
||
NEW,
|
||
NOTHROW,
|
||
NULL,
|
||
|
||
OUT,
|
||
OVERRIDE,
|
||
|
||
PACKAGE,
|
||
PRAGMA,
|
||
PRIVATE,
|
||
PROTECTED,
|
||
PUBLIC,
|
||
PURE,
|
||
|
||
REAL,
|
||
REF,
|
||
RETURN,
|
||
|
||
SCOPE,
|
||
SHARED,
|
||
SHORT,
|
||
STATIC,
|
||
STRUCT,
|
||
SUPER,
|
||
SWITCH,
|
||
SYNCHRONIZED,
|
||
|
||
TEMPLATE,
|
||
THIS,
|
||
THROW,
|
||
TRUE,
|
||
TRY,
|
||
TYPEDEF,
|
||
TYPEID,
|
||
TYPEOF,
|
||
|
||
UBYTE,
|
||
UCENT,
|
||
UINT,
|
||
ULONG,
|
||
UNION,
|
||
UNITTEST,
|
||
USHORT,
|
||
|
||
VERSION,
|
||
VOID,
|
||
VOLATILE,
|
||
|
||
WCHAR,
|
||
WHILE,
|
||
WITH,
|
||
|
||
FILE,
|
||
MODULE__,
|
||
LINE,
|
||
FUNCTION__,
|
||
PRETTY_FUNCTION,
|
||
|
||
//Special Token Replaced with
|
||
DATE, // string literal of the date of compilation "mmm dd yyyy"
|
||
EOF, // sets the scanner to the end of the file
|
||
TIME, // string literal of the time of compilation "hh:mm:ss"
|
||
TIMESTAMP, // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
|
||
VENDOR, // Compiler vendor string, such as "Digital Mars D"
|
||
VERSION_, // Compiler version as an integer, such as 2001
|
||
|
||
GSHARED,
|
||
TRAITS,
|
||
VECTOR,
|
||
PARAMETERS,
|
||
|
||
}
|
||
|
||
immutable dstring[] KEYWORD_STRINGS = [
|
||
"",
|
||
"abstract",
|
||
"alias",
|
||
"align",
|
||
"asm",
|
||
"assert",
|
||
"auto",
|
||
|
||
"body",
|
||
"bool",
|
||
"break",
|
||
"byte",
|
||
|
||
"case",
|
||
"cast",
|
||
"catch",
|
||
"cdouble",
|
||
"cent",
|
||
"cfloat",
|
||
"char",
|
||
"class",
|
||
"const",
|
||
"continue",
|
||
"creal",
|
||
|
||
"dchar",
|
||
"debug",
|
||
"default",
|
||
"delegate",
|
||
"delete",
|
||
"deprecated",
|
||
"do",
|
||
"double",
|
||
|
||
"else",
|
||
"enum",
|
||
"export",
|
||
"extern",
|
||
|
||
"false",
|
||
"final",
|
||
"finally",
|
||
"float",
|
||
"for",
|
||
"foreach",
|
||
"foreach_reverse",
|
||
"function",
|
||
|
||
"goto",
|
||
|
||
"idouble",
|
||
"if",
|
||
"ifloat",
|
||
"immutable",
|
||
"import",
|
||
"in",
|
||
"inout",
|
||
"int",
|
||
"interface",
|
||
"invariant",
|
||
"ireal",
|
||
"is",
|
||
|
||
"lazy",
|
||
"long",
|
||
|
||
"macro",
|
||
"mixin",
|
||
"module",
|
||
|
||
"new",
|
||
"nothrow",
|
||
"null",
|
||
|
||
"out",
|
||
"override",
|
||
|
||
"package",
|
||
"pragma",
|
||
"private",
|
||
"protected",
|
||
"public",
|
||
"pure",
|
||
|
||
"real",
|
||
"ref",
|
||
"return",
|
||
|
||
"scope",
|
||
"shared",
|
||
"short",
|
||
"static",
|
||
"struct",
|
||
"super",
|
||
"switch",
|
||
"synchronized",
|
||
|
||
"template",
|
||
"this",
|
||
"throw",
|
||
"true",
|
||
"try",
|
||
"typedef",
|
||
"typeid",
|
||
"typeof",
|
||
|
||
"ubyte",
|
||
"ucent",
|
||
"uint",
|
||
"ulong",
|
||
"union",
|
||
"unittest",
|
||
"ushort",
|
||
|
||
"version",
|
||
"void",
|
||
"volatile",
|
||
|
||
"wchar",
|
||
"while",
|
||
"with",
|
||
|
||
"__FILE__",
|
||
"__MODULE__",
|
||
"__LINE__",
|
||
"__FUNCTION__",
|
||
"__PRETTY_FUNCTION__",
|
||
|
||
//Special Token Replaced with
|
||
"__DATE__", // string literal of the date of compilation "mmm dd yyyy"
|
||
"__EOF__", // sets the scanner to the end of the file
|
||
"__TIME__", // string literal of the time of compilation "hh:mm:ss"
|
||
"__TIMESTAMP__", // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
|
||
"__VENDOR__", // Compiler vendor string, such as "Digital Mars D"
|
||
"__VERSION__", // Compiler version as an integer, such as 2001
|
||
|
||
|
||
"__gshared",
|
||
"__traits",
|
||
"__vector",
|
||
"__parameters"
|
||
];
|
||
|
||
public dstring getKeywordNameD(Keyword keyword) pure nothrow {
|
||
return KEYWORD_STRINGS[keyword];
|
||
};
|
||
|
||
public Keyword findKeyword(Keyword start, Keyword end, dchar * name, int len, ref int pos) pure nothrow {
|
||
for (Keyword i = start; i <= end; i++) {
|
||
dstring s = KEYWORD_STRINGS[i];
|
||
if (s.length > len + 1)
|
||
continue; // too long
|
||
bool found = true;
|
||
for (uint j = 1; j < s.length; j++) {
|
||
if (s[j] != name[j - 1]) {
|
||
found = false;
|
||
break;
|
||
}
|
||
}
|
||
if (found) {
|
||
if (s.length == len - 1 || !isIdentMiddleChar(name[s.length - 1])) {
|
||
pos += s.length - 1;
|
||
return i;
|
||
}
|
||
}
|
||
}
|
||
return Keyword.NONE;
|
||
}
|
||
|
||
/**
|
||
* Token.
|
||
*/
|
||
class Token {
|
||
protected SourceFile _file;
|
||
protected int _line;
|
||
protected int _pos;
|
||
protected TokenType _type;
|
||
/// returns token type
|
||
@property TokenType type() { return _type; }
|
||
/// returns file info for source
|
||
@property SourceFile filename() { return _file; }
|
||
/// returns 1-based source line number of token start
|
||
@property int line() { return _line; }
|
||
/// returns 1-based source line position of token start
|
||
@property int pos() { return _pos; }
|
||
/// returns token text
|
||
@property dchar[] text() { return null; }
|
||
|
||
// number token properties
|
||
@property dchar literalType() { return 0; }
|
||
@property ulong intValue() { return 0; }
|
||
@property bool isUnsigned() { return false; }
|
||
@property ulong isLong() { return false; }
|
||
@property real realValue() { return 0; }
|
||
@property double doubleValue() { return 0; }
|
||
@property float floatValue() { return 0; }
|
||
@property byte precision() { return 0; }
|
||
@property bool isImaginary() { return false; }
|
||
|
||
/// returns opcode ID - for opcode tokens
|
||
@property OpCode opCode() { return OpCode.NONE; }
|
||
/// returns keyword ID - for keyword tokens
|
||
@property Keyword keyword() { return Keyword.NONE; }
|
||
/// returns true if this is documentation comment token
|
||
@property bool isDocumentationComment() { return false; }
|
||
/// returns true if this is multiline
|
||
@property bool isMultilineComment() { return false; }
|
||
|
||
// error handling
|
||
|
||
/// returns true if it's invalid token (can be returned in error tolerant mode of tokenizer)
|
||
@property bool isError() { return type == TokenType.INVALID; }
|
||
/// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
|
||
@property string errorMessage() { return null; }
|
||
/// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
|
||
@property int errorCode() { return 0; }
|
||
/// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer)
|
||
@property TokenType invalidTokenType() { return TokenType.INVALID; }
|
||
|
||
|
||
this(TokenType type) {
|
||
_type = type;
|
||
}
|
||
|
||
this(TokenType type, SourceFile file, int line, int pos) {
|
||
_type = type;
|
||
_file = file;
|
||
_line = line;
|
||
_pos = pos;
|
||
}
|
||
/// set start position for token (line is 1-based, pos is 0-based)
|
||
void setPos(SourceFile file, int line, int pos) {
|
||
_file = file;
|
||
_line = line;
|
||
_pos = pos + 1;
|
||
}
|
||
/// set source file information for token
|
||
void setFile(SourceFile file) {
|
||
_file = file;
|
||
}
|
||
/// set start position for token (line is 1-based, pos is 0-based)
|
||
void setPos(int line, int pos) {
|
||
_line = line;
|
||
_pos = pos + 1;
|
||
}
|
||
|
||
public abstract Token clone();
|
||
public override @property string toString() {
|
||
return "" ~ to!string(_line) ~ ":" ~ to!string(_pos) ~ " " ~ to!string(type) ~ " " ~ to!string(opCode) ~ " " ~ to!string(keyword)
|
||
~" \"" ~ toUTF8(text()) ~ "\"";
|
||
}
|
||
}
|
||
|
||
class EofToken : Token {
|
||
this() {
|
||
super(TokenType.EOF);
|
||
}
|
||
this(SourceFile file, uint line, uint pos) {
|
||
super(TokenType.EOF, file, line, pos);
|
||
}
|
||
override public Token clone() {
|
||
return new EofToken(_file, _line, _pos);
|
||
}
|
||
public override @property string toString() {
|
||
return "EOF";
|
||
}
|
||
}
|
||
|
||
// treat as white space
|
||
//class EolToken : Token {
|
||
// this(string file, uint line, uint pos) {
|
||
// super(TokenType.EOL, file, line, pos);
|
||
// }
|
||
//}
|
||
|
||
/// white space token
|
||
class WhiteSpaceToken : Token {
|
||
this() {
|
||
super(TokenType.WHITESPACE);
|
||
}
|
||
this(SourceFile file, uint line, uint pos) {
|
||
super(TokenType.WHITESPACE, file, line, pos);
|
||
}
|
||
override public Token clone() {
|
||
return new WhiteSpaceToken(_file, _line, _pos);
|
||
}
|
||
public override @property string toString() {
|
||
return "WhiteSpace";
|
||
}
|
||
}
|
||
|
||
class OpToken : Token {
|
||
OpCode _op;
|
||
public @property override OpCode opCode() { return _op; }
|
||
public @property void opCode(OpCode op) { _op = op; }
|
||
public @property override dchar[] text() { return cast(dchar[])getOpNameD(_op); }
|
||
this() {
|
||
super(TokenType.OP);
|
||
}
|
||
this(SourceFile file, uint line, uint pos) {
|
||
super(TokenType.OP, file, line, pos);
|
||
}
|
||
override public Token clone() {
|
||
OpToken res = new OpToken(_file, _line, _pos);
|
||
res._op = _op;
|
||
return res;
|
||
}
|
||
public override @property string toString() {
|
||
return "Op:" ~ to!string(_op);
|
||
}
|
||
}
|
||
|
||
class KeywordToken : Token {
|
||
Keyword _keyword;
|
||
public @property override Keyword keyword() { return _keyword; }
|
||
public @property void keyword(Keyword keyword) { _keyword = keyword; }
|
||
public @property override dchar[] text() { return cast(dchar[])getKeywordNameD(_keyword); }
|
||
this() {
|
||
super(TokenType.KEYWORD);
|
||
}
|
||
this(SourceFile file, uint line, uint pos) {
|
||
super(TokenType.KEYWORD, file, line, pos);
|
||
}
|
||
override public Token clone() {
|
||
KeywordToken res = new KeywordToken(_file, _line, _pos);
|
||
res._keyword = _keyword;
|
||
return res;
|
||
}
|
||
public override @property string toString() {
|
||
return "Keyword:" ~ to!string(_keyword);
|
||
}
|
||
}
|
||
|
||
/// comment token
|
||
class CommentToken : Token {
|
||
protected dchar[] _text;
|
||
protected bool _isDocumentationComment;
|
||
protected bool _isMultilineComment;
|
||
|
||
|
||
override @property bool isDocumentationComment() {
|
||
return _isDocumentationComment;
|
||
}
|
||
|
||
@property void isDocumentationComment(bool f) {
|
||
_isDocumentationComment = f;
|
||
}
|
||
|
||
/// returns true if this is multiline
|
||
override @property bool isMultilineComment() {
|
||
return _isMultilineComment;
|
||
}
|
||
|
||
@property void isMultilineComment(bool f) {
|
||
_isMultilineComment = f;
|
||
}
|
||
|
||
@property override dchar[] text() { return _text; }
|
||
@property void text(dchar[] text) { _text = text; }
|
||
this() {
|
||
super(TokenType.COMMENT);
|
||
}
|
||
this(SourceFile file, uint line, uint pos, dchar[] text) {
|
||
super(TokenType.COMMENT, file, line, pos);
|
||
_text = text;
|
||
}
|
||
override public Token clone() {
|
||
CommentToken res = new CommentToken(_file, _line, _pos, _text.dup);
|
||
res._isDocumentationComment = _isDocumentationComment;
|
||
res._isMultilineComment = _isMultilineComment;
|
||
return res;
|
||
}
|
||
public override @property string toString() {
|
||
return "Comment:" ~ to!string(_text);
|
||
}
|
||
}
|
||
|
||
/// Invalid token holder - for error tolerant parsing
|
||
class InvalidToken : Token {
|
||
protected dchar[] _text;
|
||
protected TokenType _invalidTokenType;
|
||
protected int _errorCode;
|
||
protected string _errorMessage;
|
||
|
||
/// returns error message if it's invalid token (can be returned in error tolerant mode of tokenizer)
|
||
override @property string errorMessage() { return _errorMessage; }
|
||
/// sets error message
|
||
@property void errorMessage(string s) { _errorMessage = s; }
|
||
/// returns error code if it's invalid token (can be returned in error tolerant mode of tokenizer)
|
||
override @property int errorCode() { return _errorCode; }
|
||
/// sets error code
|
||
@property void errorCode(int c) { _errorCode = c; }
|
||
/// returns type of token parsing of which has been failed - if it's invalid token (can be returned in error tolerant mode of tokenizer)
|
||
override @property TokenType invalidTokenType() { return _invalidTokenType; }
|
||
/// sets type of token parsing of which has been failed
|
||
@property void invalidTokenType(TokenType t) { _invalidTokenType = t; }
|
||
|
||
/// text of invalid token
|
||
@property override dchar[] text() { return _text; }
|
||
/// text of invalid token
|
||
@property void text(dchar[] text) { _text = text; }
|
||
|
||
this() {
|
||
super(TokenType.INVALID);
|
||
}
|
||
this(SourceFile file, uint line, uint pos, dchar[] text) {
|
||
super(TokenType.INVALID, file, line, pos);
|
||
_text = text;
|
||
}
|
||
override Token clone() {
|
||
InvalidToken res = new InvalidToken(_file, _line, _pos, _text.dup);
|
||
res._errorMessage = _errorMessage.dup;
|
||
res._errorCode = _errorCode;
|
||
res._invalidTokenType = _invalidTokenType;
|
||
return res;
|
||
}
|
||
override @property string toString() {
|
||
return "Invalid:" ~ to!string(_text);
|
||
}
|
||
}
|
||
|
||
alias tokenizer_ident_t = uint;
|
||
alias tokenizer_ident_name_t = dchar[];
|
||
|
||
enum : tokenizer_ident_t {
|
||
NO_IDENT = 0
|
||
}
|
||
|
||
/**
|
||
* Global storage for identifier strings.
|
||
*/
|
||
class IdentHolder {
|
||
protected tokenizer_ident_t _nextId;
|
||
protected tokenizer_ident_name_t[tokenizer_ident_t] _idToName;
|
||
protected tokenizer_ident_t[tokenizer_ident_name_t] _nameToId;
|
||
|
||
public this() {
|
||
_nextId = NO_IDENT + 1;
|
||
}
|
||
|
||
/**
|
||
* Search for id by name, return NO_IDENT if not found.
|
||
*/
|
||
uint findByName(tokenizer_ident_name_t name) {
|
||
tokenizer_ident_t * found = (name in _nameToId);
|
||
if (found)
|
||
return *found;
|
||
return NO_IDENT;
|
||
}
|
||
|
||
/**
|
||
* Search for name by id, return null if not found.
|
||
*/
|
||
tokenizer_ident_name_t nameById(tokenizer_ident_t id) {
|
||
auto found = (id in _idToName);
|
||
if (found)
|
||
return *found;
|
||
return null;
|
||
}
|
||
|
||
/**
|
||
* Search for ident id by name, create new entry if not found.
|
||
*/
|
||
tokenizer_ident_t idByName(tokenizer_ident_name_t name) {
|
||
uint * found = (name in _nameToId);
|
||
if (found)
|
||
return *found;
|
||
uint newid = _nextId++;
|
||
_nameToId[cast(dstring)name] = newid;
|
||
_idToName[newid] = cast(tokenizer_ident_name_t)name;
|
||
return newid;
|
||
}
|
||
}
|
||
|
||
/**
|
||
* Thread local storage for IDs.
|
||
*/
|
||
IdentHolder identMap;
|
||
|
||
static this() {
|
||
// init ID storage
|
||
identMap = new IdentHolder();
|
||
}
|
||
|
||
class StringLiteralToken : Token {
|
||
dchar[] _text;
|
||
dchar _literalType;
|
||
public @property override dchar literalType() { return _literalType; }
|
||
public @property override dchar[] text() { return _text; }
|
||
public void setText(dchar[] text, dchar type) { _text = text; _literalType = type; }
|
||
this() {
|
||
super(TokenType.STRING);
|
||
}
|
||
this(SourceFile file, uint line, uint pos, dchar[] text, dchar type) {
|
||
super(TokenType.STRING, file, line, pos);
|
||
_text = text;
|
||
_literalType = type;
|
||
}
|
||
override public Token clone() {
|
||
return new StringLiteralToken(_file, _line, _pos, _text.dup, _literalType);
|
||
}
|
||
public override @property string toString() {
|
||
return "String:" ~ to!string(_text);
|
||
}
|
||
}
|
||
|
||
class CharacterLiteralToken : Token {
|
||
dchar _character;
|
||
dchar _literalType;
|
||
@property override dchar literalType() { return _literalType; }
|
||
@property dchar character() { return _character; }
|
||
@property override dchar[] text() { return [_character]; }
|
||
void setCharacter(dchar ch, dchar type) { _character = ch; _literalType = type; }
|
||
this() {
|
||
super(TokenType.CHARACTER);
|
||
}
|
||
this(SourceFile file, uint line, uint pos, dchar character, dchar type) {
|
||
super(TokenType.CHARACTER, file, line, pos);
|
||
_character = character;
|
||
_literalType = type;
|
||
}
|
||
override public Token clone() {
|
||
return new CharacterLiteralToken(_file, _line, _pos, _character, _literalType);
|
||
}
|
||
public override @property string toString() {
|
||
return "Char:" ~ toUTF8([_character]);
|
||
}
|
||
}
|
||
|
||
class IntegerLiteralToken : Token {
|
||
ulong _value;
|
||
bool _unsigned;
|
||
bool _long;
|
||
public @property override ulong intValue() { return _value; }
|
||
public @property override bool isUnsigned() { return _unsigned; }
|
||
public @property override ulong isLong() { return _long; }
|
||
public @property override dchar[] text() { return cast(dchar[])to!dstring(_value); }
|
||
public void setValue(ulong value, bool unsignedFlag = false, bool longFlag = false) {
|
||
_value = value;
|
||
_unsigned = unsignedFlag;
|
||
_long = longFlag;
|
||
}
|
||
public void setFlags(bool unsignedFlag = false, bool longFlag = false) {
|
||
_unsigned = unsignedFlag;
|
||
_long = longFlag;
|
||
}
|
||
this() {
|
||
super(TokenType.INTEGER);
|
||
}
|
||
this(SourceFile file, uint line, uint pos, ulong value, bool unsignedFlag, bool longFlag) {
|
||
super(TokenType.INTEGER, file, line, pos);
|
||
_value = value;
|
||
_unsigned = unsignedFlag;
|
||
_long = longFlag;
|
||
}
|
||
override public Token clone() {
|
||
return new IntegerLiteralToken(_file, _line, _pos, _value, _unsigned, _long);
|
||
}
|
||
public override @property string toString() {
|
||
return "Integer:" ~ to!string(_value) ~ (_long ? "L" : "") ~ (_unsigned ? "U" : "");
|
||
}
|
||
}
|
||
|
||
class RealLiteralToken : Token {
|
||
real _value;
|
||
byte _precision;
|
||
bool _imaginary;
|
||
public @property override ulong intValue() { return to!long(_value); }
|
||
public @property override real realValue() { return _value; }
|
||
public @property override double doubleValue() { return cast(double)_value; }
|
||
public @property override float floatValue() { return cast(float)_value; }
|
||
public @property override byte precision() { return _precision; }
|
||
public @property override bool isImaginary() { return _imaginary; }
|
||
public @property override dchar[] text() { return cast(dchar[])to!dstring(_value); }
|
||
public void setValue(real value, byte precision = 1, bool imaginary = false) {
|
||
_value = value;
|
||
_precision = precision;
|
||
_imaginary = imaginary;
|
||
}
|
||
public void setFlags(byte precision = 1, bool imaginary = false) {
|
||
_precision = precision;
|
||
_imaginary = imaginary;
|
||
}
|
||
this() {
|
||
super(TokenType.FLOAT);
|
||
}
|
||
this(SourceFile file, uint line, uint pos, real value, byte precision, bool imaginary) {
|
||
super(TokenType.FLOAT, file, line, pos);
|
||
_value = value;
|
||
_precision = precision;
|
||
_imaginary = imaginary;
|
||
}
|
||
override public Token clone() {
|
||
return new RealLiteralToken(_file, _line, _pos, _value, _precision, _imaginary);
|
||
}
|
||
public override @property string toString() {
|
||
return "Integer:" ~ to!string(_value) ~ (_precision == 0 ? "f" : (_precision == 2 ? "L" : "")) ~ (_imaginary ? "i" : "");
|
||
}
|
||
}
|
||
|
||
class IdentToken : Token {
|
||
tokenizer_ident_t _id;
|
||
public @property override dchar[] text() { return identMap.nameById(_id); }
|
||
public void setText(dchar[] text) { _id = identMap.idByName(text); }
|
||
this() {
|
||
super(TokenType.IDENTIFIER);
|
||
}
|
||
this(SourceFile file, uint line, uint pos, dchar[] text) {
|
||
super(TokenType.IDENTIFIER, file, line, pos);
|
||
_id = identMap.idByName(text);
|
||
}
|
||
this(SourceFile file, uint line, uint pos, tokenizer_ident_t id) {
|
||
super(TokenType.IDENTIFIER, file, line, pos);
|
||
_id = id;
|
||
}
|
||
override public Token clone() {
|
||
return new IdentToken(_file, _line, _pos, _id);
|
||
}
|
||
public override @property string toString() {
|
||
return "Ident:" ~ to!string(text);
|
||
}
|
||
}
|
||
|
||
// shared appender buffer, to avoid extra heap allocations
|
||
struct StringAppender {
|
||
dchar[] buf;
|
||
uint len;
|
||
dchar[] get() {
|
||
return buf[0 .. len];
|
||
}
|
||
void appendEol() {
|
||
if (len + 1 > buf.length) {
|
||
uint newsize = cast(uint)((len + 1 + buf.length) * 2);
|
||
if (newsize < 128)
|
||
newsize = 128;
|
||
buf.length = newsize;
|
||
}
|
||
buf[len] = '\n';
|
||
len++;
|
||
}
|
||
void append(dchar[] s) {
|
||
if (s.length == 0)
|
||
return;
|
||
if (len + s.length > buf.length) {
|
||
uint newsize = cast(uint)((len + s.length + buf.length) * 2);
|
||
if (newsize < 128)
|
||
newsize = 128;
|
||
buf.length = newsize;
|
||
}
|
||
buf[len .. len + s.length] = s;
|
||
len += s.length;
|
||
}
|
||
void reset() {
|
||
len = 0;
|
||
}
|
||
static int parseHexDigit(dchar ch) {
|
||
if (ch >= '0' && ch <='9')
|
||
return ch - '0';
|
||
if (ch >= 'a' && ch <='f')
|
||
return ch - 'a' + 10;
|
||
if (ch >= 'A' && ch <='F')
|
||
return ch - 'A' + 10;
|
||
return -1;
|
||
}
|
||
bool errorFlag = false;
|
||
dchar decodeHex(ref int pos, int count) {
|
||
dchar res = 0;
|
||
for (int i = 0; i < count; i++) {
|
||
if (pos >= len - 1) {
|
||
errorFlag = true;
|
||
return res;
|
||
}
|
||
dchar ch = buf[++pos];
|
||
int digit = parseHexDigit(ch);
|
||
if (digit < 0) {
|
||
errorFlag = true;
|
||
digit = 0;
|
||
}
|
||
res = (res << 4) | digit;
|
||
}
|
||
return res;
|
||
}
|
||
dchar decodeOct(dchar firstChar, ref int pos) {
|
||
dchar res = 0;
|
||
res = firstChar - '0';
|
||
if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') {
|
||
res = (res << 3) | (buf[++pos] - '0');
|
||
}
|
||
if (pos < len - 1 && buf[pos + 1] >= '0' && buf[pos + 1] <= '7') {
|
||
res = (res << 3) | (buf[++pos] - '0');
|
||
}
|
||
return res;
|
||
}
|
||
bool processEscapeSequences() {
|
||
errorFlag = false;
|
||
int dst = 0;
|
||
for (int src = 0; src < len; src++) {
|
||
dchar ch = buf[src];
|
||
if (ch == '\\') {
|
||
if (src == len - 1)
|
||
break; // INVALID
|
||
ch = buf[++src];
|
||
switch (ch) {
|
||
case '\'':
|
||
case '\"':
|
||
case '?':
|
||
case '\\':
|
||
buf[dst++] = ch;
|
||
break;
|
||
case '0':
|
||
buf[dst++] = '\0';
|
||
break;
|
||
case 'a':
|
||
buf[dst++] = '\a';
|
||
break;
|
||
case 'b':
|
||
buf[dst++] = '\b';
|
||
break;
|
||
case 'f':
|
||
buf[dst++] = '\f';
|
||
break;
|
||
case 'n':
|
||
buf[dst++] = '\n';
|
||
break;
|
||
case 'r':
|
||
buf[dst++] = '\r';
|
||
break;
|
||
case 't':
|
||
buf[dst++] = '\t';
|
||
break;
|
||
case 'v':
|
||
buf[dst++] = '\v';
|
||
break;
|
||
case 'x':
|
||
buf[dst++] = decodeHex(src, 2);
|
||
break;
|
||
case 'u':
|
||
buf[dst++] = decodeHex(src, 4);
|
||
break;
|
||
case 'U':
|
||
buf[dst++] = decodeHex(src, 8);
|
||
break;
|
||
default:
|
||
if (ch >= '0' && ch <= '7') {
|
||
// octal X XX or XXX
|
||
buf[dst++] = decodeOct(ch, src); // something wrong
|
||
} else if (ch == '&') {
|
||
// named character entity
|
||
buf[dst++] = ch;
|
||
// just show it as is
|
||
} else {
|
||
buf[dst++] = ch; // something wrong
|
||
errorFlag = true;
|
||
}
|
||
break;
|
||
}
|
||
} else {
|
||
buf[dst++] = ch;
|
||
}
|
||
}
|
||
len = dst;
|
||
return errorFlag;
|
||
}
|
||
}
|
||
|
||
class Tokenizer
|
||
{
|
||
protected SourceLines _lineStream;
|
||
protected dchar[] _lineText;
|
||
protected int _line; // current line number
|
||
protected int _len; // current line length
|
||
protected int _pos; // current line read position
|
||
protected int _prevLineLength; // previous line length
|
||
protected uint _state; // tokenizer state
|
||
|
||
enum : int {
|
||
EOF_CHAR = 0x001A,
|
||
EOL_CHAR = 0x000A
|
||
};
|
||
|
||
protected WhiteSpaceToken _sharedWhiteSpaceToken = new WhiteSpaceToken();
|
||
protected CommentToken _sharedCommentToken = new CommentToken();
|
||
protected StringLiteralToken _sharedStringLiteralToken = new StringLiteralToken();
|
||
protected IdentToken _sharedIdentToken = new IdentToken();
|
||
protected OpToken _sharedOpToken = new OpToken();
|
||
protected KeywordToken _sharedKeywordToken = new KeywordToken();
|
||
protected IntegerLiteralToken _sharedIntegerToken = new IntegerLiteralToken();
|
||
protected RealLiteralToken _sharedRealToken = new RealLiteralToken();
|
||
protected InvalidToken _sharedInvalidToken = new InvalidToken();
|
||
protected CharacterLiteralToken _sharedCharacterLiteralToken = new CharacterLiteralToken();
|
||
protected StringAppender _stringLiteralAppender;
|
||
protected StringAppender _commentAppender;
|
||
protected StringAppender _identAppender;
|
||
|
||
protected bool _enableCommentText = true;
|
||
/// when false, does not put comment text into comment token - for less allocations
|
||
@property void enableCommentText(bool enabled) {
|
||
_enableCommentText = enabled;
|
||
}
|
||
/// when false, does not put comment text into comment token - for less allocations
|
||
@property bool enableCommentText() {
|
||
return _enableCommentText;
|
||
}
|
||
|
||
protected bool _errorTolerant = false;
|
||
/// when true, returns BadToken instead of throwing exception
|
||
@property void errorTolerant(bool enabled) {
|
||
_errorTolerant = enabled;
|
||
}
|
||
/// when true, returns BadToken instead of throwing exception
|
||
@property bool errorTolerant() {
|
||
return _errorTolerant;
|
||
}
|
||
|
||
this(SourceLines lineStream) {
|
||
init(lineStream);
|
||
}
|
||
|
||
void init(SourceLines lineStream, int pos = 0) {
|
||
_lineStream = lineStream;
|
||
SourceFile file = _lineStream.file;
|
||
_sharedWhiteSpaceToken.setFile(file);
|
||
_sharedCommentToken.setFile(file);
|
||
_sharedStringLiteralToken.setFile(file);
|
||
_sharedIdentToken.setFile(file);
|
||
_sharedOpToken.setFile(file);
|
||
_sharedKeywordToken.setFile(file);
|
||
_sharedIntegerToken.setFile(file);
|
||
_sharedRealToken.setFile(file);
|
||
_sharedInvalidToken.setFile(file);
|
||
_sharedCharacterLiteralToken.setFile(file);
|
||
buildTime = Clock.currTime();
|
||
_line = lineStream.line;
|
||
_pos = 0;
|
||
_prevLineLength = 0;
|
||
_lineText = null;
|
||
nextLine();
|
||
_pos = pos;
|
||
}
|
||
|
||
this(string code, string filename = "") {
|
||
this(new ArraySourceLines(code, filename));
|
||
}
|
||
|
||
// fetch next line from source stream
|
||
protected bool nextLine() {
|
||
_prevLineLength = cast(int)_lineText.length;
|
||
_lineText = _lineStream.readLine();
|
||
if (!_lineText) {
|
||
if (_lineStream.errorCode != 0)
|
||
throw new SourceEncodingException(_lineStream.errorMessage, _lineStream.file, _lineStream.errorLine, _lineStream.errorPos);
|
||
if (_lineStream.eof) {
|
||
// end of file
|
||
_pos = 0;
|
||
_len = 0;
|
||
return false;
|
||
}
|
||
// just an empty line
|
||
}
|
||
_line = _lineStream.line;
|
||
_pos = 0;
|
||
_len = cast(int)_lineText.length; // do not support lines longer that 4Gb
|
||
return true;
|
||
}
|
||
|
||
protected dchar nextChar() {
|
||
if (_pos >= _len) {
|
||
if (!nextLine()) {
|
||
_pos = _prevLineLength + 1;
|
||
return EOF_CHAR;
|
||
}
|
||
return EOL_CHAR;
|
||
}
|
||
dchar res = _lineText[_pos++];
|
||
if (_pos >= _len)
|
||
nextLine();
|
||
return res;
|
||
}
|
||
|
||
protected dchar peekChar() {
|
||
if (_lineText is null) {
|
||
if (!nextLine()) {
|
||
return EOF_CHAR;
|
||
}
|
||
}
|
||
if (_pos >= _len)
|
||
return EOL_CHAR;
|
||
return _lineText[_pos++];
|
||
}
|
||
|
||
protected Token emitEof() {
|
||
// TODO: check for current state
|
||
return new EofToken(_lineStream.file, _startLine, _startPos + 2);
|
||
}
|
||
|
||
protected Token processWhiteSpace(dchar firstChar) {
|
||
// reuse the same token instance, to avoid extra heap spamming
|
||
_sharedWhiteSpaceToken.setPos(_startLine, _startPos);
|
||
for (;;) {
|
||
int i = _pos;
|
||
for (; i < _len; i++) {
|
||
dchar ch = _lineText[i];
|
||
if (!(ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C || ch == EOL_CHAR))
|
||
break;
|
||
}
|
||
_pos = i;
|
||
if (_pos < _len)
|
||
break;
|
||
// go to next line
|
||
if (!nextLine())
|
||
break;
|
||
}
|
||
return _sharedWhiteSpaceToken;
|
||
}
|
||
|
||
protected Token processOneLineComment() {
|
||
_sharedCommentToken.setPos(_startLine, _startPos);
|
||
_sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '/';
|
||
_sharedCommentToken.isMultilineComment = false;
|
||
if (_enableCommentText) {
|
||
_sharedCommentToken.text = _lineText[_pos + 1 .. $];
|
||
}
|
||
_pos = _len;
|
||
nextChar();
|
||
return _sharedCommentToken;
|
||
}
|
||
|
||
protected Token processOneLineSharpComment() {
|
||
_sharedCommentToken.setPos(_startLine, _startPos);
|
||
if (_enableCommentText) {
|
||
_sharedCommentToken.text = _lineText[_pos .. $];
|
||
}
|
||
_pos = _len;
|
||
return _sharedCommentToken;
|
||
}
|
||
|
||
// Comment /* */
|
||
protected Token processMultilineComment() {
|
||
_sharedCommentToken.setPos(_startLine, _startPos);
|
||
_sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '*';
|
||
_sharedCommentToken.isMultilineComment = true;
|
||
_commentAppender.reset();
|
||
int textStart = _pos + 1;
|
||
for (;;) {
|
||
int textEnd = int.max;
|
||
int i = textStart;
|
||
for (; i < _len - 1; i++) {
|
||
if (_lineText[i] == '*' && _lineText[i + 1] == '/') {
|
||
textEnd = i;
|
||
break;
|
||
}
|
||
}
|
||
if (textEnd != int.max) {
|
||
if (_enableCommentText)
|
||
_commentAppender.append(_lineText[textStart .. textEnd]);
|
||
_pos = textEnd + 2;
|
||
break;
|
||
}
|
||
if (!nextLine()) {
|
||
// TODO: do we need throw exception if comment not closed by end of file?
|
||
_pos = _len;
|
||
break;
|
||
}
|
||
textStart = 0;
|
||
}
|
||
if (_enableCommentText) {
|
||
_sharedCommentToken.text = _commentAppender.get();
|
||
}
|
||
return _sharedCommentToken;
|
||
}
|
||
|
||
// Comment /+ +/
|
||
protected Token processNestedComment() {
|
||
_sharedCommentToken.setPos(_startLine, _startPos);
|
||
_sharedCommentToken.isDocumentationComment = _pos + 1 < _lineText.length && _lineText[_pos + 1] == '+';
|
||
_sharedCommentToken.isMultilineComment = true;
|
||
_commentAppender.reset();
|
||
dchar[] text;
|
||
int textStart = _pos + 1;
|
||
int level = 1;
|
||
for (;;) {
|
||
int textEnd = int.max;
|
||
int i = textStart;
|
||
for (; i < _len - 1; i++) {
|
||
if (_lineText[i] == '/' && _lineText[i + 1] == '+') {
|
||
level++;
|
||
i++;
|
||
} else if (_lineText[i] == '+' && _lineText[i + 1] == '/') {
|
||
if (--level == 0) {
|
||
textEnd = i;
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
if (textEnd != int.max) {
|
||
if (_enableCommentText)
|
||
_commentAppender.append(_lineText[textStart .. textEnd]);
|
||
_pos = textEnd + 2;
|
||
break;
|
||
}
|
||
if (!nextLine()) {
|
||
// TODO: do we need throw exception if comment not closed by end of file?
|
||
_pos = _len;
|
||
break;
|
||
}
|
||
if (_enableCommentText)
|
||
_commentAppender.appendEol();
|
||
textStart = 0;
|
||
}
|
||
if (_enableCommentText) {
|
||
_sharedCommentToken.text = _commentAppender.get();
|
||
}
|
||
return _sharedCommentToken;
|
||
}
|
||
|
||
protected Token processHexString() {
|
||
_pos++;
|
||
// TODO:
|
||
return null;
|
||
}
|
||
|
||
protected Token processDelimitedString() {
|
||
_pos++;
|
||
// TODO:
|
||
return null;
|
||
}
|
||
|
||
// r"string" or `string`
|
||
protected Token processWysiwygString(dchar ch) {
|
||
_pos++;
|
||
// TODO:
|
||
return null;
|
||
}
|
||
|
||
protected Token processIdent() {
|
||
_sharedIdentToken.setPos(_startLine, _startPos);
|
||
_identAppender.reset();
|
||
int startPos = _startPos;
|
||
int endPos = _len;
|
||
for (int i = startPos + 1; i < _len; i++) {
|
||
dchar ch = _lineText[i];
|
||
if (!isIdentMiddleChar(ch)) {
|
||
endPos = i;
|
||
break;
|
||
}
|
||
}
|
||
_pos = endPos;
|
||
_sharedIdentToken.setText(_lineText[startPos .. endPos]);
|
||
return _sharedIdentToken;
|
||
}
|
||
|
||
protected Token processIntegerSuffix() {
|
||
if (_pos >= _len)
|
||
return _sharedIntegerToken;
|
||
bool longFlag = false;
|
||
bool unsignedFlag = false;
|
||
dchar ch = _lineText[_pos];
|
||
dchar ch2 = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
|
||
if (ch == 'l' || ch == 'L') {
|
||
longFlag = true;
|
||
_pos++;
|
||
if (ch2 == 'u' || ch2 == 'U') {
|
||
unsignedFlag = true;
|
||
_pos++;
|
||
}
|
||
} else if (ch == 'u' || ch == 'U') {
|
||
unsignedFlag = true;
|
||
_pos++;
|
||
if (ch2 == 'l' || ch2 == 'L') {
|
||
longFlag = true;
|
||
_pos++;
|
||
}
|
||
}
|
||
_sharedIntegerToken.setFlags(unsignedFlag, longFlag);
|
||
ch = _pos < _len ? _lineText[_pos] : 0;
|
||
if (isIdentMiddleChar(ch))
|
||
return parserError("Unexpected character after number", _sharedIntegerToken);
|
||
return _sharedIntegerToken;
|
||
}
|
||
|
||
protected Token processBinaryNumber() {
|
||
_sharedIntegerToken.setPos(_startLine, _startPos);
|
||
_pos++;
|
||
if (_pos >= _len)
|
||
return parserError("Unexpected end of line in binary number", _sharedIntegerToken);
|
||
int digits = 0;
|
||
ulong number = 0;
|
||
int i = _pos;
|
||
for (;i < _len; i++) {
|
||
dchar ch = _lineText[i];
|
||
if (ch != '0' && ch != '1')
|
||
break;
|
||
number = (number << 1) | (ch == '1' ? 1 : 0);
|
||
digits++;
|
||
}
|
||
_pos = i;
|
||
if (digits > 64)
|
||
return parserError("number is too big", _sharedIntegerToken);
|
||
_sharedIntegerToken.setValue(number);
|
||
return processIntegerSuffix();
|
||
}
|
||
|
||
protected Token processHexNumber() {
|
||
_sharedIntegerToken.setPos(_startLine, _startPos);
|
||
_sharedRealToken.setPos(_startLine, _startPos);
|
||
_pos++;
|
||
if (_pos >= _len)
|
||
return parserError("Unexpected end of line in hex number", _sharedIntegerToken);
|
||
int digits = 0;
|
||
ulong number = 0;
|
||
int i = _pos;
|
||
for (;i < _len; i++) {
|
||
dchar ch = _lineText[i];
|
||
uint digit = 0;
|
||
if (ch >= '0' && ch <= '9')
|
||
digit = ch - '0';
|
||
else if (ch >= 'a' && ch <= 'f')
|
||
digit = ch - 'a' + 10;
|
||
else if (ch >= 'A' && ch <= 'F')
|
||
digit = ch - 'A' + 10;
|
||
else if (ch == '_')
|
||
continue;
|
||
else
|
||
break;
|
||
number = (number << 4) | digit;
|
||
digits++;
|
||
}
|
||
_pos = i;
|
||
if (digits > 16)
|
||
return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
|
||
_sharedIntegerToken.setValue(number);
|
||
return processIntegerSuffix();
|
||
}
|
||
|
||
protected Token processOctNumber() {
|
||
_sharedIntegerToken.setPos(_startLine, _startPos);
|
||
if (_pos >= _len)
|
||
return parserError("Unexpected end of line in octal number", _sharedIntegerToken);
|
||
int digits = 0;
|
||
ulong number = 0;
|
||
int i = _pos;
|
||
bool overflow = false;
|
||
for (;i < _len; i++) {
|
||
dchar ch = _lineText[i];
|
||
int digit = 0;
|
||
if (ch >= '0' && ch <= '7')
|
||
digit = ch - '0';
|
||
else if (ch == '_')
|
||
continue;
|
||
else
|
||
break;
|
||
number <<= 3;
|
||
if (digits >= 20) {
|
||
if ((number >> 3) << 3 != number) {
|
||
overflow = true;
|
||
break;
|
||
}
|
||
}
|
||
number |= digit;
|
||
digits++;
|
||
}
|
||
_pos = i;
|
||
if (overflow)
|
||
return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
|
||
_sharedIntegerToken.setValue(number);
|
||
return processIntegerSuffix();
|
||
}
|
||
|
||
//
|
||
protected Token processDecFloatSuffix(real value) {
|
||
_sharedRealToken.setValue(value);
|
||
// TODO
|
||
return _sharedRealToken;
|
||
}
|
||
|
||
// after E char
|
||
protected Token processDecFloatExponent(real value) {
|
||
dchar next = _pos < _len ? _lineText[_pos] : 0;
|
||
int sign = 1;
|
||
if (next == '+') {
|
||
_pos++;
|
||
} else if (next == '-') {
|
||
_pos++;
|
||
sign = -1;
|
||
}
|
||
if (_pos >= _len)
|
||
return parserError("Invalid exponent", _sharedRealToken);
|
||
ulong digits = 0;
|
||
ulong number = 0;
|
||
int i = _pos;
|
||
bool overflow = false;
|
||
for (;i < _len; i++) {
|
||
dchar ch = _lineText[i];
|
||
uint digit = 0;
|
||
if (ch >= '0' && ch <= '9')
|
||
digit = ch - '0';
|
||
else if (ch == '_')
|
||
continue;
|
||
else
|
||
break;
|
||
number *= 10;
|
||
if (digits >= 18) {
|
||
if ((number * 10) / 10 != number) {
|
||
overflow = true;
|
||
break;
|
||
}
|
||
}
|
||
number += digit;
|
||
digits++;
|
||
}
|
||
if (digits == 0)
|
||
return parserError("Invalid exponent", _sharedRealToken);
|
||
_pos = i;
|
||
value *= pow(10., cast(long)number * sign);
|
||
return processDecFloatSuffix(value);
|
||
}
|
||
|
||
protected Token processDecFloatSecondPart(ulong firstPart) {
|
||
if (_pos >= _len) {
|
||
_sharedRealToken.setValue(cast(real)firstPart);
|
||
return _sharedRealToken;
|
||
}
|
||
ulong divider = 1;
|
||
ulong number = 0;
|
||
int i = _pos;
|
||
bool overflow = false;
|
||
for (;i < _len; i++) {
|
||
dchar ch = _lineText[i];
|
||
uint digit = 0;
|
||
if (ch >= '0' && ch <= '9')
|
||
digit = ch - '0';
|
||
else if (ch == '_')
|
||
continue;
|
||
else
|
||
break;
|
||
if (divider * 10 < divider)
|
||
continue; // ignore extra digits
|
||
number *= 10;
|
||
number += digit;
|
||
divider *= 10;
|
||
}
|
||
_pos = i;
|
||
real value = cast(real)firstPart + (cast(real)number / divider);
|
||
dchar next = _pos < _len ? _lineText[_pos] : 0;
|
||
if (next == 0) {
|
||
// neither exponent nor suffix
|
||
_sharedRealToken.setValue(value);
|
||
return _sharedRealToken;
|
||
}
|
||
if (next == 'e' || next == 'E') {
|
||
_pos++;
|
||
return processDecFloatExponent(value);
|
||
}
|
||
return processDecFloatSuffix(value);
|
||
}
|
||
|
||
protected Token processDecNumber(dchar c) {
|
||
_pos--;
|
||
_sharedIntegerToken.setPos(_startLine, _startPos);
|
||
_sharedRealToken.setPos(_startLine, _startPos);
|
||
if (_pos >= _len)
|
||
return parserError("Unexpected end of line in number", _sharedIntegerToken);
|
||
int digits = 0;
|
||
ulong number = 0;
|
||
int i = _pos;
|
||
bool overflow = false;
|
||
for (;i < _len; i++) {
|
||
dchar ch = _lineText[i];
|
||
uint digit = 0;
|
||
if (ch >= '0' && ch <= '9')
|
||
digit = ch - '0';
|
||
else if (ch == '_')
|
||
continue;
|
||
else
|
||
break;
|
||
number *= 10;
|
||
if (digits >= 18) {
|
||
if ((number * 10) / 10 != number) {
|
||
overflow = true;
|
||
break;
|
||
}
|
||
}
|
||
number += digit;
|
||
digits++;
|
||
}
|
||
_pos = i;
|
||
if (overflow)
|
||
return parserError("number is too big to fit 64 bits", _sharedIntegerToken);
|
||
_sharedIntegerToken.setValue(number);
|
||
dchar next = _pos < _len ? _lineText[_pos] : 0;
|
||
if (next == 0)
|
||
return _sharedIntegerToken;
|
||
if (next == '.') {
|
||
_pos++;
|
||
return processDecFloatSecondPart(number);
|
||
}
|
||
return processIntegerSuffix();
|
||
}
|
||
|
||
/// Either return InvalidToken or throw parser exception depending on current errorTolerant flag
|
||
protected Token parserError(string msg, Token incompleteToken) {
|
||
return parserError(msg, incompleteToken.line, incompleteToken.pos, incompleteToken.type);
|
||
}
|
||
/// Either return InvalidToken or throw parser exception depending on current errorTolerant flag
|
||
protected Token parserError(string msg, int startLine, int startPos, TokenType failedTokenType = TokenType.INVALID) {
|
||
if (_errorTolerant) {
|
||
startPos--;
|
||
_sharedInvalidToken.setPos(startLine, startPos);
|
||
_sharedInvalidToken.errorMessage = msg;
|
||
_sharedInvalidToken.errorCode = 1; // for future extension
|
||
_sharedInvalidToken.invalidTokenType = failedTokenType; // for future extension
|
||
// make invalid source text
|
||
dchar[] invalidText;
|
||
int p = startLine == _line ? startPos : 0;
|
||
for (int i = p; i < _pos && i < _lineText.length; i++)
|
||
invalidText ~= _lineText[i];
|
||
|
||
// recover after error
|
||
for (; _pos < _lineText.length; _pos++) {
|
||
dchar ch = _lineText[_pos];
|
||
if (ch == ' ' || ch == '\t' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '{' || ch == '}')
|
||
break;
|
||
if (failedTokenType == TokenType.INTEGER || failedTokenType == TokenType.FLOAT) {
|
||
if (ch == '*' || ch == '/')
|
||
break;
|
||
}
|
||
invalidText ~= ch;
|
||
}
|
||
_sharedInvalidToken.text = invalidText;
|
||
return _sharedInvalidToken;
|
||
}
|
||
throw new ParserException(msg, _lineStream.file, _line, _pos);
|
||
}
|
||
|
||
protected Keyword detectKeyword(dchar ch) {
|
||
if (ch > 'z')
|
||
return Keyword.NONE;
|
||
int len = _len - _pos;
|
||
switch (cast(ubyte)ch) {
|
||
// ABSTRACT,
|
||
// ALIAS,
|
||
// ALIGN,
|
||
// ASM,
|
||
// ASSERT,
|
||
// AUTO,
|
||
case 'a': return findKeyword(Keyword.ABSTRACT, Keyword.AUTO, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// BODY,
|
||
// BOOL,
|
||
// BREAK,
|
||
// BYTE,
|
||
case 'b': return findKeyword(Keyword.BODY, Keyword.BYTE, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// CASE,
|
||
// CAST,
|
||
// CATCH,
|
||
// CDOUBLE,
|
||
// CENT,
|
||
// CFLOAT,
|
||
// CHAR,
|
||
// CLASS,
|
||
// CONST,
|
||
// CONTINUE,
|
||
// CREAL,
|
||
case 'c': return findKeyword(Keyword.CASE, Keyword.CREAL, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// DCHAR,
|
||
// DEBUG,
|
||
// DEFAULT,
|
||
// DELEGATE,
|
||
// DELETE,
|
||
// DEPRECATED,
|
||
// DO,
|
||
// DOUBLE,
|
||
case 'd': return findKeyword(Keyword.DCHAR, Keyword.DOUBLE, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// ELSE,
|
||
// ENUM,
|
||
// EXPORT,
|
||
// EXTERN,
|
||
case 'e': return findKeyword(Keyword.ELSE, Keyword.EXTERN, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// FALSE,
|
||
// FINAL,
|
||
// FINALLY,
|
||
// FLOAT,
|
||
// FOR,
|
||
// FOREACH,
|
||
// FOREACH_REVERSE,
|
||
// FUNCTION,
|
||
case 'f': return findKeyword(Keyword.FALSE, Keyword.FUNCTION, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// GOTO,
|
||
case 'g': return findKeyword(Keyword.GOTO, Keyword.GOTO, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// IDOUBLE,
|
||
// IF,
|
||
// IFLOAT,
|
||
// IMMUTABLE,
|
||
// IMPORT,
|
||
// IN,
|
||
// INOUT,
|
||
// INT,
|
||
// INTERFACE,
|
||
// INVARIANT,
|
||
// IREAL,
|
||
// IS,
|
||
case 'i': return findKeyword(Keyword.IDOUBLE, Keyword.IS, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// LAZY,
|
||
// LONG,
|
||
case 'l': return findKeyword(Keyword.LAZY, Keyword.LONG, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// MACRO,
|
||
// MIXIN,
|
||
// MODULE,
|
||
case 'm': return findKeyword(Keyword.MACRO, Keyword.MODULE, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// NEW,
|
||
// NOTHROW,
|
||
// NULL,
|
||
case 'n': return findKeyword(Keyword.NEW, Keyword.NULL, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// OUT,
|
||
// OVERRIDE,
|
||
case 'o': return findKeyword(Keyword.OUT, Keyword.OVERRIDE, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// PACKAGE,
|
||
// PRAGMA,
|
||
// PRIVATE,
|
||
// PROTECTED,
|
||
// PUBLIC,
|
||
// PURE,
|
||
case 'p': return findKeyword(Keyword.PACKAGE, Keyword.PURE, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// REAL,
|
||
// REF,
|
||
// RETURN,
|
||
case 'r': return findKeyword(Keyword.REAL, Keyword.RETURN, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// SCOPE,
|
||
// SHARED,
|
||
// SHORT,
|
||
// STATIC,
|
||
// STRUCT,
|
||
// SUPER,
|
||
// SWITCH,
|
||
// SYNCHRONIZED,
|
||
case 's': return findKeyword(Keyword.SCOPE, Keyword.SYNCHRONIZED, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// TEMPLATE,
|
||
// THIS,
|
||
// THROW,
|
||
// TRUE,
|
||
// TRY,
|
||
// TYPEDEF,
|
||
// TYPEID,
|
||
// TYPEOF,
|
||
case 't': return findKeyword(Keyword.TEMPLATE, Keyword.TYPEOF, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// UBYTE,
|
||
// UCENT,
|
||
// UINT,
|
||
// ULONG,
|
||
// UNION,
|
||
// UNITTEST,
|
||
// USHORT,
|
||
case 'u': return findKeyword(Keyword.UBYTE, Keyword.USHORT, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// VERSION,
|
||
// VOID,
|
||
// VOLATILE,
|
||
case 'v': return findKeyword(Keyword.VERSION, Keyword.VOLATILE, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// WCHAR,
|
||
// WHILE,
|
||
// WITH,
|
||
case 'w': return findKeyword(Keyword.WCHAR, Keyword.WITH, _lineText.ptr + _pos, len, _pos);
|
||
|
||
// FILE,
|
||
// MODULE,
|
||
// LINE,
|
||
// FUNCTION,
|
||
// PRETTY_FUNCTION,
|
||
//
|
||
// GSHARED,
|
||
// TRAITS,
|
||
// VECTOR,
|
||
// PARAMETERS,
|
||
case '_': return findKeyword(Keyword.FILE, Keyword.PARAMETERS, _lineText.ptr + _pos, len, _pos);
|
||
default: return Keyword.NONE;
|
||
}
|
||
}
|
||
protected OpCode detectOp(dchar ch) nothrow {
|
||
if (ch >= 128)
|
||
return OpCode.NONE;
|
||
dchar ch2 = _pos < _len ? _lineText[_pos] : 0;
|
||
dchar ch3 = _pos < _len - 1 ? _lineText[_pos + 1] : 0;
|
||
switch(cast(ubyte)ch) {
|
||
// DIV, // /
|
||
// DIV_EQ, // /=
|
||
case '/':
|
||
if (ch2 == '=') {
|
||
_pos++;
|
||
return OpCode.DIV_EQ;
|
||
}
|
||
return OpCode.DIV;
|
||
// DOT, // .
|
||
// DOT_DOT, // ..
|
||
// DOT_DOT_DOT,// ...
|
||
case '.':
|
||
if (ch2 == '.') {
|
||
if (ch3 == '.') {
|
||
_pos += 2;
|
||
return OpCode.DOT_DOT_DOT;
|
||
}
|
||
_pos++;
|
||
return OpCode.DOT_DOT;
|
||
}
|
||
return OpCode.DOT;
|
||
// AND, // &
|
||
// AND_EQ, // &=
|
||
// LOG_AND, // &&
|
||
case '&':
|
||
if (ch2 == '=') {
|
||
_pos++;
|
||
return OpCode.AND_EQ;
|
||
}
|
||
if (ch2 == '&') {
|
||
_pos++;
|
||
return OpCode.LOG_AND;
|
||
}
|
||
return OpCode.AND;
|
||
// OR, // |
|
||
// OR_EQ, // |=
|
||
// LOG_OR, // ||
|
||
case '|':
|
||
if (ch2 == '=') {
|
||
_pos++;
|
||
return OpCode.OR_EQ;
|
||
}
|
||
if (ch2 == '|') {
|
||
_pos++;
|
||
return OpCode.LOG_OR;
|
||
}
|
||
return OpCode.OR;
|
||
// MINUS, // -
|
||
// MINUS_EQ, // -=
|
||
// MINUS_MINUS,// --
|
||
case '-':
|
||
if (ch2 == '=') {
|
||
_pos++;
|
||
return OpCode.MINUS_EQ;
|
||
}
|
||
if (ch2 == '-') {
|
||
_pos++;
|
||
return OpCode.MINUS_MINUS;
|
||
}
|
||
return OpCode.MINUS;
|
||
// PLUS, // +
|
||
// PLUS_EQ, // +=
|
||
// PLUS_PLUS, // ++
|
||
case '+':
|
||
if (ch2 == '=') {
|
||
_pos++;
|
||
return OpCode.PLUS_EQ;
|
||
}
|
||
if (ch2 == '+') {
|
||
_pos++;
|
||
return OpCode.PLUS_PLUS;
|
||
}
|
||
return OpCode.PLUS;
|
||
// LT, // <
|
||
// LT_EQ, // <=
|
||
// SHL, // <<
|
||
// SHL_EQ, // <<=
|
||
// LT_GT, // <>
|
||
// NE_EQ, // <>=
|
||
case '<':
|
||
if (ch2 == '<') {
|
||
if (ch3 == '=') {
|
||
_pos += 2;
|
||
return OpCode.SHL_EQ;
|
||
}
|
||
_pos++;
|
||
return OpCode.SHL;
|
||
}
|
||
if (ch2 == '>') {
|
||
if (ch3 == '=') {
|
||
_pos += 2;
|
||
return OpCode.NE_EQ;
|
||
}
|
||
_pos++;
|
||
return OpCode.LT_GT;
|
||
}
|
||
if (ch2 == '=') {
|
||
_pos++;
|
||
return OpCode.LT_EQ;
|
||
}
|
||
return OpCode.LT;
|
||
// GT, // >
|
||
// GT_EQ, // >=
|
||
// SHR_EQ // >>=
|
||
// ASR_EQ, // >>>=
|
||
// SHR, // >>
|
||
// ASR, // >>>
|
||
case '>':
|
||
if (ch2 == '>') {
|
||
if (ch3 == '>') {
|
||
dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0;
|
||
if (ch4 == '=') { // >>>=
|
||
_pos += 3;
|
||
return OpCode.ASR_EQ;
|
||
}
|
||
_pos += 2;
|
||
return OpCode.ASR; // >>>
|
||
}
|
||
if (ch3 == '=') { // >>=
|
||
_pos += 2;
|
||
return OpCode.SHR_EQ;
|
||
}
|
||
_pos++;
|
||
return OpCode.SHR;
|
||
}
|
||
if (ch2 == '=') { // >=
|
||
_pos++;
|
||
return OpCode.GT_EQ;
|
||
}
|
||
// >
|
||
return OpCode.GT;
|
||
// NOT, // !
|
||
// NOT_EQ // !=
|
||
// NOT_LT_GT, // !<>
|
||
// NOT_LT_GT_EQ, // !<>=
|
||
// NOT_LT, // !<
|
||
// NOT_LT_EQ, // !<=
|
||
// NOT_GT, // !>
|
||
// NOT_GT_EQ, // !>=
|
||
case '!':
|
||
if (ch2 == '<') { // !<
|
||
if (ch3 == '>') { // !<>
|
||
dchar ch4 = _pos < _len - 2 ? _lineText[_pos + 2] : 0;
|
||
if (ch4 == '=') { // !<>=
|
||
_pos += 3;
|
||
return OpCode.NOT_LT_GT_EQ;
|
||
}
|
||
_pos += 2;
|
||
return OpCode.NOT_LT_GT; // !<>
|
||
}
|
||
if (ch3 == '=') { // !<=
|
||
_pos += 2;
|
||
return OpCode.NOT_LT_EQ;
|
||
}
|
||
_pos++;
|
||
return OpCode.NOT_LT; // !<
|
||
}
|
||
if (ch2 == '=') { // !=
|
||
_pos++;
|
||
return OpCode.NOT_EQ;
|
||
}
|
||
return OpCode.NOT;
|
||
// PAR_OPEN, // (
|
||
case '(':
|
||
return OpCode.PAR_OPEN;
|
||
// PAR_CLOSE, // )
|
||
case ')':
|
||
return OpCode.PAR_CLOSE;
|
||
// SQ_OPEN, // [
|
||
case '[':
|
||
return OpCode.SQ_OPEN;
|
||
// SQ_CLOSE, // ]
|
||
case ']':
|
||
return OpCode.SQ_CLOSE;
|
||
// CURL_OPEN, // {
|
||
case '{':
|
||
return OpCode.CURL_OPEN;
|
||
// CURL_CLOSE, // }
|
||
case '}':
|
||
return OpCode.CURL_CLOSE;
|
||
// QUEST, // ?
|
||
case '?':
|
||
return OpCode.QUEST;
|
||
// COMMA, // ,
|
||
case ',':
|
||
return OpCode.COMMA;
|
||
// SEMICOLON, // ;
|
||
case ';':
|
||
return OpCode.SEMICOLON;
|
||
// COLON, // :
|
||
case ':':
|
||
return OpCode.COLON;
|
||
// DOLLAR, // $
|
||
case '$':
|
||
return OpCode.DOLLAR;
|
||
// EQ, // =
|
||
// QE_EQ, // ==
|
||
// EQ_GT, // =>
|
||
case '=':
|
||
if (ch2 == '=') { // ==
|
||
_pos++;
|
||
return OpCode.QE_EQ;
|
||
}
|
||
if (ch2 == '>') { // =>
|
||
_pos++;
|
||
return OpCode.EQ_GT;
|
||
}
|
||
return OpCode.EQ;
|
||
// MUL, // *
|
||
// MUL_EQ, // *=
|
||
case '*':
|
||
if (ch2 == '=') {
|
||
_pos++;
|
||
return OpCode.MUL_EQ;
|
||
}
|
||
return OpCode.MUL;
|
||
// MOD, // %
|
||
// MOD_EQ, // %=
|
||
case '%':
|
||
if (ch2 == '=') {
|
||
_pos++;
|
||
return OpCode.MOD_EQ;
|
||
}
|
||
return OpCode.MOD;
|
||
// XOR, // ^
|
||
// XOR_EQ, // ^=
|
||
// LOG_XOR, // ^^
|
||
// LOG_XOR_EQ, // ^^=
|
||
case '^':
|
||
if (ch2 == '^') {
|
||
if (ch3 == '=') {
|
||
_pos += 2;
|
||
return OpCode.LOG_XOR_EQ;
|
||
}
|
||
_pos++;
|
||
return OpCode.LOG_XOR;
|
||
}
|
||
if (ch2 == '=') {
|
||
_pos++;
|
||
return OpCode.XOR_EQ;
|
||
}
|
||
return OpCode.XOR;
|
||
// INV, // ~
|
||
// INV_EQ, // ~=
|
||
case '~':
|
||
if (ch2 == '=') {
|
||
_pos++;
|
||
return OpCode.INV_EQ;
|
||
}
|
||
return OpCode.INV;
|
||
// AT, // @
|
||
case '@':
|
||
return OpCode.AT;
|
||
// SHARP // #
|
||
case '#':
|
||
return OpCode.SHARP;
|
||
default:
|
||
return OpCode.NONE;
|
||
}
|
||
}
|
||
|
||
protected Token processCharacterLiteral() {
|
||
_sharedCharacterLiteralToken.setPos(_startLine, _startPos);
|
||
if (_pos + 2 > _len)
|
||
return parserError("Invalid character literal", _sharedCharacterLiteralToken);
|
||
dchar ch = _lineText[_pos++];
|
||
dchar ch2 = _lineText[_pos++];
|
||
dchar type = 0;
|
||
if (ch == '\\') {
|
||
// process escaped character - store it in ch
|
||
// TODO: support all escape sequences
|
||
switch(ch2) {
|
||
case 'r':
|
||
ch = '\r';
|
||
break;
|
||
case 'n':
|
||
ch = '\n';
|
||
break;
|
||
case 't':
|
||
ch = '\t';
|
||
break;
|
||
case '\\':
|
||
ch = '\\';
|
||
break;
|
||
default:
|
||
ch = ch2;
|
||
break;
|
||
}
|
||
// here must be closing '
|
||
if (_pos + 1 > _len)
|
||
return parserError("Invalid character literal", _sharedCharacterLiteralToken);
|
||
ch2 = _lineText[_pos++];
|
||
}
|
||
if (ch2 != '\'')
|
||
return parserError("Invalid character literal", _sharedCharacterLiteralToken);
|
||
if (_pos < _len) {
|
||
dchar t = _lineText[_pos];
|
||
if (t == 'd' || t == 'w' || t == 'c') {
|
||
type = t;
|
||
_pos++;
|
||
} else if (isIdentMiddleChar(ch)) {
|
||
return parserError("Unexpected character after character literal", _sharedCharacterLiteralToken);
|
||
}
|
||
}
|
||
_sharedCharacterLiteralToken.setCharacter(ch, type);
|
||
return _sharedCharacterLiteralToken;
|
||
}
|
||
|
||
protected Token processDoubleQuotedOrWysiwygString(dchar delimiter) {
|
||
bool wysiwyg = (delimiter == 'r' || delimiter == '`');
|
||
//writeln("processDoubleQuotedString()");
|
||
_sharedStringLiteralToken.setPos(_startLine, _startPos);
|
||
_stringLiteralAppender.reset();
|
||
if (delimiter == 'r') {
|
||
_pos++;
|
||
delimiter = '\"';
|
||
}
|
||
dchar type = 0;
|
||
for (;;) {
|
||
int i = _pos;
|
||
int endPos = int.max;
|
||
for(; i < _len; i++) {
|
||
if (_lineText[i] == delimiter && (i == 0 || _lineText[i - 1] != '\\')) {
|
||
endPos = i;
|
||
break;
|
||
}
|
||
}
|
||
if (endPos != int.max) {
|
||
// found end quote
|
||
_stringLiteralAppender.append(_lineText[_pos .. endPos]);
|
||
_pos = endPos + 1;
|
||
break;
|
||
}
|
||
// no quote by end of line
|
||
_stringLiteralAppender.append(_lineText[_pos .. $]);
|
||
_stringLiteralAppender.appendEol();
|
||
if (!nextLine()) {
|
||
// do we need to throw exception if eof comes before end of string?
|
||
break;
|
||
}
|
||
}
|
||
dchar t = 0;
|
||
if (_pos < _len) {
|
||
dchar ch = _lineText[_pos];
|
||
if (ch == 'c' || ch == 'w' || ch == 'd')
|
||
t = ch;
|
||
else if (isIdentMiddleChar(ch))
|
||
return parserError("Unexpected character after string literal", _sharedStringLiteralToken);
|
||
}
|
||
if (t != 0) {
|
||
if (type != 0 && t != type)
|
||
return parserError("Cannot concatenate strings of different type", _sharedStringLiteralToken);
|
||
type = t;
|
||
}
|
||
if (!wysiwyg) {
|
||
// no escape processing
|
||
_sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type);
|
||
return _sharedStringLiteralToken;
|
||
}
|
||
_stringLiteralAppender.processEscapeSequences();
|
||
_sharedStringLiteralToken.setText(_stringLiteralAppender.get(), type);
|
||
return _sharedStringLiteralToken;
|
||
}
|
||
|
||
protected SysTime buildTime;
|
||
|
||
// string literal of the date of compilation "mmm dd yyyy"
|
||
protected dstring formatBuildDate() {
|
||
// TODO: provide proper format
|
||
return to!dstring(buildTime);
|
||
}
|
||
|
||
// string literal of the time of compilation "hh:mm:ss"
|
||
protected dstring formatBuildTime() {
|
||
// TODO: provide proper format
|
||
return to!dstring(buildTime);
|
||
}
|
||
|
||
// string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
|
||
protected dstring formatBuildTimestamp() {
|
||
// TODO: provide proper format
|
||
return to!dstring(buildTime);
|
||
}
|
||
|
||
static immutable dstring VERSION = "0.1";
|
||
static immutable dstring VENDOR = "coolreader.org";
|
||
|
||
protected Token makeSpecialTokenString(dstring str, int pos) {
|
||
_sharedStringLiteralToken.setPos(_startLine, _startPos);
|
||
_sharedStringLiteralToken.setText(cast(dchar[])str, 0);
|
||
return _sharedStringLiteralToken;
|
||
}
|
||
|
||
protected Token processSpecialToken(Keyword keyword, int pos) {
|
||
switch (keyword) {
|
||
//Special Token Replaced with
|
||
case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy"
|
||
return makeSpecialTokenString(formatBuildDate(), pos);
|
||
case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss"
|
||
return makeSpecialTokenString(formatBuildTime(), pos);
|
||
case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
|
||
return makeSpecialTokenString(formatBuildTimestamp(), pos);
|
||
case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D"
|
||
return makeSpecialTokenString(VENDOR, pos);
|
||
case Keyword.VERSION_: // Compiler version as an integer, such as 2001
|
||
return makeSpecialTokenString(VERSION, pos);
|
||
default:
|
||
parserError("Unknown special token", _line, pos);
|
||
}
|
||
return null;
|
||
}
|
||
|
||
protected int _startLine;
|
||
protected int _startPos;
|
||
|
||
// returns next token (clone it if you want to store for future usage, otherwise it may be overwritten by further nextToken() calls).
|
||
Token nextToken() {
|
||
_startLine = _line;
|
||
_startPos = _pos;
|
||
dchar ch = nextChar();
|
||
if (ch == EOF_CHAR) {
|
||
return emitEof();
|
||
}
|
||
if (ch == EOL_CHAR || ch == 0x0020 || ch == 0x0009 || ch == 0x000B || ch == 0x000C) {
|
||
// white space (treat EOL as whitespace, too)
|
||
return processWhiteSpace(ch);
|
||
}
|
||
dchar next = _pos < _len ? _lineText[_pos] : 0;
|
||
if (ch == '/') {
|
||
if (next == '/')
|
||
return processOneLineComment();
|
||
else if (next == '*')
|
||
return processMultilineComment();
|
||
else if (next == '+')
|
||
return processNestedComment();
|
||
}
|
||
if (ch == '#' && _line == 1)
|
||
return processOneLineSharpComment();
|
||
if (ch == '\"')
|
||
return processDoubleQuotedOrWysiwygString(ch);
|
||
if (ch == '\'')
|
||
return processCharacterLiteral();
|
||
if (ch == 'x' && next == '\"')
|
||
return processHexString();
|
||
if (ch == 'q' && next == '\"')
|
||
return processDelimitedString();
|
||
if ((ch == 'r' && next == '\"') || (ch == '`'))
|
||
return processDoubleQuotedOrWysiwygString(ch);
|
||
int oldPos = _pos - 1;
|
||
|
||
if (ch == '0') {
|
||
if (next == 'b' || next == 'B')
|
||
return processBinaryNumber();
|
||
if (next == 'x' || next == 'X')
|
||
return processHexNumber();
|
||
if (next >= '0' && next <= '9')
|
||
return processOctNumber();
|
||
if (next >= '0' && next <= '9')
|
||
return processDecNumber(ch);
|
||
}
|
||
if (ch >= '0' && ch <= '9')
|
||
return processDecNumber(ch);
|
||
if (ch == '.' && next >= '0' && next <= '9') // .123
|
||
return processDecFloatSecondPart(0);
|
||
|
||
if (ch == '_' || isUniversalAlpha(ch)) {
|
||
// start of identifier or keyword?
|
||
Keyword keyword = detectKeyword(ch);
|
||
if (keyword != Keyword.NONE) {
|
||
switch (keyword) {
|
||
//Special Token Replaced with
|
||
case Keyword.EOF: return emitEof(); // sets the scanner to the end of the file
|
||
case Keyword.DATE: // string literal of the date of compilation "mmm dd yyyy"
|
||
case Keyword.TIME: // string literal of the time of compilation "hh:mm:ss"
|
||
case Keyword.TIMESTAMP: // string literal of the date and time of compilation "www mmm dd hh:mm:ss yyyy"
|
||
case Keyword.VENDOR: // Compiler vendor string, such as "Digital Mars D"
|
||
case Keyword.VERSION_: // Compiler version as an integer, such as 2001
|
||
return processSpecialToken(keyword, oldPos);
|
||
default:
|
||
_sharedKeywordToken.setPos(_startLine, _startPos);
|
||
_sharedKeywordToken.keyword = keyword;
|
||
return _sharedKeywordToken;
|
||
}
|
||
}
|
||
return processIdent();
|
||
}
|
||
OpCode op = detectOp(ch);
|
||
if (op != OpCode.NONE) {
|
||
_sharedOpToken.setPos(_startLine, _startPos);
|
||
_sharedOpToken.opCode = op;
|
||
return _sharedOpToken;
|
||
}
|
||
return parserError("Invalid token", _line, _pos);
|
||
}
|
||
|
||
|
||
}
|
||
|
||
unittest {
|
||
import std.algorithm;
|
||
class TokenTest {
|
||
int _line;
|
||
string _file;
|
||
this(string file, int line) {
|
||
_file = file;
|
||
_line = line;
|
||
}
|
||
bool doTest(Token token) {
|
||
return true;
|
||
}
|
||
void execute(Tokenizer tokenizer) {
|
||
Token token = tokenizer.nextToken();
|
||
if (!doTest(token)) {
|
||
assert(false, " token doesn not match at " ~ _file ~ ":" ~ to!string(_line) ~ " foundToken: " ~ token.toString ~ " expected: " ~ toString);
|
||
}
|
||
}
|
||
public override @property string toString() {
|
||
return "TokenTest";
|
||
}
|
||
}
|
||
void testTokenizer(string code, TokenTest[] tokens, string file = __FILE__, uint line = __LINE__) {
|
||
Tokenizer tokenizer = new Tokenizer(code, "tokenizerTest:" ~ file ~ ":" ~ to!string(line));
|
||
for (int i = 0; i < tokens.length; i++) {
|
||
tokens[i].execute(tokenizer);
|
||
}
|
||
}
|
||
class KeywordTest : TokenTest {
|
||
Keyword _code;
|
||
this(Keyword code, string file = __FILE__, uint line = __LINE__) {
|
||
super(file, line);
|
||
_code = code;
|
||
}
|
||
override bool doTest(Token token) {
|
||
if (token.type != TokenType.KEYWORD)
|
||
return false;
|
||
if (token.keyword != _code)
|
||
return false;
|
||
return true;
|
||
}
|
||
public override @property string toString() {
|
||
return "Keyword:" ~ to!string(_code);
|
||
}
|
||
}
|
||
class OpTest : TokenTest {
|
||
OpCode _code;
|
||
this(OpCode code, string file = __FILE__, uint line = __LINE__) {
|
||
super(file, line);
|
||
_code = code;
|
||
}
|
||
override bool doTest(Token token) {
|
||
if (token.type != TokenType.OP)
|
||
return false;
|
||
if (token.opCode != _code)
|
||
return false;
|
||
return true;
|
||
}
|
||
public override @property string toString() {
|
||
return "Op:" ~ to!string(_code);
|
||
}
|
||
}
|
||
class StringTest : TokenTest {
|
||
string _value;
|
||
this(string value, string file = __FILE__, uint line = __LINE__) {
|
||
super(file, line);
|
||
_value = value;
|
||
}
|
||
override bool doTest(Token token) {
|
||
if (token.type != TokenType.STRING)
|
||
return false;
|
||
if (to!string(token.text).equal(_value))
|
||
return false;
|
||
return true;
|
||
}
|
||
public override @property string toString() {
|
||
return "String:" ~ _value;
|
||
}
|
||
}
|
||
class IntegerTest : TokenTest {
|
||
ulong _value;
|
||
bool _unsigned;
|
||
bool _long;
|
||
this(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
|
||
super(file, line);
|
||
_value = value;
|
||
_unsigned = unsignedFlag;
|
||
_long = longFlag;
|
||
}
|
||
override bool doTest(Token token) {
|
||
if (token.type != TokenType.INTEGER)
|
||
return false;
|
||
if (token.intValue != _value)
|
||
return false;
|
||
if (token.isUnsigned != _unsigned)
|
||
return false;
|
||
if (token.isLong != _long)
|
||
return false;
|
||
return true;
|
||
}
|
||
public override @property string toString() {
|
||
return "Integer:" ~ to!string(_value);
|
||
}
|
||
}
|
||
class RealTest : TokenTest {
|
||
real _value;
|
||
ubyte _precision;
|
||
bool _imaginary;
|
||
this(real value, ubyte precision = 1, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
|
||
super(file, line);
|
||
_value = value;
|
||
_precision = precision;
|
||
_imaginary = imaginary;
|
||
}
|
||
override bool doTest(Token token) {
|
||
if (token.type != TokenType.FLOAT)
|
||
return false;
|
||
if (token.realValue != _value)
|
||
return false;
|
||
if (token.precision != _precision)
|
||
return false;
|
||
if (token.isImaginary != _imaginary)
|
||
return false;
|
||
return true;
|
||
}
|
||
public override @property string toString() {
|
||
return "Real:" ~ to!string(_value);
|
||
}
|
||
}
|
||
class IdentTest : TokenTest {
|
||
string _value;
|
||
this(string value, string file = __FILE__, uint line = __LINE__) {
|
||
super(file, line);
|
||
_value = value;
|
||
}
|
||
override bool doTest(Token token) {
|
||
if (token.type != TokenType.IDENTIFIER)
|
||
return false;
|
||
if (! to!string(token.text).equal(_value))
|
||
return false;
|
||
return true;
|
||
}
|
||
public override @property string toString() {
|
||
return "Ident:" ~ _value;
|
||
}
|
||
}
|
||
class CommentTest : TokenTest {
|
||
this(string file = __FILE__, uint line = __LINE__) {
|
||
super(file, line);
|
||
}
|
||
override bool doTest(Token token) {
|
||
if (token.type != TokenType.COMMENT)
|
||
return false;
|
||
return true;
|
||
}
|
||
public override @property string toString() {
|
||
return "Comment";
|
||
}
|
||
}
|
||
class EOFTest : TokenTest {
|
||
this(string file = __FILE__, uint line = __LINE__) {
|
||
super(file, line);
|
||
}
|
||
override bool doTest(Token token) {
|
||
if (token.type != TokenType.EOF)
|
||
return false;
|
||
return true;
|
||
}
|
||
public override @property string toString() {
|
||
return "EOF";
|
||
}
|
||
}
|
||
class WhiteSpaceTest : TokenTest {
|
||
this(string file = __FILE__, uint line = __LINE__) {
|
||
super(file, line);
|
||
}
|
||
override bool doTest(Token token) {
|
||
if (token.type != TokenType.WHITESPACE)
|
||
return false;
|
||
return true;
|
||
}
|
||
public override @property string toString() {
|
||
return "whiteSpace";
|
||
}
|
||
}
|
||
TokenTest checkString(string value, string file = __FILE__, uint line = __LINE__) {
|
||
return new StringTest(value, file, line);
|
||
}
|
||
TokenTest checkInteger(ulong value, bool unsignedFlag = false, bool longFlag = false, string file = __FILE__, uint line = __LINE__) {
|
||
return new IntegerTest(value, unsignedFlag, longFlag, file, line);
|
||
}
|
||
TokenTest checkReal(real value, byte precision = 0, bool imaginary = false, string file = __FILE__, uint line = __LINE__) {
|
||
return new RealTest(value, precision, imaginary, file, line);
|
||
}
|
||
TokenTest checkIdent(string value, string file = __FILE__, uint line = __LINE__) {
|
||
return new IdentTest(value, file, line);
|
||
}
|
||
TokenTest checkKeyword(Keyword value, string file = __FILE__, uint line = __LINE__) {
|
||
return new KeywordTest(value, file, line);
|
||
}
|
||
TokenTest checkOp(OpCode value, string file = __FILE__, uint line = __LINE__) {
|
||
return new OpTest(value, file, line);
|
||
}
|
||
TokenTest checkSpace(string file = __FILE__, uint line = __LINE__) {
|
||
return new WhiteSpaceTest(file, line);
|
||
}
|
||
TokenTest checkComment(string file = __FILE__, uint line = __LINE__) {
|
||
return new CommentTest(file, line);
|
||
}
|
||
TokenTest checkEOF(string file = __FILE__, uint line = __LINE__) {
|
||
return new EOFTest(file, line);
|
||
}
|
||
|
||
testTokenizer(q"TEST
|
||
int i;
|
||
TEST"
|
||
, [
|
||
checkKeyword(Keyword.INT),
|
||
checkSpace(),
|
||
checkIdent("i"),
|
||
checkOp(OpCode.SEMICOLON),
|
||
checkEOF()
|
||
]);
|
||
testTokenizer("0b1101 0x123abcdU 0xABCL 0743 192837465 0 192_837_465 5.25"
|
||
, [
|
||
checkInteger(13),
|
||
checkSpace(),
|
||
checkInteger(0x123abcd, true, false),
|
||
checkSpace(),
|
||
checkInteger(0xabc, false, true),
|
||
checkSpace(),
|
||
checkInteger(std.conv.octal!743),
|
||
checkSpace(),
|
||
checkInteger(192_837_465),
|
||
checkSpace(),
|
||
checkInteger(0),
|
||
checkSpace(),
|
||
checkInteger(192837465),
|
||
checkSpace(),
|
||
checkReal(5.25),
|
||
checkEOF()
|
||
]);
|
||
}
|
||
|
||
|
||
unittest {
|
||
version(DisableLexerTest) {
|
||
import std.stdio;
|
||
import std.conv;
|
||
import std.utf;
|
||
import dlangui.core.linestream;
|
||
string fname = "/home/lve/src/d/ddc/ddclexer/tests/tokenizer_test.d";
|
||
writeln("opening file");
|
||
try {
|
||
std.stream.File f = new std.stream.File(fname);
|
||
scope(exit) { f.close(); }
|
||
try {
|
||
LineStream lines = LineStream.create(f, fname);
|
||
Tokenizer tokenizer = new Tokenizer(lines);
|
||
for (;;) {
|
||
Token token = tokenizer.nextToken();
|
||
if (token is null) {
|
||
writeln("Null token returned");
|
||
break;
|
||
}
|
||
if (token.type == TokenType.EOF) {
|
||
writeln("EOF token");
|
||
break;
|
||
}
|
||
writeln("", token.line, ":", token.pos, "\t", token.toString);
|
||
}
|
||
} catch (Exception e) {
|
||
writeln("Exception " ~ e.toString);
|
||
}
|
||
} catch (Exception e) {
|
||
writeln("Exception " ~ e.toString);
|
||
}
|
||
}
|
||
}
|