Implement UAX31 character ranges (#15307)

2025-04-28 06:00:13 +03:00 · 2024-03-19 07:19:16 +13:00 · 2024-03-19 07:19:16 +13:00 · dffd899508
commit dffd899508
parent e74da19bcd
32 changed files with 5902 additions and 458 deletions
--- a/changelog/dmd.identifier-tables.dd
+++ b/changelog/dmd.identifier-tables.dd
@ -0,0 +1,12 @@
+Identifier tables have been expanded to allow new characters matching C23, and are now configurable from the CLI
+
+You can currently choose between ``c99``, ``c11``, ``UAX31`` (C23's) and ``all`` (the least restrictive set) for both D and ImportC.
+
+This can be done with ``-identifiers=<table>`` and for ImportC ``-identifiers-importc=<table>``.
+
+The default table for D is currently set to ``all``, while ImportC is set to ``c11``.
+Previously both D and ImportC used the ``c99`` tables.
+
+D's table will be swapped over to [UAX31](https://unicode.org/reports/tr31/) at a later date; this is expected to happen in 2.117.
+If you find yourself at this time using ``c99`` specific characters and not willing to change them, you may switch back to ``all``.
+Although it should be unlikely that you will need to.
--- a/changelog/dmd.importc-unicode.dd
+++ b/changelog/dmd.importc-unicode.dd
@ -0,0 +1,6 @@
+ImportC has improved Unicode support
+
+Universal Character Names are now supported, allowing you to use the ``\uXXXX`` and ``\UXXXXXXXX`` syntax where ``X`` is a hex digit as part of an identifier.
+
+DigitalMars sppn does not support anything newer than C99.
+It is known to be limited and using any Unicode character not in those ranges will result in an error.
--- a/compiler/src/build.d
+++ b/compiler/src/build.d
@ -1584,7 +1584,7 @@ auto sourceFiles()
            stringtable.d utf.d
        "),
        common: fileArray(env["COMMON"], "
-            bitfields.d file.d int128.d blake3.d outbuffer.d smallbuffer.d
+            bitfields.d file.d int128.d blake3.d outbuffer.d smallbuffer.d charactertables.d identifiertables.d
        "),
        commonHeaders: fileArray(env["COMMON"], "
            outbuffer.h
--- a/compiler/src/dmd/cli.d
+++ b/compiler/src/dmd/cli.d
@ -466,6 +466,26 @@ dmd -cov -unittest myprog.d

             $(P Note that multiple `-i=...` options are allowed, each one adds a pattern.)}"
        ),
+        Option("identifiers=<table>",
+            "Specify the non-ASCII tables for D identifiers",
+            `Set the identifier table to use for the non-ASCII values.
+                $(UL
+                    $(LI $(I UAX31): UAX31)
+                    $(LI $(I c99): C99)
+                    $(LI $(I c11): C11)
+                    $(LI $(I all): All, the least restrictive set, which comes all others (default))
+                )`
+        ),
+        Option("identifiers-importc=<table>",
+            "Specify the non-ASCII tables for ImportC identifiers",
+            `Set the identifier table to use for the non-ASCII values.
+                $(UL
+                    $(LI $(I UAX31): UAX31)
+                    $(LI $(I c99): C99)
+                    $(LI $(I c11): C11 (default))
+                    $(LI $(I all): All, the least restrictive set, which comes all others)
+                )`
+        ),
        Option("ignore",
            "deprecated flag, unsupported pragmas are always ignored now"
        ),
--- a/compiler/src/dmd/common/charactertables.d
+++ b/compiler/src/dmd/common/charactertables.d
@ -0,0 +1,267 @@
+/**
+ * Character tables related to identifiers.
+ *
+ * Supports UAX31, C99, C11 and least restrictive (All).
+ *
+ * Copyright: Copyright (C) 1999-2024 by The D Language Foundation, All Rights Reserved
+ * Authors:   $(LINK2 https://cattermole.co.nz, Richard (Rikki) Andrew Cattermole)
+ * License:   $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
+ * Source:    $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/common/charactertables.d, common/charactertables.d)
+ * Documentation: https://dlang.org/phobos/dmd_common_charactertables.html
+ * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/common/charactertables.d
+ */
+module dmd.common.charactertables;
+
+@safe nothrow @nogc pure:
+
+extern(C++):
+
+/// Selects which set of identifier character ranges `IdentifierCharLookup.forTable` returns
+enum IdentifierTable {
+    UAX31, /// Tables from Unicode Standard Annex 31
+    C99, /// Tables from the C99 standard
+    C11, /// Tables from the C11 standard
+    LR, /// Least Restrictive aka All
+}
+
+/// Pair of function pointers classifying identifier characters for one `IdentifierTable`
+struct IdentifierCharLookup
+{
+    @safe nothrow @nogc pure:
+
+    /// Tests whether a character is in the selected table's Start ranges
+    extern(C++) bool function(dchar) isStart;
+    /// Tests whether a character is in the selected table's Continue ranges
+    extern(C++) bool function(dchar) isContinue;
+
+    /// Lookup the table given the table name
+    static IdentifierCharLookup forTable(IdentifierTable table)
+    {
+        import dmd.common.identifiertables;
+
+        // Awful solution to require these lambdas.
+        // However without them the extern(C++) ABI issues crop up for isInRange,
+        //  and then it can't access the tables.
+        final switch(table) {
+            case IdentifierTable.UAX31:
+                return IdentifierCharLookup(
+                        (c) => isInRange!UAX31_Start(c),
+                        (c) => isInRange!UAX31_Continue(c));
+            case IdentifierTable.C99:
+                return IdentifierCharLookup(
+                        (c) => isInRange!FixedTable_C99_Start(c),
+                        (c) => isInRange!FixedTable_C99_Continue(c));
+            case IdentifierTable.C11:
+                return IdentifierCharLookup(
+                        (c) => isInRange!FixedTable_C11_Start(c),
+                        (c) => isInRange!FixedTable_C11_Continue(c));
+            case IdentifierTable.LR:
+                return IdentifierCharLookup(
+                        (c) => isInRange!LeastRestrictive_Start(c),
+                        (c) => isInRange!LeastRestrictive_Continue(c));
+        }
+    }
+}
+
+/**
+Convenience function for use in places where we just don't care,
+what the identifier ranges are, or if it is start/continue.
+
+Returns: `true` if `c` is in the `LeastRestrictive_OfAll` table.
+*/
+bool isAnyIdentifierCharacter(dchar c)
+{
+    import dmd.common.identifiertables;
+    return isInRange!LeastRestrictive_OfAll(c);
+}
+
+///
+unittest
+{
+    assert(isAnyIdentifierCharacter('ğ'));
+}
+
+/**
+Convenience function for use in places where we just don't care,
+what the identifier ranges are.
+
+Returns: is character a member of least restrictive Start
+*/
+bool isAnyStart(dchar c)
+{
+    import dmd.common.identifiertables;
+    return isInRange!LeastRestrictive_Start(c);
+}
+
+///
+unittest
+{
+    assert(isAnyStart('ğ'));
+}
+
+/**
+Convenience function for use in places where we just don't care,
+what the identifier ranges are.
+
+Returns: `true` if `c` is in the `LeastRestrictive_Continue` table.
+*/
+bool isAnyContinue(dchar c)
+{
+    import dmd.common.identifiertables;
+    return isInRange!LeastRestrictive_Continue(c);
+}
+
+///
+unittest
+{
+    assert(isAnyContinue('ğ'));
+}
+
+/// UTF line separator
+enum LS = 0x2028;
+/// UTF paragraph separator
+enum PS = 0x2029;
+
+private
+{
+    enum CMoctal  = 0x1; // set for '0' .. '7'
+    enum CMhex    = 0x2; // set for 0-9, a-f, A-F
+    enum CMidchar = 0x4; // set for ASCII letters, digits and '_'
+    enum CMzerosecond = 0x8; // set for x X b B plus every CMdigitsecond character
+    enum CMdigitsecond = 0x10; // set for 0-9, e E f F l L p P u U, i, '.' and '_'
+    enum CMsinglechar = 0x20; // set for 7-bit values other than '\\', '\n', '\r', 0, 0x1A and '\''
+}
+
+/// Returns: `true` if `c` is an octal digit (`'0'` .. `'7'`)
+bool isoctal(const char c)
+{
+    return (cmtable[c] & CMoctal) != 0;
+}
+
+/// Returns: `true` if `c` is a hexadecimal digit (0-9, a-f, A-F)
+bool ishex(const char c)
+{
+    return (cmtable[c] & CMhex) != 0;
+}
+
+/// Returns: `true` if `c` is an ASCII letter, an ASCII digit or `'_'`
+bool isidchar(const char c)
+{
+    return (cmtable[c] & CMidchar) != 0;
+}
+
+/// Returns: `true` if `c` is one of x X b B, 0-9, e E f F l L p P u U, i, '.' or '_' (presumably what may follow a leading '0' in a number; confirm against the lexer)
+bool isZeroSecond(const char c)
+{
+    return (cmtable[c] & CMzerosecond) != 0;
+}
+
+/// Returns: `true` if `c` is one of 0-9, e E f F l L p P u U, i, '.' or '_' (presumably what may follow a leading non-zero digit; confirm against the lexer)
+bool isDigitSecond(const char c)
+{
+    return (cmtable[c] & CMdigitsecond) != 0;
+}
+
+/// Returns: `true` if `c` is a 7-bit value other than `'\\'`, `'\n'`, `'\r'`, 0, 0x1A and `'\''`
+bool issinglechar(const char c)
+{
+    return (cmtable[c] & CMsinglechar) != 0;
+}
+
+/// Returns: `true` if `c` is an ASCII hexadecimal digit (0-9, a-f, A-F); pure range comparisons, no table lookup
+bool c_isxdigit(const int c)
+{
+    return (( c >= '0' && c <= '9') ||
+        ( c >= 'a' && c <= 'f') ||
+        ( c >= 'A' && c <= 'F'));
+}
+
+/// Returns: `true` if `c` is an ASCII digit or ASCII letter (0-9, a-z, A-Z); pure range comparisons, no table lookup
+bool c_isalnum(const int c)
+{
+    return (( c >= '0' && c <= '9') ||
+        ( c >= 'a' && c <= 'z') ||
+        ( c >= 'A' && c <= 'Z'));
+}
+
+extern(D) private:
+
+// originally from dmd.root.utf; binary search over `Ranges`, a list of inclusive [first, last] pairs (assumed sorted ascending - TODO confirm against the tables)
+bool isInRange(alias Ranges)(dchar c)
+{
+    size_t high = Ranges.length - 1; // NOTE(review): assumes Ranges is non-empty
+    // Shortcut search if c is out of range (low > high makes the loop below a no-op)
+    size_t low = (c < Ranges[0][0] || Ranges[high][1] < c) ? high + 1 : 0;
+    // Binary search
+    while (low <= high)
+    {
+        const size_t mid = low + ((high - low) >> 1);
+        if (c < Ranges[mid][0])
+            high = mid - 1; // no underflow: the shortcut guarantees c >= Ranges[0][0], so mid != 0 here
+        else if (Ranges[mid][1] < c)
+            low = mid + 1;
+        else
+        {
+            assert(Ranges[mid][0] <= c && c <= Ranges[mid][1]);
+            return true;
+        }
+    }
+    return false;
+}
+
+/********************************************
+ * Do our own char maps: a 256-entry table of CM* flag bits indexed by byte value
+ */
+// originally from dmd.lexer (was private)
+static immutable cmtable = ()
+{
+    ubyte[256] table;
+    foreach (const c; 0 .. table.length)
+    {
+        if ('0' <= c && c <= '7') // octal digits
+            table[c] |= CMoctal;
+        if (c_isxdigit(c)) // 0-9, a-f, A-F
+            table[c] |= CMhex;
+        if (c_isalnum(c) || c == '_') // ASCII identifier characters
+            table[c] |= CMidchar;
+
+        switch (c) // assigns the CMzerosecond / CMdigitsecond flags
+        {
+            case 'x': case 'X':
+            case 'b': case 'B':
+                table[c] |= CMzerosecond; // these four get CMzerosecond only
+                break;
+
+            case '0': .. case '9':
+            case 'e': case 'E':
+            case 'f': case 'F':
+            case 'l': case 'L':
+            case 'p': case 'P':
+            case 'u': case 'U':
+            case 'i':
+            case '.':
+            case '_':
+                table[c] |= CMzerosecond | CMdigitsecond; // these get both flags
+                break;
+
+            default:
+                break;
+        }
+
+        switch (c) // assigns CMsinglechar to every 7-bit value except the ones listed
+        {
+            case '\\':
+            case '\n':
+            case '\r':
+            case 0:
+            case 0x1A:
+            case '\'':
+                break;
+            default:
+                if (!(c & 0x80)) // values with the high bit set never get CMsinglechar
+                    table[c] |= CMsinglechar;
+                break;
+        }
+    }
+    return table;
+}();
--- a/compiler/src/dmd/common/charactertables.h
+++ b/compiler/src/dmd/common/charactertables.h
@ -0,0 +1,20 @@
+/**
+ * Character tables related to identifiers.
+ *
+ * Supports UAX31, C99, C11 and least restrictive (All).
+ *
+ * Copyright: Copyright (C) 1999-2024 by The D Language Foundation, All Rights Reserved
+ * Authors:   $(LINK2 https://cattermole.co.nz, Richard (Rikki) Andrew Cattermole)
+ * License:   $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
+ * Source:    $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/common/charactertables.d, common/charactertables.d)
+ */
+
+#pragma once
+
+struct IdentifierCharLookup final
+{
+    bool(*isStart)(char32_t);    // predicate for identifier Start characters
+    bool(*isContinue)(char32_t); // predicate for identifier Continue characters
+
+    // constructor not provided here.
+};
--- a/compiler/src/dmd/common/identifiertables.d
+++ b/compiler/src/dmd/common/identifiertables.d
--- a/compiler/src/dmd/dmangle.d
+++ b/compiler/src/dmd/dmangle.d
@ -72,12 +72,14 @@ void mangleToBuffer(TemplateInstance ti, ref OutBuffer buf)
 /// Returns: `true` if the given character is a valid mangled character
 package bool isValidMangling(dchar c) nothrow
 {
+    import dmd.common.charactertables;
+
    return
        c >= 'A' && c <= 'Z' ||
        c >= 'a' && c <= 'z' ||
        c >= '0' && c <= '9' ||
        c != 0 && strchr("$%().:?@[]_", c) ||
-        isUniAlpha(c);
+        isAnyIdentifierCharacter(c);
 }

 // valid mangled characters
--- a/compiler/src/dmd/doc.d
+++ b/compiler/src/dmd/doc.d
@ -2106,43 +2106,13 @@ int getMarkdownIndent(ref OutBuffer buf, size_t from, size_t to) @safe
    return indent;
 }

-/************************************************
- * Scan forward to one of:
- *      start of identifier
- *      beginning of next line
- *      end of buf
- */
-size_t skiptoident(ref OutBuffer buf, size_t i) @safe
-{
-    const slice = buf[];
-    while (i < slice.length)
-    {
-        dchar c;
-        size_t oi = i;
-        if (utf_decodeChar(slice, i, c))
-        {
-            /* Ignore UTF errors, but still consume input
-             */
-            break;
-        }
-        if (c >= 0x80)
-        {
-            if (!isUniAlpha(c))
-                continue;
-        }
-        else if (!(isalpha(c) || c == '_' || c == '\n'))
-            continue;
-        i = oi;
-        break;
-    }
-    return i;
-}
-
 /************************************************
 * Scan forward past end of identifier.
 */
 size_t skippastident(ref OutBuffer buf, size_t i) @safe
 {
+    import dmd.common.charactertables;
+
    const slice = buf[];
    while (i < slice.length)
    {
@ -2156,7 +2126,8 @@ size_t skippastident(ref OutBuffer buf, size_t i) @safe
        }
        if (c >= 0x80)
        {
-            if (isUniAlpha(c))
+            // we don't care if it is start/continue here
+            if (isAnyIdentifierCharacter(c))
                continue;
        }
        else if (isalnum(c) || c == '_')
@ -2173,6 +2144,8 @@ size_t skippastident(ref OutBuffer buf, size_t i) @safe
 */
 size_t skipPastIdentWithDots(ref OutBuffer buf, size_t i) @safe
 {
+    import dmd.common.charactertables;
+
    const slice = buf[];
    bool lastCharWasDot;
    while (i < slice.length)
@ -2203,7 +2176,8 @@ size_t skipPastIdentWithDots(ref OutBuffer buf, size_t i) @safe
        {
            if (c >= 0x80)
            {
-                if (isUniAlpha(c))
+                // we don't care if it is start/continue here
+                if (isAnyIdentifierCharacter(c))
                {
                    lastCharWasDot = false;
                    continue;
@ -5249,6 +5223,8 @@ bool isCVariadicArg(const(char)[] p) @nogc nothrow pure @safe
@trusted
 bool isIdStart(const(char)* p) @nogc nothrow pure
 {
+    import dmd.common.charactertables;
+
    dchar c = *p;
    if (isalpha(c) || c == '_')
        return true;
@ -5257,7 +5233,7 @@ bool isIdStart(const(char)* p) @nogc nothrow pure
        size_t i = 0;
        if (utf_decodeChar(p[0 .. 4], i, c))
            return false; // ignore errors
-        if (isUniAlpha(c))
+        if (isAnyStart(c))
            return true;
    }
    return false;
@ -5269,6 +5245,8 @@ bool isIdStart(const(char)* p) @nogc nothrow pure
@trusted
 bool isIdTail(const(char)* p) @nogc nothrow pure
 {
+    import dmd.common.charactertables;
+
    dchar c = *p;
    if (isalnum(c) || c == '_')
        return true;
@ -5277,7 +5255,7 @@ bool isIdTail(const(char)* p) @nogc nothrow pure
        size_t i = 0;
        if (utf_decodeChar(p[0 .. 4], i, c))
            return false; // ignore errors
-        if (isUniAlpha(c))
+        if (isAnyContinue(c))
            return true;
    }
    return false;
--- a/compiler/src/dmd/frontend.h
+++ b/compiler/src/dmd/frontend.h
@ -6118,6 +6118,15 @@ enum class CHECKACTION : uint8_t
    context = 3u,
 };

+enum class CLIIdentifierTable : uint8_t
+{
+    default_ = 0u,
+    C99 = 1u,
+    C11 = 2u,
+    UAX31 = 3u,
+    All = 4u,
+};
+
 enum class JsonFieldFlags : uint32_t
 {
    none = 0u,
@ -6137,6 +6146,8 @@ struct CompileEnv final
    bool previewIn;
    bool ddocOutput;
    bool masm;
+    IdentifierCharLookup cCharLookupTable;
+    IdentifierCharLookup dCharLookupTable;
    CompileEnv() :
        versionNumber(),
        date(),
@ -6145,10 +6156,12 @@ struct CompileEnv final
        timestamp(),
        previewIn(),
        ddocOutput(),
-        masm()
+        masm(),
+        cCharLookupTable(),
+        dCharLookupTable()
    {
    }
-    CompileEnv(uint32_t versionNumber, _d_dynamicArray< const char > date = {}, _d_dynamicArray< const char > time = {}, _d_dynamicArray< const char > vendor = {}, _d_dynamicArray< const char > timestamp = {}, bool previewIn = false, bool ddocOutput = false, bool masm = false) :
+    CompileEnv(uint32_t versionNumber, _d_dynamicArray< const char > date = {}, _d_dynamicArray< const char > time = {}, _d_dynamicArray< const char > vendor = {}, _d_dynamicArray< const char > timestamp = {}, bool previewIn = false, bool ddocOutput = false, bool masm = false, IdentifierCharLookup cCharLookupTable = IdentifierCharLookup(), IdentifierCharLookup dCharLookupTable = IdentifierCharLookup()) :
        versionNumber(versionNumber),
        date(date),
        time(time),
@ -6156,7 +6169,9 @@ struct CompileEnv final
        timestamp(timestamp),
        previewIn(previewIn),
        ddocOutput(ddocOutput),
-        masm(masm)
+        masm(masm),
+        cCharLookupTable(cCharLookupTable),
+        dCharLookupTable(dCharLookupTable)
        {}
 };

@ -7804,6 +7819,56 @@ extern _d_real cimagl(complex_t x);

 extern void browse(const char* url);

+enum class IdentifierTable
+{
+    UAX31 = 0,
+    C99 = 1,
+    C11 = 2,
+    LR = 3,
+};
+
+struct IdentifierCharLookup final
+{
+    bool(*isStart)(char32_t );
+    bool(*isContinue)(char32_t );
+    static IdentifierCharLookup forTable(IdentifierTable table);
+    IdentifierCharLookup() :
+        isStart(),
+        isContinue()
+    {
+    }
+    IdentifierCharLookup(bool(*isStart)(char32_t ), bool(*isContinue)(char32_t ) = nullptr) :
+        isStart(isStart),
+        isContinue(isContinue)
+        {}
+};
+
+extern bool isAnyIdentifierCharacter(char32_t c);
+
+extern bool isAnyStart(char32_t c);
+
+extern bool isAnyContinue(char32_t c);
+
+enum : int32_t { LS = 8232 };
+
+enum : int32_t { PS = 8233 };
+
+extern bool isoctal(const char c);
+
+extern bool ishex(const char c);
+
+extern bool isidchar(const char c);
+
+extern bool isZeroSecond(const char c);
+
+extern bool isDigitSecond(const char c);
+
+extern bool issinglechar(const char c);
+
+extern bool c_isxdigit(const int32_t c);
+
+extern bool c_isalnum(const int32_t c);
+
 extern void error(const Loc& loc, const char* format, ...);

 extern void error(const char* filename, uint32_t linnum, uint32_t charnum, const char* format, ...);
@ -8013,6 +8078,8 @@ struct Param final
    CHECKENABLE useSwitchError;
    CHECKENABLE boundscheck;
    CHECKACTION checkAction;
+    CLIIdentifierTable dIdentifierTable;
+    CLIIdentifierTable cIdentifierTable;
    _d_dynamicArray< const char > argv0;
    Array<const char* > modFileAliasStrings;
    Array<const char* > imppath;
@ -8088,6 +8155,8 @@ struct Param final
        useSwitchError((CHECKENABLE)0u),
        boundscheck((CHECKENABLE)0u),
        checkAction((CHECKACTION)0u),
+        dIdentifierTable((CLIIdentifierTable)0u),
+        cIdentifierTable((CLIIdentifierTable)0u),
        argv0(),
        modFileAliasStrings(),
        imppath(),
@ -8119,7 +8188,7 @@ struct Param final
        mapfile()
    {
    }
-    Param(bool obj, bool multiobj = false, bool trace = false, bool tracegc = false, bool vcg_ast = false, DiagnosticReporting useDeprecated = (DiagnosticReporting)1u, bool useUnitTests = false, bool useInline = false, bool release = false, bool preservePaths = false, DiagnosticReporting warnings = (DiagnosticReporting)2u, bool cov = false, uint8_t covPercent = 0u, bool ctfe_cov = false, bool ignoreUnsupportedPragmas = true, bool useModuleInfo = true, bool useTypeInfo = true, bool useExceptions = true, bool useGC = true, bool betterC = false, bool addMain = false, bool allInst = false, bool bitfields = false, CppStdRevision cplusplus = (CppStdRevision)201103u, Help help = Help(), Verbose v = Verbose(), FeatureState useDIP25 = (FeatureState)2u, FeatureState useDIP1000 = (FeatureState)0u, bool ehnogc = false, bool useDIP1021 = false, FeatureState fieldwise = (FeatureState)0u, bool fixAliasThis = false, FeatureState rvalueRefParam = (FeatureState)0u, FeatureState noSharedAccess = (FeatureState)0u, bool previewIn = false, bool inclusiveInContracts = false, bool shortenedMethods = true, bool fixImmutableConv = false, bool fix16997 = true, FeatureState dtorFields = (FeatureState)0u, FeatureState systemVariables = (FeatureState)0u, CHECKENABLE useInvariants = (CHECKENABLE)0u, CHECKENABLE useIn = (CHECKENABLE)0u, CHECKENABLE useOut = (CHECKENABLE)0u, CHECKENABLE useArrayBounds = (CHECKENABLE)0u, CHECKENABLE useAssert = (CHECKENABLE)0u, CHECKENABLE useSwitchError = (CHECKENABLE)0u, CHECKENABLE boundscheck = (CHECKENABLE)0u, CHECKACTION checkAction = (CHECKACTION)0u, _d_dynamicArray< const char > argv0 = {}, Array<const char* > modFileAliasStrings = Array<const char* >(), Array<const char* > imppath = Array<const char* >(), Array<const char* > fileImppath = Array<const char* >(), _d_dynamicArray< const char > objdir = {}, _d_dynamicArray< const char > objname = {}, _d_dynamicArray< const char > libname = {}, Output ddoc = Output(), Output dihdr = Output(), Output cxxhdr = 
Output(), Output json = Output(), JsonFieldFlags jsonFieldFlags = (JsonFieldFlags)0u, Output makeDeps = Output(), Output mixinOut = Output(), Output moduleDeps = Output(), uint32_t debuglevel = 0u, uint32_t versionlevel = 0u, bool run = false, Array<const char* > runargs = Array<const char* >(), Array<const char* > cppswitches = Array<const char* >(), const char* cpp = nullptr, Array<const char* > objfiles = Array<const char* >(), Array<const char* > linkswitches = Array<const char* >(), Array<bool > linkswitchIsForCC = Array<bool >(), Array<const char* > libfiles = Array<const char* >(), Array<const char* > dllfiles = Array<const char* >(), _d_dynamicArray< const char > deffile = {}, _d_dynamicArray< const char > resfile = {}, _d_dynamicArray< const char > exefile = {}, _d_dynamicArray< const char > mapfile = {}) :
+    Param(bool obj, bool multiobj = false, bool trace = false, bool tracegc = false, bool vcg_ast = false, DiagnosticReporting useDeprecated = (DiagnosticReporting)1u, bool useUnitTests = false, bool useInline = false, bool release = false, bool preservePaths = false, DiagnosticReporting warnings = (DiagnosticReporting)2u, bool cov = false, uint8_t covPercent = 0u, bool ctfe_cov = false, bool ignoreUnsupportedPragmas = true, bool useModuleInfo = true, bool useTypeInfo = true, bool useExceptions = true, bool useGC = true, bool betterC = false, bool addMain = false, bool allInst = false, bool bitfields = false, CppStdRevision cplusplus = (CppStdRevision)201103u, Help help = Help(), Verbose v = Verbose(), FeatureState useDIP25 = (FeatureState)2u, FeatureState useDIP1000 = (FeatureState)0u, bool ehnogc = false, bool useDIP1021 = false, FeatureState fieldwise = (FeatureState)0u, bool fixAliasThis = false, FeatureState rvalueRefParam = (FeatureState)0u, FeatureState noSharedAccess = (FeatureState)0u, bool previewIn = false, bool inclusiveInContracts = false, bool shortenedMethods = true, bool fixImmutableConv = false, bool fix16997 = true, FeatureState dtorFields = (FeatureState)0u, FeatureState systemVariables = (FeatureState)0u, CHECKENABLE useInvariants = (CHECKENABLE)0u, CHECKENABLE useIn = (CHECKENABLE)0u, CHECKENABLE useOut = (CHECKENABLE)0u, CHECKENABLE useArrayBounds = (CHECKENABLE)0u, CHECKENABLE useAssert = (CHECKENABLE)0u, CHECKENABLE useSwitchError = (CHECKENABLE)0u, CHECKENABLE boundscheck = (CHECKENABLE)0u, CHECKACTION checkAction = (CHECKACTION)0u, CLIIdentifierTable dIdentifierTable = (CLIIdentifierTable)0u, CLIIdentifierTable cIdentifierTable = (CLIIdentifierTable)0u, _d_dynamicArray< const char > argv0 = {}, Array<const char* > modFileAliasStrings = Array<const char* >(), Array<const char* > imppath = Array<const char* >(), Array<const char* > fileImppath = Array<const char* >(), _d_dynamicArray< const char > objdir = {}, _d_dynamicArray< const char > 
objname = {}, _d_dynamicArray< const char > libname = {}, Output ddoc = Output(), Output dihdr = Output(), Output cxxhdr = Output(), Output json = Output(), JsonFieldFlags jsonFieldFlags = (JsonFieldFlags)0u, Output makeDeps = Output(), Output mixinOut = Output(), Output moduleDeps = Output(), uint32_t debuglevel = 0u, uint32_t versionlevel = 0u, bool run = false, Array<const char* > runargs = Array<const char* >(), Array<const char* > cppswitches = Array<const char* >(), const char* cpp = nullptr, Array<const char* > objfiles = Array<const char* >(), Array<const char* > linkswitches = Array<const char* >(), Array<bool > linkswitchIsForCC = Array<bool >(), Array<const char* > libfiles = Array<const char* >(), Array<const char* > dllfiles = Array<const char* >(), _d_dynamicArray< const char > deffile = {}, _d_dynamicArray< const char > resfile = {}, _d_dynamicArray< const char > exefile = {}, _d_dynamicArray< const char > mapfile = {}) :
        obj(obj),
        multiobj(multiobj),
        trace(trace),
@ -8169,6 +8238,8 @@ struct Param final
        useSwitchError(useSwitchError),
        boundscheck(boundscheck),
        checkAction(checkAction),
+        dIdentifierTable(dIdentifierTable),
+        cIdentifierTable(cIdentifierTable),
        argv0(argv0),
        modFileAliasStrings(modFileAliasStrings),
        imppath(imppath),
--- a/compiler/src/dmd/globals.d
+++ b/compiler/src/dmd/globals.d
@ -72,6 +72,16 @@ enum FeatureState : ubyte
    enabled  = 2,  /// Specified as `-preview=`
 }

+/// Different identifier tables specifiable by CLI
+enum CLIIdentifierTable : ubyte
+{
+    default_ = 0, /// Not specified by user
+    C99      = 1, /// Tables from C99 standard
+    C11      = 2, /// Tables from C11 standard
+    UAX31    = 3, /// Tables from the Unicode Standard Annex 31: UNICODE IDENTIFIERS AND SYNTAX
+    All      = 4, /// The least restrictive set of all other tables
+}
+
 extern(C++) struct Output
 {
    bool doOutput;      // Output is enabled
@ -199,6 +209,9 @@ extern (C++) struct Param

    CHECKACTION checkAction = CHECKACTION.D; // action to take when bounds, asserts or switch defaults are violated

+    CLIIdentifierTable dIdentifierTable = CLIIdentifierTable.default_;
+    CLIIdentifierTable cIdentifierTable = CLIIdentifierTable.default_;
+
    const(char)[] argv0;                // program name
    Array!(const(char)*) modFileAliasStrings; // array of char*'s of -I module filename alias strings
    Array!(const(char)*) imppath;       // array of char*'s of where to look for import modules
--- a/compiler/src/dmd/globals.h
+++ b/compiler/src/dmd/globals.h
@ -13,6 +13,7 @@
 #include "root/dcompat.h"
 #include "root/ctfloat.h"
 #include "common/outbuffer.h"
+#include "common/charactertables.h"
 #include "root/filename.h"
 #include "compiler.h"

@ -82,6 +83,16 @@ enum class FeatureState : unsigned char
    enabled  = 2,  /// Specified as `-preview=`
 };

+/// Different identifier tables specifiable by CLI
+enum class CLIIdentifierTable : unsigned char
+{
+    default_ = 0, /// Not specified by user
+    C99      = 1, /// Tables from C99 standard
+    C11      = 2, /// Tables from C11 standard
+    UAX31    = 3, /// Tables from the Unicode Standard Annex 31: UNICODE IDENTIFIERS AND SYNTAX
+    All      = 4, /// The least restrictive set of all other tables
+};
+
 struct Output
 {
    /// Configuration for the compiler generator
@ -200,6 +211,9 @@ struct Param

    CHECKACTION checkAction;       // action to take when bounds, asserts or switch defaults are violated

+    CLIIdentifierTable dIdentifierTable;
+    CLIIdentifierTable cIdentifierTable;
+
    DString  argv0;    // program name
    Array<const char *> modFileAliasStrings; // array of char*'s of -I module filename alias strings
    Array<const char *> imppath;     // array of char*'s of where to look for import modules
@ -274,6 +288,9 @@ struct CompileEnv
    DString timestamp;
    d_bool previewIn;
    d_bool ddocOutput;
+    d_bool masm;
+    IdentifierCharLookup cCharLookupTable;
+    IdentifierCharLookup dCharLookupTable;
 };

 struct Global
--- a/compiler/src/dmd/identifier.d
+++ b/compiler/src/dmd/identifier.d
@ -315,28 +315,83 @@ nothrow:
    /**********************************
     * ditto
     */
-    extern (D) static bool isValidIdentifier(const(char)[] str) @safe
+    extern (D) static bool isValidIdentifier(const(char)[] str) @trusted
    {
+        import dmd.common.charactertables;
+
        if (str.length == 0 ||
            (str[0] >= '0' && str[0] <= '9')) // beware of isdigit() on signed chars
        {
            return false;
        }

-        size_t idx = 0;
-        while (idx < str.length)
+        // In a previous implementation this was implemented quite naively,
+        //  by utilizing the libc.
+        // However we can do better, by copying the lexer approach to identifier validation.
+
+        const(char)* p = &str[0], pEnd = str.ptr + str.length;
+
+        // handle start characters: non-ASCII must decode cleanly AND be a Start character
        {
-            dchar dc;
-            const s = utf_decodeChar(str, idx, dc);
-            if (s ||
-                !((dc >= 0x80 && isUniAlpha(dc)) || isalnum(dc) || dc == '_'))
+            const c = *p;
+
+            if (isidchar(c))
+                p++;
+            else if (c & 0x80)
            {
-                return false;
+                size_t countDecoded;
+                dchar decoded;
+
+                if (utf_decodeChar(p[0 .. pEnd - p], countDecoded, decoded) is null &&
+                    isAnyStart(decoded))
+                    p += countDecoded;
+                else
+                    return false;
            }
+            else
+                return false;
        }
+
+        // handle continue characters: non-ASCII must decode cleanly AND be a Continue character
+        while(p !is pEnd)
+        {
+            const c = *p;
+
+            if (isidchar(c)) // handles ASCII subset
+            {
+                p++;
+                continue;
+            }
+            else if (c & 0x80)
+            {
+                size_t countDecoded;
+                dchar decoded;
+
+                if (utf_decodeChar(p[0 .. pEnd - p], countDecoded, decoded) is null &&
+                    isAnyContinue(decoded))
+                {
+                    p += countDecoded;
+                    continue;
+                }
+                else
+                    return false;
+            }
+            else
+                return false;
+        }
+
        return true;
    }

+    ///
+    unittest
+    {
+        assert(Identifier.isValidIdentifier("tes123_t".ptr));
+        assert(!Identifier.isValidIdentifier("tes123_^t".ptr));
+        assert(Identifier.isValidIdentifier("te123s_ğt".ptr));
+        assert(!Identifier.isValidIdentifier("t^e123s_ğt".ptr));
+    }
+
    extern (D) static Identifier lookup(const(char)* s, size_t len)
    {
        return lookup(s[0 .. len]);
--- a/compiler/src/dmd/lexer.d
+++ b/compiler/src/dmd/lexer.d
@ -22,9 +22,11 @@ import dmd.errorsink;
 import dmd.id;
 import dmd.identifier;
 import dmd.location;
+import dmd.common.smallbuffer;
+import dmd.common.outbuffer;
+import dmd.common.charactertables;
 import dmd.root.array;
 import dmd.root.ctfloat;
-import dmd.common.outbuffer;
 import dmd.root.port;
 import dmd.root.rmem;
 import dmd.root.utf;
@ -42,6 +44,8 @@ version (DMDLIB)
 */
 struct CompileEnv
 {
+    import dmd.common.charactertables;
+
    uint versionNumber;      /// __VERSION__
    const(char)[] date;      /// __DATE__
    const(char)[] time;      /// __TIME__
@ -51,6 +55,10 @@ struct CompileEnv
    bool previewIn;          /// `in` means `[ref] scope const`, accepts rvalues
    bool ddocOutput;         /// collect embedded documentation comments
    bool masm;               /// use MASM inline asm syntax
+
+    // these need a default otherwise tests won't work.
+    IdentifierCharLookup cCharLookupTable;
+    IdentifierCharLookup dCharLookupTable;
 }

 /***********************************************************
@ -66,6 +74,8 @@ class Lexer

    Token token;

+    IdentifierCharLookup charLookup;
+
    // For ImportC
    bool Ccompile;              /// true if compiling ImportC

@ -142,6 +152,8 @@ class Lexer
        {
            this.compileEnv.versionNumber = 1;
            this.compileEnv.vendor = "DLF";
+            this.compileEnv.cCharLookupTable = IdentifierCharLookup.forTable(IdentifierTable.LR);
+            this.compileEnv.dCharLookupTable = IdentifierCharLookup.forTable(IdentifierTable.LR);
        }
        //initKeywords();
        /* If first line starts with '#!', ignore the line
@ -175,6 +187,16 @@ class Lexer
            }
            endOfLine();
        }
+
+        // Pick the identifier character table that matches the language being lexed:
+        // the ImportC table when Ccompile is set, the D table otherwise.
+        charLookup = this.Ccompile
+            ? this.compileEnv.cCharLookupTable
+            : this.compileEnv.dCharLookupTable;
    }

    /***********************
@ -306,6 +328,8 @@ class Lexer
        t.blockComment = null;
        t.lineComment = null;

+        size_t universalCharacterName4, universalCharacterName8;
+
        while (1)
        {
            t.ptr = p;
@ -395,10 +419,35 @@ class Lexer
                continue; // skip white space

            case '\\':
-                if (Ccompile && (p[1] == '\r' || p[1] == '\n'))
+                if (Ccompile)
                {
-                    ++p; // ignore \ followed by new line, like VC does
-                    continue;
+                    if (p[1] == '\r' || p[1] == '\n')
+                    {
+                        ++p; // ignore \ followed by new line, like VC does
+                        continue;
+                    }
+                    else if (p[1] == 'u' || p[1] == 'U')
+                    {
+                        // Universal Character Name (C):
+                        //   \uXXXX      (2 byte)
+                        //   \UXXXXXXXX  (4 byte)
+                        // Both forms are handed to the main identifier case for processing.
+
+                        // case_ident always pre-increments p, so step back here instead of
+                        // adding a branch to the identifier fast path.
+                        p--;
+
+                        goto case_ident;
+                    }
                }
                goto default;

@ -586,23 +635,161 @@ class Lexer
            case '_':
            case_ident:
                {
-                    while (1)
+        IdentLoop: while (1)
                    {
+                        // If this is changed, change the decrement in C's universal character name code above
+                        // For syntax \uXXXX and \UXXXXXXXX
                        const c = *++p;
+
+                        // Is this the first character of the identifier
+                        // For the universal character name this will line up,
+                        //  for the main switch it won't since it wasn't the first,
+                        //  for the default it won't either because a decode increments.
+                        const isStartCharacter = t.ptr is p;
+
                        if (isidchar(c))
                            continue;
                        else if (c & 0x80)
                        {
                            const s = p;
                            const u = decodeUTF();
-                            if (isUniAlpha(u))
-                                continue;
-                            error(t.loc, "char 0x%04x not allowed in identifier", u);
+
+                            if (isStartCharacter)
+                            {
+                                if (charLookup.isStart(u))
+                                    continue;
+                                error(t.loc, "character 0x%04x is not allowed as a start character in an identifier", u);
+                            }
+                            else
+                            {
+                                if (charLookup.isContinue(u))
+                                    continue;
+                                error(t.loc, "character 0x%04x is not allowed as a continue character in an identifier", u);
+                            }
+
                            p = s;
                        }
+                        else if (Ccompile && c == '\\')
+                        {
+                            // Validate the shape of a C universal character name (\uXXXX or
+                            // \UXXXXXXXX). The value itself is decoded after the loop.
+                            uint times;
+                            const s = p;
+                            p++;
+
+                            if (*p == 'u')
+                            {
+                                // Universal Character Name (C) 2 byte
+                                // \uXXXX
+                                p++;
+                                times = 4;
+                            }
+                            else if (*p == 'U')
+                            {
+                                // Universal Character Name (C) 4 byte
+                                // \UXXXXXXXX
+                                p++;
+                                times = 8;
+                            }
+                            else
+                            {
+                                error(t.loc, "char 0x%x is not allowed to follow '\\' expecting a C universal character name in format \\uXXXX or \\UXXXXXXXX with hex digits instead of X with invalid u/U", *p);
+                                p = s;
+                                break;
+                            }
+
+                            foreach(_; 0 .. times)
+                            {
+                                const hc = *p;
+                                p++;
+
+                                if ((hc >= '0' && hc <= '9') || (hc >= 'a' && hc <= 'f') || (hc >= 'A' && hc <= 'F'))
+                                    continue;
+
+                                error(t.loc, "char 0x%x is not allowed to follow '\\' expecting a C universal character name in format \\uXXXX or \\UXXXXXXXX with hex digits instead of X with invalid hex digit", hc);
+                                p = s;
+                                break IdentLoop;
+                            }
+
+                            // The escape is well formed: count it so the identifier text is decoded
+                            // below instead of being interned with the raw escape sequence in it.
+                            // Counting only here keeps rejected escapes (which end the token before
+                            // the backslash) out of the totals.
+                            if (times == 4)
+                                universalCharacterName4++;
+                            else
+                                universalCharacterName8++;
+
+                            // p is now one past the last hex digit, but the loop head pre-increments;
+                            // step back so the character following the escape is not skipped.
+                            p--;
+
+                            continue;
+                        }
                        break;
                    }
-                    Identifier id = Identifier.idPool((cast(char*)t.ptr)[0 .. p - t.ptr], false);
+
+                    Identifier id;
+
+                    if (universalCharacterName4 > 0 || universalCharacterName8 > 0)
+                    {
+                        // The token contains universal character names: rebuild its text with
+                        // every escape replaced by the UTF-8 encoding of the code point it names.
+                        auto priorValidation = t.ptr[0 .. p - t.ptr];
+                        const(char)* priorVPtr = priorValidation.ptr;
+
+                        // Upper bound on the decoded length: drop the 6/10 source characters of
+                        // each escape and reserve 3/4 bytes for its encoded form.
+                        const possibleLength = (
+                            priorValidation.length - (
+                                (universalCharacterName4 * 6) +
+                                (universalCharacterName8 * 10)
+                            )) + (
+                                (universalCharacterName4 * 3) +
+                                (universalCharacterName8 * 4)
+                            );
+
+                        char[64] buffer = void;
+                        SmallBuffer!char sb = SmallBuffer!char(possibleLength, buffer[]);
+
+                        char[] storage = sb.extent;
+                        size_t offset;
+
+                        while(priorVPtr < &priorValidation[$-1] + 1)
+                        {
+                            if (*priorVPtr == '\\')
+                            {
+                                dchar tempDchar = 0;
+                                uint times;
+
+                                // Whether this escape opens the identifier must be decided before
+                                // priorVPtr moves past it.
+                                const atIdentifierStart = priorVPtr is priorValidation.ptr;
+
+                                // universal character name (C)
+                                if (priorVPtr[1] == 'u')
+                                    times = 4;
+                                else if (priorVPtr[1] == 'U')
+                                    times = 8;
+                                else
+                                    assert(0, "ICE: Universal character name is 2 or 4 bytes only");
+
+                                // Skip the backslash and the u/U; priorVPtr now sits on the first hex digit.
+                                priorVPtr += 2;
+
+                                foreach(_; 0 .. times)
+                                {
+                                    // Read the current digit, then advance (the digits were
+                                    // already validated by the scanning loop above).
+                                    char c = *priorVPtr++;
+                                    if (c >= '0' && c <= '9')
+                                        c -= '0';
+                                    else if (c >= 'a' && c <= 'f')
+                                        c -= 'a' - 10;
+                                    else if (c >= 'A' && c <= 'F')
+                                        c -= 'A' - 10;
+
+                                    tempDchar <<= 4;
+                                    tempDchar |= c;
+                                }
+
+                                // NOTE(review): how utf_encodeChar treats values that are not valid
+                                // code points (e.g. \UFFFFFFFF) is not visible from here - confirm.
+                                utf_encodeChar(&storage[offset], tempDchar);
+                                offset += utf_codeLengthChar(tempDchar);
+
+                                // Could be an error instead of a warning,
+                                //  but hey it was written specifically so why worry?
+                                if (atIdentifierStart)
+                                {
+                                    if (!charLookup.isStart(tempDchar))
+                                        warning(t.loc, "char 0x%x is not allowed start character for an identifier", tempDchar);
+                                }
+                                else
+                                {
+                                    if (!charLookup.isContinue(tempDchar))
+                                        warning(t.loc, "char 0x%x is not allowed continue character for an identifier", tempDchar);
+                                }
+                            }
+                            else
+                                storage[offset++] = *priorVPtr++; // copy the current character, then advance
+                        }
+
+                        id = Identifier.idPool(storage[0 .. offset], false);
+                    }
+                    else
+                        id = Identifier.idPool((cast(char*)t.ptr)[0 .. p - t.ptr], false);
+
                    t.ident = id;
                    t.value = cast(TOK)id.getValue();

@ -1174,9 +1361,11 @@ class Lexer
                    if (c & 0x80)
                    {
                        c = decodeUTF();
-                        // Check for start of unicode identifier
-                        if (isUniAlpha(c))
+
+                        // Check for start of an identifier
+                        if (charLookup.isStart(c))
                            goto case_ident;
+
                        if (c == PS || c == LS)
                        {
                            endOfLine();
@ -1688,7 +1877,7 @@ class Lexer
                    delimright = ']';
                else if (c == '<')
                    delimright = '>';
-                else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
+                else if (isalpha(c) || c == '_' || (c >= 0x80 && charLookup.isStart(c)))
                {
                    // Start of identifier; must be a heredoc
                    Token tok;
@ -1736,7 +1925,9 @@ class Lexer
                }
                else if (c == delimright)
                    goto Ldone;
-                if (startline && (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) && hereid)
+
+                // we're looking for a new identifier token
+                if (startline && (isalpha(c) || c == '_' || (c >= 0x80 && charLookup.isStart(c))) && hereid)
                {
                    Token tok;
                    auto psave = p;
@ -2988,6 +3179,11 @@ class Lexer
        eSink.deprecation(loc, format, args);
    }

+    /// Reports a warning at `loc` by forwarding `format` and `args` to `eSink.warning`.
+    void warning(T...)(const ref Loc loc, const(char)* format, T args)
+    {
+        eSink.warning(loc, format, args);
+    }
+
    void deprecation(T...)(const(char)* format, T args)
    {
        eSink.deprecation(token.loc, format, args);
@ -3416,124 +3612,6 @@ class Lexer
    }
 }

-
-/******************************* Private *****************************************/
-
-private:
-
-private enum LS = 0x2028;       // UTF line separator
-private enum PS = 0x2029;       // UTF paragraph separator
-
-/********************************************
- * Do our own char maps
- */
-private static immutable cmtable = ()
-{
-    ubyte[256] table;
-    foreach (const c; 0 .. table.length)
-    {
-        if ('0' <= c && c <= '7')
-            table[c] |= CMoctal;
-        if (c_isxdigit(c))
-            table[c] |= CMhex;
-        if (c_isalnum(c) || c == '_')
-            table[c] |= CMidchar;
-
-        switch (c)
-        {
-            case 'x': case 'X':
-            case 'b': case 'B':
-                table[c] |= CMzerosecond;
-                break;
-
-            case '0': .. case '9':
-            case 'e': case 'E':
-            case 'f': case 'F':
-            case 'l': case 'L':
-            case 'p': case 'P':
-            case 'u': case 'U':
-            case 'i':
-            case '.':
-            case '_':
-                table[c] |= CMzerosecond | CMdigitsecond;
-                break;
-
-            default:
-                break;
-        }
-
-        switch (c)
-        {
-            case '\\':
-            case '\n':
-            case '\r':
-            case 0:
-            case 0x1A:
-            case '\'':
-                break;
-            default:
-                if (!(c & 0x80))
-                    table[c] |= CMsinglechar;
-                break;
-        }
-    }
-    return table;
-}();
-
-private
-{
-    enum CMoctal  = 0x1;
-    enum CMhex    = 0x2;
-    enum CMidchar = 0x4;
-    enum CMzerosecond = 0x8;
-    enum CMdigitsecond = 0x10;
-    enum CMsinglechar = 0x20;
-}
-
-private bool isoctal(const char c) pure @nogc @safe
-{
-    return (cmtable[c] & CMoctal) != 0;
-}
-
-private bool ishex(const char c) pure @nogc @safe
-{
-    return (cmtable[c] & CMhex) != 0;
-}
-
-private bool isidchar(const char c) pure @nogc @safe
-{
-    return (cmtable[c] & CMidchar) != 0;
-}
-
-private bool isZeroSecond(const char c) pure @nogc @safe
-{
-    return (cmtable[c] & CMzerosecond) != 0;
-}
-
-private bool isDigitSecond(const char c) pure @nogc @safe
-{
-    return (cmtable[c] & CMdigitsecond) != 0;
-}
-
-private bool issinglechar(const char c) pure @nogc @safe
-{
-    return (cmtable[c] & CMsinglechar) != 0;
-}
-
-private bool c_isxdigit(const int c) pure @nogc @safe
-{
-    return (( c >= '0' && c <= '9') ||
-            ( c >= 'a' && c <= 'f') ||
-            ( c >= 'A' && c <= 'F'));
-}
-
-private bool c_isalnum(const int c) pure @nogc @safe
-{
-    return (( c >= '0' && c <= '9') ||
-            ( c >= 'a' && c <= 'z') ||
-            ( c >= 'A' && c <= 'Z'));
-}
-
 /******************************* Unittest *****************************************/

 unittest
--- a/compiler/src/dmd/main.d
+++ b/compiler/src/dmd/main.d
@ -157,6 +157,8 @@ private:
 */
 private int tryMain(size_t argc, const(char)** argv, ref Param params)
 {
+    import dmd.common.charactertables;
+
    Strings files;
    Strings libmodules;
    global._init();
@ -168,6 +170,52 @@ private int tryMain(size_t argc, const(char)** argv, ref Param params)
    global.compileEnv.previewIn        = global.params.previewIn;
    global.compileEnv.ddocOutput       = global.params.ddoc.doOutput;

+    // Translate the identifier table chosen on the command line for ImportC
+    // into the lookup table handed to the lexer.
+    {
+        IdentifierTable importcIdentifierTable;
+
+        final switch(global.params.cIdentifierTable)
+        {
+            case CLIIdentifierTable.C99:
+                importcIdentifierTable = IdentifierTable.C99;
+                break;
+
+            case CLIIdentifierTable.C11:
+            case CLIIdentifierTable.default_:
+                // ImportC is defined against C11, not C23.
+                // If it was C23 this needs to be changed to UAX31 instead.
+                importcIdentifierTable = IdentifierTable.C11;
+                break;
+
+            case CLIIdentifierTable.UAX31:
+                importcIdentifierTable = IdentifierTable.UAX31;
+                break;
+
+            case CLIIdentifierTable.All:
+                importcIdentifierTable = IdentifierTable.LR;
+                break;
+        }
+
+        global.compileEnv.cCharLookupTable = IdentifierCharLookup.forTable(importcIdentifierTable);
+    }
+
+    // Same translation for D sources; only the default differs.
+    {
+        IdentifierTable dlangIdentifierTable;
+
+        final switch(global.params.dIdentifierTable)
+        {
+            case CLIIdentifierTable.C99:
+                dlangIdentifierTable = IdentifierTable.C99;
+                break;
+
+            case CLIIdentifierTable.C11:
+                dlangIdentifierTable = IdentifierTable.C11;
+                break;
+
+            case CLIIdentifierTable.UAX31:
+                dlangIdentifierTable = IdentifierTable.UAX31;
+                break;
+
+            case CLIIdentifierTable.All:
+            case CLIIdentifierTable.default_:
+                // @@@DEPRECATED_2.119@@@
+                // Change the default to UAX31,
+                //  this is a breaking change as C99 (what D used for ~23 years),
+                //  has characters that are not in UAX31.
+                dlangIdentifierTable = IdentifierTable.LR;
+                break;
+        }
+
+        global.compileEnv.dCharLookupTable = IdentifierCharLookup.forTable(dlangIdentifierTable);
+    }
+
    if (params.help.usage)
    {
        usage();
--- a/compiler/src/dmd/mars.d
+++ b/compiler/src/dmd/mars.d
@ -1383,6 +1383,58 @@ bool parseCommandLine(const ref Strings arguments, const size_t argc, ref Param
            params.useInline = true;
            params.dihdr.fullOutput = true;
        }
+        else if (startsWith(p + 1, "identifiers-importc"))
+        {
+            enum len = "-identifiers-importc=".length;
+            // Parse:
+            //      -identifiers-importc=table
+            immutable string msg = "Only `UAX31`, `c99`, `c11`, `all`, allowed for `-identifiers-importc`";
+
+            // startsWith() has matched the first `len - 1` characters, so `p[len - 1]` is at
+            // worst the terminator (assuming `p` is a NUL-terminated C string, as its use with
+            // toDString suggests). Requiring '=' there rejects `-identifiers-importc` given
+            // without a value, where `p + len` would point past the end of the argument,
+            // and look-alikes such as `-identifiers-importcXc99`.
+            if (p[len - 1] == '=' && Identifier.isValidIdentifier(p + len))
+            {
+                const ident = p + len;
+                switch (ident.toDString())
+                {
+                    case "c99":     params.cIdentifierTable = CLIIdentifierTable.C99;   break;
+                    case "c11":     params.cIdentifierTable = CLIIdentifierTable.C11;   break;
+                    case "UAX31":   params.cIdentifierTable = CLIIdentifierTable.UAX31; break;
+                    case "all":     params.cIdentifierTable = CLIIdentifierTable.All;   break;
+                    default:
+                        errorInvalidSwitch(p, msg);
+                        return false;
+                }
+            }
+            else
+            {
+                errorInvalidSwitch(p, msg);
+                return false;
+            }
+        }
+        else if (startsWith(p + 1, "identifiers"))
+        {
+            enum len = "-identifiers=".length;
+            // Parse:
+            //      -identifiers=table
+            immutable string msg = "Only `UAX31`, `c99`, `c11`, `all`, allowed for `-identifiers`";
+
+            // startsWith() has matched the first `len - 1` characters, so `p[len - 1]` is at
+            // worst the terminator (assuming `p` is a NUL-terminated C string, as its use with
+            // toDString suggests). Requiring '=' there rejects `-identifiers` given without a
+            // value, where `p + len` would point past the end of the argument, and
+            // look-alikes such as `-identifiersXc99`.
+            if (p[len - 1] == '=' && Identifier.isValidIdentifier(p + len))
+            {
+                const ident = p + len;
+                switch (ident.toDString())
+                {
+                    case "c99":     params.dIdentifierTable = CLIIdentifierTable.C99;   break;
+                    case "c11":     params.dIdentifierTable = CLIIdentifierTable.C11;   break;
+                    case "UAX31":   params.dIdentifierTable = CLIIdentifierTable.UAX31; break;
+                    case "all":     params.dIdentifierTable = CLIIdentifierTable.All;   break;
+                    default:
+                        errorInvalidSwitch(p, msg);
+                        return false;
+                }
+            }
+            else
+            {
+                errorInvalidSwitch(p, msg);
+                return false;
+            }
+        }
        else if (arg == "-i")
            includeImports = true;
        else if (startsWith(p + 1, "i="))
--- a/compiler/src/dmd/pragmasem.d
+++ b/compiler/src/dmd/pragmasem.d
@ -67,6 +67,8 @@ void pragmaDeclSemantic(PragmaDeclaration pd, Scope* sc)
        }
        version (all)
        {
+            import dmd.common.charactertables;
+
            /* Note: D language specification should not have any assumption about backend
             * implementation. Ideally pragma(mangle) can accept a string of any content.
             *
@ -94,7 +96,7 @@ void pragmaDeclSemantic(PragmaDeclaration pd, Scope* sc)
                    .error(pd.loc, "%s `%s` %.*s", pd.kind, pd.toPrettyChars, cast(int)msg.length, msg.ptr);
                    break;
                }
-                if (!isUniAlpha(c))
+                if (!isAnyIdentifierCharacter(c))
                {
                    .error(pd.loc, "%s `%s` char `0x%04x` not allowed in mangled name", pd.kind, pd.toPrettyChars, c);
                    break;
--- a/compiler/src/dmd/root/utf.d
+++ b/compiler/src/dmd/root/utf.d
@ -27,281 +27,6 @@ bool utf_isValidDchar(dchar c)
    return false;
 }

-/*******************************
- * Return !=0 if unicode alpha.
- * Use table from C99 Appendix D.
- */
-bool isUniAlpha(dchar c)
-{
-    static immutable wchar[2][] ALPHA_TABLE =
-    [
-        [0x00AA, 0x00AA],
-        [0x00B5, 0x00B5],
-        [0x00B7, 0x00B7],
-        [0x00BA, 0x00BA],
-        [0x00C0, 0x00D6],
-        [0x00D8, 0x00F6],
-        [0x00F8, 0x01F5],
-        [0x01FA, 0x0217],
-        [0x0250, 0x02A8],
-        [0x02B0, 0x02B8],
-        [0x02BB, 0x02BB],
-        [0x02BD, 0x02C1],
-        [0x02D0, 0x02D1],
-        [0x02E0, 0x02E4],
-        [0x037A, 0x037A],
-        [0x0386, 0x0386],
-        [0x0388, 0x038A],
-        [0x038C, 0x038C],
-        [0x038E, 0x03A1],
-        [0x03A3, 0x03CE],
-        [0x03D0, 0x03D6],
-        [0x03DA, 0x03DA],
-        [0x03DC, 0x03DC],
-        [0x03DE, 0x03DE],
-        [0x03E0, 0x03E0],
-        [0x03E2, 0x03F3],
-        [0x0401, 0x040C],
-        [0x040E, 0x044F],
-        [0x0451, 0x045C],
-        [0x045E, 0x0481],
-        [0x0490, 0x04C4],
-        [0x04C7, 0x04C8],
-        [0x04CB, 0x04CC],
-        [0x04D0, 0x04EB],
-        [0x04EE, 0x04F5],
-        [0x04F8, 0x04F9],
-        [0x0531, 0x0556],
-        [0x0559, 0x0559],
-        [0x0561, 0x0587],
-        [0x05B0, 0x05B9],
-        [0x05BB, 0x05BD],
-        [0x05BF, 0x05BF],
-        [0x05C1, 0x05C2],
-        [0x05D0, 0x05EA],
-        [0x05F0, 0x05F2],
-        [0x0621, 0x063A],
-        [0x0640, 0x0652],
-        [0x0660, 0x0669],
-        [0x0670, 0x06B7],
-        [0x06BA, 0x06BE],
-        [0x06C0, 0x06CE],
-        [0x06D0, 0x06DC],
-        [0x06E5, 0x06E8],
-        [0x06EA, 0x06ED],
-        [0x06F0, 0x06F9],
-        [0x0901, 0x0903],
-        [0x0905, 0x0939],
-        [0x093D, 0x094D],
-        [0x0950, 0x0952],
-        [0x0958, 0x0963],
-        [0x0966, 0x096F],
-        [0x0981, 0x0983],
-        [0x0985, 0x098C],
-        [0x098F, 0x0990],
-        [0x0993, 0x09A8],
-        [0x09AA, 0x09B0],
-        [0x09B2, 0x09B2],
-        [0x09B6, 0x09B9],
-        [0x09BE, 0x09C4],
-        [0x09C7, 0x09C8],
-        [0x09CB, 0x09CD],
-        [0x09DC, 0x09DD],
-        [0x09DF, 0x09E3],
-        [0x09E6, 0x09F1],
-        [0x0A02, 0x0A02],
-        [0x0A05, 0x0A0A],
-        [0x0A0F, 0x0A10],
-        [0x0A13, 0x0A28],
-        [0x0A2A, 0x0A30],
-        [0x0A32, 0x0A33],
-        [0x0A35, 0x0A36],
-        [0x0A38, 0x0A39],
-        [0x0A3E, 0x0A42],
-        [0x0A47, 0x0A48],
-        [0x0A4B, 0x0A4D],
-        [0x0A59, 0x0A5C],
-        [0x0A5E, 0x0A5E],
-        [0x0A66, 0x0A6F],
-        [0x0A74, 0x0A74],
-        [0x0A81, 0x0A83],
-        [0x0A85, 0x0A8B],
-        [0x0A8D, 0x0A8D],
-        [0x0A8F, 0x0A91],
-        [0x0A93, 0x0AA8],
-        [0x0AAA, 0x0AB0],
-        [0x0AB2, 0x0AB3],
-        [0x0AB5, 0x0AB9],
-        [0x0ABD, 0x0AC5],
-        [0x0AC7, 0x0AC9],
-        [0x0ACB, 0x0ACD],
-        [0x0AD0, 0x0AD0],
-        [0x0AE0, 0x0AE0],
-        [0x0AE6, 0x0AEF],
-        [0x0B01, 0x0B03],
-        [0x0B05, 0x0B0C],
-        [0x0B0F, 0x0B10],
-        [0x0B13, 0x0B28],
-        [0x0B2A, 0x0B30],
-        [0x0B32, 0x0B33],
-        [0x0B36, 0x0B39],
-        [0x0B3D, 0x0B43],
-        [0x0B47, 0x0B48],
-        [0x0B4B, 0x0B4D],
-        [0x0B5C, 0x0B5D],
-        [0x0B5F, 0x0B61],
-        [0x0B66, 0x0B6F],
-        [0x0B82, 0x0B83],
-        [0x0B85, 0x0B8A],
-        [0x0B8E, 0x0B90],
-        [0x0B92, 0x0B95],
-        [0x0B99, 0x0B9A],
-        [0x0B9C, 0x0B9C],
-        [0x0B9E, 0x0B9F],
-        [0x0BA3, 0x0BA4],
-        [0x0BA8, 0x0BAA],
-        [0x0BAE, 0x0BB5],
-        [0x0BB7, 0x0BB9],
-        [0x0BBE, 0x0BC2],
-        [0x0BC6, 0x0BC8],
-        [0x0BCA, 0x0BCD],
-        [0x0BE7, 0x0BEF],
-        [0x0C01, 0x0C03],
-        [0x0C05, 0x0C0C],
-        [0x0C0E, 0x0C10],
-        [0x0C12, 0x0C28],
-        [0x0C2A, 0x0C33],
-        [0x0C35, 0x0C39],
-        [0x0C3E, 0x0C44],
-        [0x0C46, 0x0C48],
-        [0x0C4A, 0x0C4D],
-        [0x0C60, 0x0C61],
-        [0x0C66, 0x0C6F],
-        [0x0C82, 0x0C83],
-        [0x0C85, 0x0C8C],
-        [0x0C8E, 0x0C90],
-        [0x0C92, 0x0CA8],
-        [0x0CAA, 0x0CB3],
-        [0x0CB5, 0x0CB9],
-        [0x0CBE, 0x0CC4],
-        [0x0CC6, 0x0CC8],
-        [0x0CCA, 0x0CCD],
-        [0x0CDE, 0x0CDE],
-        [0x0CE0, 0x0CE1],
-        [0x0CE6, 0x0CEF],
-        [0x0D02, 0x0D03],
-        [0x0D05, 0x0D0C],
-        [0x0D0E, 0x0D10],
-        [0x0D12, 0x0D28],
-        [0x0D2A, 0x0D39],
-        [0x0D3E, 0x0D43],
-        [0x0D46, 0x0D48],
-        [0x0D4A, 0x0D4D],
-        [0x0D60, 0x0D61],
-        [0x0D66, 0x0D6F],
-        [0x0E01, 0x0E3A],
-        [0x0E40, 0x0E5B],
-        [0x0E81, 0x0E82],
-        [0x0E84, 0x0E84],
-        [0x0E87, 0x0E88],
-        [0x0E8A, 0x0E8A],
-        [0x0E8D, 0x0E8D],
-        [0x0E94, 0x0E97],
-        [0x0E99, 0x0E9F],
-        [0x0EA1, 0x0EA3],
-        [0x0EA5, 0x0EA5],
-        [0x0EA7, 0x0EA7],
-        [0x0EAA, 0x0EAB],
-        [0x0EAD, 0x0EAE],
-        [0x0EB0, 0x0EB9],
-        [0x0EBB, 0x0EBD],
-        [0x0EC0, 0x0EC4],
-        [0x0EC6, 0x0EC6],
-        [0x0EC8, 0x0ECD],
-        [0x0ED0, 0x0ED9],
-        [0x0EDC, 0x0EDD],
-        [0x0F00, 0x0F00],
-        [0x0F18, 0x0F19],
-        [0x0F20, 0x0F33],
-        [0x0F35, 0x0F35],
-        [0x0F37, 0x0F37],
-        [0x0F39, 0x0F39],
-        [0x0F3E, 0x0F47],
-        [0x0F49, 0x0F69],
-        [0x0F71, 0x0F84],
-        [0x0F86, 0x0F8B],
-        [0x0F90, 0x0F95],
-        [0x0F97, 0x0F97],
-        [0x0F99, 0x0FAD],
-        [0x0FB1, 0x0FB7],
-        [0x0FB9, 0x0FB9],
-        [0x10A0, 0x10C5],
-        [0x10D0, 0x10F6],
-        [0x1E00, 0x1E9B],
-        [0x1EA0, 0x1EF9],
-        [0x1F00, 0x1F15],
-        [0x1F18, 0x1F1D],
-        [0x1F20, 0x1F45],
-        [0x1F48, 0x1F4D],
-        [0x1F50, 0x1F57],
-        [0x1F59, 0x1F59],
-        [0x1F5B, 0x1F5B],
-        [0x1F5D, 0x1F5D],
-        [0x1F5F, 0x1F7D],
-        [0x1F80, 0x1FB4],
-        [0x1FB6, 0x1FBC],
-        [0x1FBE, 0x1FBE],
-        [0x1FC2, 0x1FC4],
-        [0x1FC6, 0x1FCC],
-        [0x1FD0, 0x1FD3],
-        [0x1FD6, 0x1FDB],
-        [0x1FE0, 0x1FEC],
-        [0x1FF2, 0x1FF4],
-        [0x1FF6, 0x1FFC],
-        [0x203F, 0x2040],
-        [0x207F, 0x207F],
-        [0x2102, 0x2102],
-        [0x2107, 0x2107],
-        [0x210A, 0x2113],
-        [0x2115, 0x2115],
-        [0x2118, 0x211D],
-        [0x2124, 0x2124],
-        [0x2126, 0x2126],
-        [0x2128, 0x2128],
-        [0x212A, 0x2131],
-        [0x2133, 0x2138],
-        [0x2160, 0x2182],
-        [0x3005, 0x3007],
-        [0x3021, 0x3029],
-        [0x3041, 0x3093],
-        [0x309B, 0x309C],
-        [0x30A1, 0x30F6],
-        [0x30FB, 0x30FC],
-        [0x3105, 0x312C],
-        [0x4E00, 0x9FA5],
-        [0xAC00, 0xD7A3]
-    ];
-
-    size_t high = ALPHA_TABLE.length - 1;
-    // Shortcut search if c is out of range
-    size_t low = (c < ALPHA_TABLE[0][0] || ALPHA_TABLE[high][1] < c) ? high + 1 : 0;
-    // Binary search
-    while (low <= high)
-    {
-        const size_t mid = low + ((high - low) >> 1);
-        if (c < ALPHA_TABLE[mid][0])
-            high = mid - 1;
-        else if (ALPHA_TABLE[mid][1] < c)
-            low = mid + 1;
-        else
-        {
-            assert(ALPHA_TABLE[mid][0] <= c && c <= ALPHA_TABLE[mid][1]);
-            return true;
-        }
-    }
-    return false;
-}
-
 /**
 * Returns the code length of c in code units.
 */
--- a/compiler/test/compilable/ident_UAX31.c
+++ b/compiler/test/compilable/ident_UAX31.c
@ -0,0 +1,9 @@
+// REQUIRED_ARGS: -identifiers-importc=UAX31
+
+// sppn doesn't support anything newer than c99
+// DISABLED: win32omf
+
+// verify that the UAX31 identifier set is applied.
+
+int \u00F8ide\u00F9nt;
+int øideùnt2;
--- a/compiler/test/compilable/ident_UAX31.d
+++ b/compiler/test/compilable/ident_UAX31.d
@ -0,0 +1,5 @@
+// REQUIRED_ARGS: -identifiers=UAX31
+
+// verify that the UAX31 identifier set is applied.
+
+int øideùnt;
--- a/compiler/test/compilable/ident_all.c
+++ b/compiler/test/compilable/ident_all.c
@ -0,0 +1,11 @@
+// REQUIRED_ARGS: -identifiers-importc=all
+
+// sppn doesn't support anything newer than c99
+// DISABLED: win32omf
+
+// verify that the All identifier set is applied.
+
+int \u00F8ide\u00F9nt;
+int \u00AAide\u00B5nt;
+int \u00A8ide\u00AFnt;
+int \u00F8ide\u00F9nt;
--- a/compiler/test/compilable/ident_all.d
+++ b/compiler/test/compilable/ident_all.d
@ -0,0 +1,10 @@
+// REQUIRED_ARGS: -identifiers=all
+
+// verify that the All identifier set is applied.
+
+int øideùnt;
+int ªideµnt;
+int ¨ide¯nt;
+
+// just to play it safe, do we support one unicode then another at start?
+int øùident;
--- a/compiler/test/compilable/ident_c11.c
+++ b/compiler/test/compilable/ident_c11.c
@ -0,0 +1,9 @@
+// REQUIRED_ARGS: -identifiers-importc=c11
+
+// sppn doesn't support anything newer than c99
+// DISABLED: win32omf
+
+// verify that the C11 identifier set is applied.
+
+int \u00A8ide\u00AFnt;
+int ¨ide¯nt;
--- a/compiler/test/compilable/ident_c11.d
+++ b/compiler/test/compilable/ident_c11.d
@ -0,0 +1,5 @@
+// REQUIRED_ARGS: -identifiers=c11
+
+// verify that the C11 identifier set is applied.
+
+int ¨ide¯nt;
--- a/compiler/test/compilable/ident_c99.c
+++ b/compiler/test/compilable/ident_c99.c
@ -0,0 +1,6 @@
+// REQUIRED_ARGS: -identifiers-importc=c99
+
+// verify that the C99 identifier set is applied.
+
+int \u00AAide\u00B5nt;
+int ªideµnt2;
--- a/compiler/test/compilable/ident_c99.d
+++ b/compiler/test/compilable/ident_c99.d
@ -0,0 +1,5 @@
+// REQUIRED_ARGS: -identifiers=c99
+
+// verify that the C99 identifier set is applied.
+
+int ªideµnt;
--- a/compiler/test/fail_compilation/lexer23465.d
+++ b/compiler/test/fail_compilation/lexer23465.d
@ -1,7 +1,7 @@
 /*
 TEST_OUTPUT:
 ---
-fail_compilation/lexer23465.d(19): Error: char 0x1f37a not allowed in identifier
+fail_compilation/lexer23465.d(19): Error: character 0x1f37a is not allowed as a continue character in an identifier
 fail_compilation/lexer23465.d(19): Error: character 0x1f37a is not a valid token
 fail_compilation/lexer23465.d(20): Error: character '\' is not a valid token
 fail_compilation/lexer23465.d(21): Error: unterminated /+ +/ comment
--- a/compiler/tools/unicode_tables/derivedCoreProperties.d
+++ b/compiler/tools/unicode_tables/derivedCoreProperties.d
@ -0,0 +1,77 @@
+/**
+This module parses the UCD DerivedCoreProperties.txt file.
+
+Copyright:   Copyright (C) 1999-2024 by The D Language Foundation, All Rights Reserved
+Authors:     $(LINK2 https://cattermole.co.nz, Richard (Rikki) Andrew Cattermole)
+License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
+*/
+module unicode_tables.derivedCoreProperties;
+import unicode_tables.util;
+
+ValueRanges propertyXID_StartRanges, propertyXID_ContinueRanges;
+
+/**
+Reads a UCD DerivedCoreProperties.txt style file and collects the ranges of
+the `XID_Start` and `XID_Continue` properties.
+
+Every line has its `#` comment removed and is split on `;`. Lines that do not
+consist of exactly two fields are ignored, as are properties other than the
+two named above. The first field is either a single hexadecimal code point or
+a `first..last` pair; the second field is the property name.
+
+Results are appended to `propertyXID_StartRanges` and
+`propertyXID_ContinueRanges`.
+
+Params:
+    dataFile = path of the file to read
+*/
+void parseProperties(string dataFile)
+{
+    import std.algorithm : countUntil, startsWith;
+    import std.file : readText;
+    import std.string : lineSplitter, strip, split;
+    import std.conv : parse;
+
+    foreach (rawLine; readText(dataFile).lineSplitter)
+    {
+        // Drop the end of line comment (if any), then the surrounding whitespace.
+        const ptrdiff_t hashAt = rawLine.countUntil('#');
+        string content = (hashAt >= 0 ? rawLine[0 .. hashAt] : rawLine).strip;
+
+        // Only "<code points> ; <property>" lines are of interest.
+        string[] parts = content.split(";");
+        if (parts.length != 2)
+            continue;
+
+        string codePoints = parts[0].strip;
+        const string property = parts[1].strip;
+
+        // parse consumes the digits it reads, leaving "..<last>" behind for a pair.
+        ValueRange span;
+        span.start = parse!uint(codePoints, 16);
+
+        if (codePoints.startsWith(".."))
+        {
+            codePoints = codePoints[2 .. $];
+            span.end = parse!uint(codePoints, 16);
+        }
+        else
+        {
+            span.end = span.start;
+        }
+
+        if (property == "XID_Start")
+            propertyXID_StartRanges.add(span);
+        else if (property == "XID_Continue")
+            propertyXID_ContinueRanges.add(span);
+    }
+}
--- a/compiler/tools/unicode_tables/fixedtables.d
+++ b/compiler/tools/unicode_tables/fixedtables.d
@ -0,0 +1,165 @@
+/**
+Known fixed tables.
+
+Copyright:   Copyright (C) 1999-2024 by The D Language Foundation, All Rights Reserved
+Authors:     $(LINK2 https://cattermole.co.nz, Richard (Rikki) Andrew Cattermole)
+License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
+*/
+module unicode_tables.fixedtables;
+import unicode_tables.util;
+
+/// The ASCII block: every code point from 0 up to and including 127.
+immutable ValueRanges ASCII_Table = ValueRanges([
+    ValueRange(0, 127)
+]);
+
+/**
+Hand maintained table of the non-ASCII code point ranges used for the C99
+identifier set.
+
+Each entry is an inclusive `ValueRange(first, last)`; the entries are listed in
+ascending order and do not overlap.
+*/
+immutable ValueRanges c99_Table = ValueRanges([
+    ValueRange(0x00AA, 0x00AA), ValueRange(0x00B5, 0x00B5),
+    ValueRange(0x00B7, 0x00B7), ValueRange(0x00BA, 0x00BA),
+    ValueRange(0x00C0, 0x00D6), ValueRange(0x00D8, 0x00F6),
+    ValueRange(0x00F8, 0x01F5), ValueRange(0x01FA, 0x0217),
+    ValueRange(0x0250, 0x02A8), ValueRange(0x02B0, 0x02B8),
+    ValueRange(0x02BB, 0x02BB), ValueRange(0x02BD, 0x02C1),
+    ValueRange(0x02D0, 0x02D1), ValueRange(0x02E0, 0x02E4),
+    ValueRange(0x037A, 0x037A), ValueRange(0x0386, 0x0386),
+    ValueRange(0x0388, 0x038A), ValueRange(0x038C, 0x038C),
+    ValueRange(0x038E, 0x03A1), ValueRange(0x03A3, 0x03CE),
+    ValueRange(0x03D0, 0x03D6), ValueRange(0x03DA, 0x03DA),
+    ValueRange(0x03DC, 0x03DC), ValueRange(0x03DE, 0x03DE),
+    ValueRange(0x03E0, 0x03E0), ValueRange(0x03E2, 0x03F3),
+    ValueRange(0x0401, 0x040C), ValueRange(0x040E, 0x044F),
+    ValueRange(0x0451, 0x045C), ValueRange(0x045E, 0x0481),
+    ValueRange(0x0490, 0x04C4), ValueRange(0x04C7, 0x04C8),
+    ValueRange(0x04CB, 0x04CC), ValueRange(0x04D0, 0x04EB),
+    ValueRange(0x04EE, 0x04F5), ValueRange(0x04F8, 0x04F9),
+    ValueRange(0x0531, 0x0556), ValueRange(0x0559, 0x0559),
+    ValueRange(0x0561, 0x0587), ValueRange(0x05B0, 0x05B9),
+    ValueRange(0x05BB, 0x05BD), ValueRange(0x05BF, 0x05BF),
+    ValueRange(0x05C1, 0x05C2), ValueRange(0x05D0, 0x05EA),
+    ValueRange(0x05F0, 0x05F2), ValueRange(0x0621, 0x063A),
+    ValueRange(0x0640, 0x0652), ValueRange(0x0660, 0x0669),
+    ValueRange(0x0670, 0x06B7), ValueRange(0x06BA, 0x06BE),
+    ValueRange(0x06C0, 0x06CE), ValueRange(0x06D0, 0x06DC),
+    ValueRange(0x06E5, 0x06E8), ValueRange(0x06EA, 0x06ED),
+    ValueRange(0x06F0, 0x06F9), ValueRange(0x0901, 0x0903),
+    ValueRange(0x0905, 0x0939), ValueRange(0x093D, 0x094D),
+    ValueRange(0x0950, 0x0952), ValueRange(0x0958, 0x0963),
+    ValueRange(0x0966, 0x096F), ValueRange(0x0981, 0x0983),
+    ValueRange(0x0985, 0x098C), ValueRange(0x098F, 0x0990),
+    ValueRange(0x0993, 0x09A8), ValueRange(0x09AA, 0x09B0),
+    ValueRange(0x09B2, 0x09B2), ValueRange(0x09B6, 0x09B9),
+    ValueRange(0x09BE, 0x09C4), ValueRange(0x09C7, 0x09C8),
+    ValueRange(0x09CB, 0x09CD), ValueRange(0x09DC, 0x09DD),
+    ValueRange(0x09DF, 0x09E3), ValueRange(0x09E6, 0x09F1),
+    ValueRange(0x0A02, 0x0A02), ValueRange(0x0A05, 0x0A0A),
+    ValueRange(0x0A0F, 0x0A10), ValueRange(0x0A13, 0x0A28),
+    ValueRange(0x0A2A, 0x0A30), ValueRange(0x0A32, 0x0A33),
+    ValueRange(0x0A35, 0x0A36), ValueRange(0x0A38, 0x0A39),
+    ValueRange(0x0A3E, 0x0A42), ValueRange(0x0A47, 0x0A48),
+    ValueRange(0x0A4B, 0x0A4D), ValueRange(0x0A59, 0x0A5C),
+    ValueRange(0x0A5E, 0x0A5E), ValueRange(0x0A66, 0x0A6F),
+    ValueRange(0x0A74, 0x0A74), ValueRange(0x0A81, 0x0A83),
+    ValueRange(0x0A85, 0x0A8B), ValueRange(0x0A8D, 0x0A8D),
+    ValueRange(0x0A8F, 0x0A91), ValueRange(0x0A93, 0x0AA8),
+    ValueRange(0x0AAA, 0x0AB0), ValueRange(0x0AB2, 0x0AB3),
+    ValueRange(0x0AB5, 0x0AB9), ValueRange(0x0ABD, 0x0AC5),
+    ValueRange(0x0AC7, 0x0AC9), ValueRange(0x0ACB, 0x0ACD),
+    ValueRange(0x0AD0, 0x0AD0), ValueRange(0x0AE0, 0x0AE0),
+    ValueRange(0x0AE6, 0x0AEF), ValueRange(0x0B01, 0x0B03),
+    ValueRange(0x0B05, 0x0B0C), ValueRange(0x0B0F, 0x0B10),
+    ValueRange(0x0B13, 0x0B28), ValueRange(0x0B2A, 0x0B30),
+    ValueRange(0x0B32, 0x0B33), ValueRange(0x0B36, 0x0B39),
+    ValueRange(0x0B3D, 0x0B43), ValueRange(0x0B47, 0x0B48),
+    ValueRange(0x0B4B, 0x0B4D), ValueRange(0x0B5C, 0x0B5D),
+    ValueRange(0x0B5F, 0x0B61), ValueRange(0x0B66, 0x0B6F),
+    ValueRange(0x0B82, 0x0B83), ValueRange(0x0B85, 0x0B8A),
+    ValueRange(0x0B8E, 0x0B90), ValueRange(0x0B92, 0x0B95),
+    ValueRange(0x0B99, 0x0B9A), ValueRange(0x0B9C, 0x0B9C),
+    ValueRange(0x0B9E, 0x0B9F), ValueRange(0x0BA3, 0x0BA4),
+    ValueRange(0x0BA8, 0x0BAA), ValueRange(0x0BAE, 0x0BB5),
+    ValueRange(0x0BB7, 0x0BB9), ValueRange(0x0BBE, 0x0BC2),
+    ValueRange(0x0BC6, 0x0BC8), ValueRange(0x0BCA, 0x0BCD),
+    ValueRange(0x0BE7, 0x0BEF), ValueRange(0x0C01, 0x0C03),
+    ValueRange(0x0C05, 0x0C0C), ValueRange(0x0C0E, 0x0C10),
+    ValueRange(0x0C12, 0x0C28), ValueRange(0x0C2A, 0x0C33),
+    ValueRange(0x0C35, 0x0C39), ValueRange(0x0C3E, 0x0C44),
+    ValueRange(0x0C46, 0x0C48), ValueRange(0x0C4A, 0x0C4D),
+    ValueRange(0x0C60, 0x0C61), ValueRange(0x0C66, 0x0C6F),
+    ValueRange(0x0C82, 0x0C83), ValueRange(0x0C85, 0x0C8C),
+    ValueRange(0x0C8E, 0x0C90), ValueRange(0x0C92, 0x0CA8),
+    ValueRange(0x0CAA, 0x0CB3), ValueRange(0x0CB5, 0x0CB9),
+    ValueRange(0x0CBE, 0x0CC4), ValueRange(0x0CC6, 0x0CC8),
+    ValueRange(0x0CCA, 0x0CCD), ValueRange(0x0CDE, 0x0CDE),
+    ValueRange(0x0CE0, 0x0CE1), ValueRange(0x0CE6, 0x0CEF),
+    ValueRange(0x0D02, 0x0D03), ValueRange(0x0D05, 0x0D0C),
+    ValueRange(0x0D0E, 0x0D10), ValueRange(0x0D12, 0x0D28),
+    ValueRange(0x0D2A, 0x0D39), ValueRange(0x0D3E, 0x0D43),
+    ValueRange(0x0D46, 0x0D48), ValueRange(0x0D4A, 0x0D4D),
+    ValueRange(0x0D60, 0x0D61), ValueRange(0x0D66, 0x0D6F),
+    ValueRange(0x0E01, 0x0E3A), ValueRange(0x0E40, 0x0E5B),
+    ValueRange(0x0E81, 0x0E82), ValueRange(0x0E84, 0x0E84),
+    ValueRange(0x0E87, 0x0E88), ValueRange(0x0E8A, 0x0E8A),
+    ValueRange(0x0E8D, 0x0E8D), ValueRange(0x0E94, 0x0E97),
+    ValueRange(0x0E99, 0x0E9F), ValueRange(0x0EA1, 0x0EA3),
+    ValueRange(0x0EA5, 0x0EA5), ValueRange(0x0EA7, 0x0EA7),
+    ValueRange(0x0EAA, 0x0EAB), ValueRange(0x0EAD, 0x0EAE),
+    ValueRange(0x0EB0, 0x0EB9), ValueRange(0x0EBB, 0x0EBD),
+    ValueRange(0x0EC0, 0x0EC4), ValueRange(0x0EC6, 0x0EC6),
+    ValueRange(0x0EC8, 0x0ECD), ValueRange(0x0ED0, 0x0ED9),
+    ValueRange(0x0EDC, 0x0EDD), ValueRange(0x0F00, 0x0F00),
+    ValueRange(0x0F18, 0x0F19), ValueRange(0x0F20, 0x0F33),
+    ValueRange(0x0F35, 0x0F35), ValueRange(0x0F37, 0x0F37),
+    ValueRange(0x0F39, 0x0F39), ValueRange(0x0F3E, 0x0F47),
+    ValueRange(0x0F49, 0x0F69), ValueRange(0x0F71, 0x0F84),
+    ValueRange(0x0F86, 0x0F8B), ValueRange(0x0F90, 0x0F95),
+    ValueRange(0x0F97, 0x0F97), ValueRange(0x0F99, 0x0FAD),
+    ValueRange(0x0FB1, 0x0FB7), ValueRange(0x0FB9, 0x0FB9),
+    ValueRange(0x10A0, 0x10C5), ValueRange(0x10D0, 0x10F6),
+    ValueRange(0x1E00, 0x1E9B), ValueRange(0x1EA0, 0x1EF9),
+    ValueRange(0x1F00, 0x1F15), ValueRange(0x1F18, 0x1F1D),
+    ValueRange(0x1F20, 0x1F45), ValueRange(0x1F48, 0x1F4D),
+    ValueRange(0x1F50, 0x1F57), ValueRange(0x1F59, 0x1F59),
+    ValueRange(0x1F5B, 0x1F5B), ValueRange(0x1F5D, 0x1F5D),
+    ValueRange(0x1F5F, 0x1F7D), ValueRange(0x1F80, 0x1FB4),
+    ValueRange(0x1FB6, 0x1FBC), ValueRange(0x1FBE, 0x1FBE),
+    ValueRange(0x1FC2, 0x1FC4), ValueRange(0x1FC6, 0x1FCC),
+    ValueRange(0x1FD0, 0x1FD3), ValueRange(0x1FD6, 0x1FDB),
+    ValueRange(0x1FE0, 0x1FEC), ValueRange(0x1FF2, 0x1FF4),
+    ValueRange(0x1FF6, 0x1FFC), ValueRange(0x203F, 0x2040),
+    ValueRange(0x207F, 0x207F), ValueRange(0x2102, 0x2102),
+    ValueRange(0x2107, 0x2107), ValueRange(0x210A, 0x2113),
+    ValueRange(0x2115, 0x2115), ValueRange(0x2118, 0x211D),
+    ValueRange(0x2124, 0x2124), ValueRange(0x2126, 0x2126),
+    ValueRange(0x2128, 0x2128), ValueRange(0x212A, 0x2131),
+    ValueRange(0x2133, 0x2138), ValueRange(0x2160, 0x2182),
+    ValueRange(0x3005, 0x3007), ValueRange(0x3021, 0x3029),
+    ValueRange(0x3041, 0x3093), ValueRange(0x309B, 0x309C),
+    ValueRange(0x30A1, 0x30F6), ValueRange(0x30FB, 0x30FC),
+    ValueRange(0x3105, 0x312C), ValueRange(0x4E00, 0x9FA5),
+    ValueRange(0xAC00, 0xD7A3)
+]);
+
+/**
+Hand maintained table of the non-ASCII code point ranges used for the C11
+identifier set.
+
+Each entry is an inclusive `ValueRange(first, last)`; the entries are listed in
+ascending order and do not overlap. Every supplementary plane from 1 to 14 is
+included up to its `xFFFD` code point.
+*/
+immutable ValueRanges c11_Table = ValueRanges([
+    ValueRange(0x00A8, 0x00A8), ValueRange(0x00AA, 0x00AA),
+    ValueRange(0x00AD, 0x00AD), ValueRange(0x00AF,0x00AF),
+    ValueRange(0x00B2, 0x00B5), ValueRange(0x00B7, 0x00BA),
+    ValueRange(0x00BC, 0x00BE), ValueRange(0x00C0, 0x00D6),
+    ValueRange(0x00D8, 0x00F6), ValueRange(0x00F8, 0x00FF),
+    ValueRange(0x0100, 0x167F), ValueRange(0x1681, 0x180D),
+    ValueRange(0x180F, 0x1FFF), ValueRange(0x200B, 0x200D),
+    ValueRange(0x202A, 0x202E), ValueRange(0x203F, 0x2040),
+    ValueRange(0x2054, 0x2054), ValueRange(0x2060, 0x206F),
+    ValueRange(0x2070, 0x218F), ValueRange(0x2460, 0x24FF),
+    ValueRange(0x2776, 0x2793), ValueRange(0x2C00, 0x2DFF),
+    ValueRange(0x2E80, 0x2FFF), ValueRange(0x3004, 0x3007),
+    ValueRange(0x3021, 0x302F), ValueRange(0x3031, 0x303F),
+    ValueRange(0x3040, 0xD7FF), ValueRange(0xF900, 0xFD3D),
+    ValueRange(0xFD40, 0xFDCF), ValueRange(0xFDF0, 0xFE44),
+    ValueRange(0xFE47, 0xFFFD), ValueRange(0x10000, 0x1FFFD),
+    ValueRange(0x20000, 0x2FFFD), ValueRange(0x30000, 0x3FFFD),
+    ValueRange(0x40000, 0x4FFFD), ValueRange(0x50000, 0x5FFFD),
+    ValueRange(0x60000, 0x6FFFD), ValueRange(0x70000, 0x7FFFD),
+    ValueRange(0x80000, 0x8FFFD), ValueRange(0x90000, 0x9FFFD),
+    ValueRange(0xA0000, 0xAFFFD), ValueRange(0xB0000, 0xBFFFD),
+    ValueRange(0xC0000, 0xCFFFD), ValueRange(0xD0000, 0xDFFFD),
+    ValueRange(0xE0000, 0xEFFFD),
+]);
--- a/compiler/tools/unicode_tables/unicodeData.d
+++ b/compiler/tools/unicode_tables/unicodeData.d
@ -0,0 +1,184 @@
+/**
+This module parses the UCD UnicodeData.txt file.
+
+Copyright:   Copyright (C) 1999-2024 by The D Language Foundation, All Rights Reserved
+Authors:     $(LINK2 https://cattermole.co.nz, Richard (Rikki) Andrew Cattermole)
+License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
+*/
+module unicode_tables.unicodeData;
+import unicode_tables.util;
+
+UDEntry[] udEntries;
+
+/**
+Reads a UCD UnicodeData.txt style file and appends one `UDEntry` per accepted
+line to `udEntries`.
+
+Lines are split on `;` and skipped unless they have exactly 15 fields.
+Field 0 is parsed as a hexadecimal code point, field 2 is matched against the
+member names of `GeneralCategory`, and field 3 is parsed as the canonical
+combining class.
+
+A pair of `<..., First>` / `<..., Last>` lines produces a single entry: the
+`First` line creates the entry and the `Last` line only extends its range end.
+
+Params:
+    dataFile = path of the file to read
+*/
+void parseUnicodeData(string dataFile)
+{
+    import std.algorithm : countUntil, endsWith;
+    import std.file : readText;
+    import std.string : lineSplitter, strip, split;
+    import std.conv : parse;
+
+    // nextRangeEnd: a "First>" line was seen and its "Last>" line is still pending.
+    // expectedRangeEnd: the current line is a "Last>" line; it is set in the name
+    //  handling below and cleared again further down within the same iteration.
+    bool expectedRangeEnd, nextRangeEnd;
+
+    foreach (line; readText(dataFile).lineSplitter)
+    {
+        {
+            // handle end of line comment
+            ptrdiff_t offset = line.countUntil('#');
+            if (offset >= 0)
+                line = line[0 .. offset];
+            line = line.strip;
+        }
+
+        string[] fields = line.split(";");
+        {
+            foreach (ref field; fields)
+            {
+                field = field.strip;
+            }
+
+            // Empty lines and lines without exactly 15 fields are ignored.
+            if (fields.length == 0)
+            {
+                continue;
+            }
+            else if (fields.length != 15)
+            {
+                continue;
+            }
+        }
+
+        {
+            /+
+            How first field ranges are specified (the First, Last bit):
+            3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
+            4DBF;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
+            +/
+
+            // Names in angle brackets are placeholders rather than character names.
+            if (fields[1].endsWith(">"))
+            {
+                if (fields[1].endsWith("First>"))
+                {
+                    // Falls through below, where the entry for the range start is created.
+                    nextRangeEnd = true;
+                }
+                else if (fields[1].endsWith("Last>"))
+                {
+                    // A "Last>" line must directly follow its "First>" line.
+                    assert(nextRangeEnd);
+                    nextRangeEnd = false;
+                    expectedRangeEnd = true;
+                }
+                else if (fields[1] == "<control>")
+                    {
+                        // NOTE(review): expectedRangeEnd looks to be always false here, as it is
+                        //  reset in the iteration that sets it; control characters therefore
+                        //  fall through and get a regular entry.
+                        if (expectedRangeEnd)
+                        {
+                            nextRangeEnd = false;
+                            expectedRangeEnd = false;
+                            continue;
+                        }
+                    }
+                    else
+                    {
+                        // Any other bracketed name is skipped.
+                        continue;
+                    }
+            }
+            else if (expectedRangeEnd)
+            {
+                continue;
+            }
+        }
+
+        uint character = parse!uint(fields[0], 16);
+
+        if (expectedRangeEnd)
+        {
+            // "Last>" line: extend the entry created by the matching "First>" line.
+            udEntries[$ - 1].range.end = character;
+            expectedRangeEnd = false;
+            continue;
+        }
+
+        {
+            UDEntry entry;
+            entry.range = ValueRange(character);
+
+            // Map the category abbreviation onto the enum member of the same name;
+            //  an abbreviation without a matching member leaves the default value.
+            static foreach (GC; __traits(allMembers, GeneralCategory))
+            {
+                if (fields[2] == GC)
+                    entry.generalCategory = __traits(getMember, GeneralCategory, GC);
+            }
+
+            entry.canonicalCombiningClass = parse!int(fields[3]);
+
+            udEntries ~= entry;
+        }
+    }
+}
+
+/// One entry taken from UnicodeData.txt, covering a code point or a range of them.
+struct UDEntry
+{
+    /// Code points that this entry describes.
+    ValueRange range;
+    /// General category of the code points.
+    GeneralCategory generalCategory;
+    /// Canonical combining class of the code points.
+    int canonicalCombiningClass;
+
+    @safe:
+
+    /// Returns: `true` when the canonical combining class is zero.
+    bool isStarter()
+    {
+        return 0 == canonicalCombiningClass;
+    }
+
+    /// Returns: `true` when the general category is one of Lu, Ll, Lt, Lm or Lo.
+    bool isAlpha()
+    {
+        const category = generalCategory;
+
+        return category == GeneralCategory.Lu
+            || category == GeneralCategory.Ll
+            || category == GeneralCategory.Lt
+            || category == GeneralCategory.Lm
+            || category == GeneralCategory.Lo;
+    }
+}
+
+/**
+Unicode General_Category values.
+
+The member names are the short aliases as they appear in the general category
+field of UnicodeData.txt; the parser matches that field against these names.
+Long names in the member documentation follow UAX #44.
+*/
+enum GeneralCategory
+{
+    None, /// No category has been assigned (the default value)
+    Lu, /// Uppercase_Letter
+    Ll, /// Lowercase_Letter
+    Lt, /// Titlecase_Letter
+    LC, /// Cased_Letter (grouping of Lu, Ll and Lt)
+    Lm, /// Modifier_Letter
+    Lo, /// Other_Letter
+    L, /// Letter (grouping)
+    Mn, /// Nonspacing_Mark
+    Mc, /// Spacing_Mark
+    Me, /// Enclosing_Mark
+    M, /// Mark (grouping)
+    Nd, /// Decimal_Number
+    Nl, /// Letter_Number
+    No, /// Other_Number
+    N, /// Number (grouping)
+    Pc, /// Connector_Punctuation
+    Pd, /// Dash_Punctuation
+    Ps, /// Open_Punctuation
+    Pe, /// Close_Punctuation
+    Pi, /// Initial_Punctuation
+    Pf, /// Final_Punctuation
+    Po, /// Other_Punctuation
+    P, /// Punctuation (grouping)
+    Sm, /// Math_Symbol
+    Sc, /// Currency_Symbol
+    Sk, /// Modifier_Symbol
+    So, /// Other_Symbol
+    S, /// Symbol (grouping)
+    Zs, /// Space_Separator
+    Zl, /// Line_Separator
+    Zp, /// Paragraph_Separator
+    Z, /// Separator (grouping)
+    Cc, /// Control
+    Cf, /// Format
+    Cs, /// Surrogate
+    Co, /// Private_Use
+    Cn, /// Unassigned
+    C, /// Other (grouping)
+}
--- a/compiler/tools/unicode_tables/util.d
+++ b/compiler/tools/unicode_tables/util.d
@ -0,0 +1,145 @@
+/**
+Utilities for working with Unicode ranges.
+
+Copyright:   Copyright (C) 1999-2024 by The D Language Foundation, All Rights Reserved
+Authors:     $(LINK2 https://cattermole.co.nz, Richard (Rikki) Andrew Cattermole)
+License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
+*/
+module unicode_tables.util;
+
+/// An inclusive range of code points, `start` up to and including `end`.
+struct ValueRange
+{
+    dchar start, end;
+@safe:
+
+    /// Creates a range that holds the single code point `index`.
+    this(dchar index)
+    {
+        start = index;
+        end = index;
+    }
+
+    /// Creates the range `start` to `end` (inclusive); `end` must not be below `start`.
+    this(dchar start, dchar end)
+    {
+        assert(end >= start);
+
+        this.start = start;
+        this.end = end;
+    }
+
+    /// Returns: `true` when the range holds exactly one code point.
+    bool isSingle() const
+    {
+        return end == start;
+    }
+
+    /// Returns: `true` when `index` lies inside the range, bounds included.
+    bool within(dchar index) const
+    {
+        return index >= start && index <= end;
+    }
+
+    /// Returns: the number of code points in the range.
+    uint count() const
+    {
+        return end + 1 - start;
+    }
+
+    /// Orders ranges by their `start` only; `end` does not take part.
+    int opCmp(const ValueRange other) const {
+        if (start < other.start)
+            return -1;
+        if (start > other.start)
+            return 1;
+        return 0;
+    }
+
+    /// Calls `del` for each code point in ascending order, stopping at the first non-zero result.
+    int opApply(scope int delegate(dchar) @safe del) const
+    {
+        foreach (dchar codePoint; start .. end + 1)
+        {
+            if (const int stop = del(codePoint))
+                return stop;
+        }
+
+        return 0;
+    }
+}
+
+/**
+A set of code points stored as a list of inclusive ranges.
+
+`add` expects `ranges` to be sorted by start and free of overlaps, and keeps it
+that way.
+*/
+struct ValueRanges
+{
+    ValueRange[] ranges;
+
+@safe:
+
+    /**
+    Adds `toAdd` to the set, coalescing it with any stored range that it
+    overlaps or directly neighbours.
+
+    A stored range is never shrunk: adding a range that is already covered is
+    a no-op. Ranges may be added in any order, appending in ascending order is
+    the cheap case.
+
+    Params:
+        toAdd = the range to include
+    */
+    void add(ValueRange toAdd)
+    {
+        // Nothing stored yet, or entirely beyond the last range with a gap in between.
+        if (ranges.length == 0 || ranges[$ - 1].end + 1 < toAdd.start)
+        {
+            ranges ~= toAdd;
+            return;
+        }
+
+        // Begins inside of, or directly after, the last range: only ever grow it.
+        if (toAdd.start >= ranges[$ - 1].start)
+        {
+            if (toAdd.end > ranges[$ - 1].end)
+                ranges[$ - 1].end = toAdd.end;
+            return;
+        }
+
+        // Out of order: rebuild the list with toAdd placed at its sorted position,
+        //  absorbing every stored range that it overlaps or touches.
+        ValueRange[] rebuilt;
+        bool placed;
+
+        foreach (existing; ranges)
+        {
+            if (existing.end + 1 < toAdd.start)
+            {
+                // entirely before toAdd
+                rebuilt ~= existing;
+            }
+            else if (toAdd.end + 1 < existing.start)
+            {
+                // entirely after toAdd
+                if (!placed)
+                {
+                    rebuilt ~= toAdd;
+                    placed = true;
+                }
+                rebuilt ~= existing;
+            }
+            else
+            {
+                // overlapping or adjacent
+                if (existing.start < toAdd.start)
+                    toAdd.start = existing.start;
+                if (existing.end > toAdd.end)
+                    toAdd.end = existing.end;
+            }
+        }
+
+        if (!placed)
+            rebuilt ~= toAdd;
+
+        ranges = rebuilt;
+    }
+
+    /**
+    Params:
+        butNotThis = code points to leave out
+    Returns: a new set with every code point of this set that is not in `butNotThis`.
+    */
+    ValueRanges not(const ref ValueRanges butNotThis) const
+    {
+        ValueRanges ret;
+
+        // Goes code point by code point; add glues neighbours back into ranges.
+        foreach (toAdd; this)
+        {
+            if (butNotThis.within(toAdd))
+                continue;
+            ret.add(ValueRange(toAdd));
+        }
+
+        return ret;
+    }
+
+    /**
+    Params:
+        andThis = the set to combine with
+    Returns: a new set with the code points of both this set and `andThis`.
+    */
+    ValueRanges merge(const ref ValueRanges andThis) const
+    {
+        import std.algorithm : sort;
+        ValueRanges ret;
+
+        // ValueRange orders by start, so ranges arrive at add in ascending start order.
+        auto sorted = sort((this.ranges ~ andThis.ranges).dup);
+
+        foreach(range; sorted) {
+            ret.add(range);
+        }
+
+        return ret;
+    }
+
+    /// Returns: `true` when `index` is in one of the stored ranges.
+    bool within(dchar index) const
+    {
+        foreach (range; ranges)
+        {
+            if (range.within(index))
+                return true;
+        }
+
+        return false;
+    }
+
+    /// Returns: the sum of the code point counts of all stored ranges.
+    uint count() const
+    {
+        uint ret;
+
+        foreach (range; ranges)
+        {
+            ret += range.count;
+        }
+
+        return ret;
+    }
+
+    /// Calls `del` for each code point of each stored range, stopping at the first non-zero result.
+    int opApply(scope int delegate(dchar) @safe del) const
+    {
+        int result;
+
+        foreach (range; ranges)
+        {
+            result = range.opApply(del);
+            if (result)
+                return result;
+        }
+
+        return result;
+    }
+}
--- a/compiler/tools/unicodetables.d
+++ b/compiler/tools/unicodetables.d
@ -0,0 +1,206 @@
+/**
+Generates the Unicode tables and associated Identifier tables for dmd-fe.
+
+These tables are stored in ``dmd.common.identifiertables``.
+They are C99, C11, UAX31 and a least restrictive set (All).
+
+You can run this via ``rdmd unicodetables.d``.
+
+You will likely only need to run this program whenever the Unicode standard updates.
+It does not need to be run automatically as part of CI. As long as it is kept in working condition when committed, it is unlikely to break long term, since it only needs non-fancy features.
+
+Place the updated files from the $(LINK2 https://www.unicode.org/Public/, Unicode database) into a directory ``UCD-<version>/`` and update the ``UCDDirectory`` variable.
+Make sure to commit the updated ``UCDDirectory`` variable into the repository so we can keep track of what the latest version it has been updated to.
+
+The update procedure is similar to Phobos's Unicode table generator for ``std.uni``.
+If you know one, you can do the other fairly easily.
+
+Copyright:   Copyright (C) 1999-2024 by The D Language Foundation, All Rights Reserved
+Authors:     $(LINK2 https://cattermole.co.nz, Richard (Rikki) Andrew Cattermole)
+License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
+*/
+module unicodetables;
+import unicode_tables.util;
+import unicode_tables.fixedtables;
+import std.stdio : File, writeln;
+
+// Locations of the input and output files.
+// All of these are relative paths, so they depend on the directory that the tool is run from.
+enum {
+    // don't forget to update me when you commit new tables!
+    UCDDirectory = "UCD-15.1.0/",
+    // Input: read by parseUnicodeData.
+    UnicodeDataFile = UCDDirectory ~ "UnicodeData.txt",
+    // Input: read by parseProperties.
+    DerivedCorePropertiesFile = UCDDirectory ~ "DerivedCoreProperties.txt",
+
+    // Output: the generated D module, overwritten on every run.
+    UnicodeTableFile = "../src/dmd/common/identifiertables.d",
+}
+
+// Leaves the ASCII ranges out of the generated tables.
+// Remove this version if ASCII is not handled elsewhere (e.g. by the lexer).
+version = IgnoreASCIIRanges;
+
+File tableFile;
+
+/**
+Generates the identifier tables module.
+
+Checks that both UCD input files exist, parses them, and only then creates the
+output file and writes the tables into it. Parsing first means that a failure
+while reading the inputs does not leave a truncated table file behind.
+
+Params:
+    args = command line arguments, not used
+Returns: 0 on success, 1 if UnicodeData.txt is missing, 2 if DerivedCoreProperties.txt is missing.
+*/
+int main(string[] args)
+{
+    import std.file : exists;
+
+    if (!exists(UnicodeDataFile)) {
+        writeln("Missing UCD table UnicodeData.txt");
+        return 1;
+    } else if (!exists(DerivedCorePropertiesFile)) {
+        writeln("Missing UCD table DerivedCoreProperties.txt");
+        return 2;
+    }
+
+    {
+        import unicode_tables.unicodeData;
+        import unicode_tables.derivedCoreProperties;
+
+        // Read the inputs before touching the output file.
+        parseUnicodeData(UnicodeDataFile);
+        parseProperties(DerivedCorePropertiesFile);
+    }
+
+    {
+        tableFile = File(UnicodeTableFile, "w+");
+        // The generator is the unicodetables module; unicode_tables is the directory of its helpers.
+        tableFile.writeln("// Generated by compiler/tools/unicodetables.d DO NOT MODIFY!!!");
+        tableFile.writeln("module dmd.common.identifiertables;");
+        tableFile.writeln();
+    }
+
+    write_XID_Start;
+    tableFile.writeln;
+
+    write_XID_Continue;
+    tableFile.writeln;
+
+    write_other_tables;
+    tableFile.writeln;
+
+    write_least_restrictive_table;
+
+    return 0;
+}
+
+/**
+Writes `vr` to the table file as a `static immutable dchar[2][]` array literal,
+with one `[start, end]` pair per stored range.
+
+Params:
+    name = identifier of the generated array
+    vr = ranges to write out
+*/
+void writeTable(string name, const ValueRanges vr)
+{
+    tableFile.writeln("static immutable dchar[2][] ", name, " = [");
+
+    for (size_t row = 0; row < vr.ranges.length; ++row)
+    {
+        const pair = vr.ranges[row];
+        tableFile.writefln!"    [0x%X, 0x%X],"(pair.start, pair.end);
+    }
+
+    tableFile.writeln("];");
+}
+
+/**
+Writes the `UAX31_Start` table, based upon the XID_Start property ranges.
+
+With `IgnoreASCIIRanges` the ASCII code points are removed from the table.
+Without it, `_` (0x5F) is inserted into the table in addition to the property ranges.
+*/
+void write_XID_Start()
+{
+    import unicode_tables.derivedCoreProperties;
+    import std.algorithm : sort;
+
+    ValueRanges start = ValueRanges(propertyXID_StartRanges.ranges.dup);
+
+    version(IgnoreASCIIRanges)
+    {
+        // Remove the ASCII ranges as they are a waste of time here, they are handled elsewhere.
+        start = start.not(ASCII_Table);
+    }
+    else
+    {
+        // This may be not needed, as we'll handle ASCII elsewhere in lexer,
+        //  but if we don't in some place we'll want this instead.
+        // Append directly rather than via ValueRanges.add: add would fold the
+        //  underscore into the last (highest) range instead of inserting it.
+        start.ranges ~= ValueRange(0x5F); // add _
+        start.ranges.sort!((a, b) => a.start < b.start);
+    }
+
+    tableFile.writeln("/**");
+    tableFile.writeln("UAX31 profile Start");
+    tableFile.writeln("Entries: ", start.count);
+    tableFile.writeln("*/");
+    writeTable("UAX31_Start", start);
+}
+
+/**
+Writes the `UAX31_Continue` table, based upon the XID_Continue property ranges.
+
+With `IgnoreASCIIRanges` the ASCII code points are removed from the table.
+*/
+void write_XID_Continue()
+{
+    import unicode_tables.derivedCoreProperties;
+
+    // Work on a copy so that the parsed property ranges stay as they are.
+    ValueRanges table;
+    table.ranges = propertyXID_ContinueRanges.ranges.dup;
+
+    version(IgnoreASCIIRanges)
+    {
+        // ASCII is handled elsewhere, no need to have it in the table.
+        table = table.not(ASCII_Table);
+    }
+
+    const total = table.count;
+
+    tableFile.writeln("/**");
+    tableFile.writeln("UAX31 profile Continue");
+    tableFile.writeln("Entries: ", total);
+    tableFile.writeln("*/");
+    writeTable("UAX31_Continue", table);
+}
+
+/**
+Writes the fixed C99 and C11 tables.
+
+For both standards the Continue table holds the data, and the Start table is
+emitted as an alias of it.
+*/
+void write_other_tables()
+{
+    // Writes the documentation comment that goes in front of a table or alias.
+    void docComment(string title, uint entries)
+    {
+        tableFile.writeln("/**");
+        tableFile.writeln(title);
+        tableFile.writeln("Entries: ", entries);
+        tableFile.writeln("*/");
+    }
+
+    const uint c99Entries = c99_Table.count;
+    const uint c11Entries = c11_Table.count;
+
+    docComment("C99 Start", c99Entries);
+    tableFile.writeln("alias FixedTable_C99_Start = FixedTable_C99_Continue;");
+    tableFile.writeln;
+
+    docComment("C99 Continue", c99Entries);
+    writeTable("FixedTable_C99_Continue", c99_Table);
+    tableFile.writeln;
+
+    docComment("C11 Start", c11Entries);
+    tableFile.writeln("alias FixedTable_C11_Start = FixedTable_C11_Continue;");
+    tableFile.writeln;
+
+    docComment("C11 Continue", c11Entries);
+    writeTable("FixedTable_C11_Continue", c11_Table);
+}
+
+/**
+Writes the least restrictive tables: the C99 and C11 tables merged with the
+XID_Start ranges, with the XID_Continue ranges, and with both of them.
+
+With `IgnoreASCIIRanges` the ASCII code points are removed from all three.
+*/
+void write_least_restrictive_table() {
+    import unicode_tables.derivedCoreProperties;
+
+    const ValueRanges fixedTables = c99_Table.merge(c11_Table);
+    ValueRanges startSet = propertyXID_StartRanges.merge(fixedTables);
+    ValueRanges continueSet = propertyXID_ContinueRanges.merge(fixedTables);
+    ValueRanges combinedSet = startSet.merge(continueSet);
+
+    version(IgnoreASCIIRanges)
+    {
+        // ASCII is handled elsewhere, no need to have it in the tables.
+        startSet = startSet.not(ASCII_Table);
+        continueSet = continueSet.not(ASCII_Table);
+        combinedSet = combinedSet.not(ASCII_Table);
+    }
+
+    // Writes a documentation comment followed by the table itself.
+    void emit(string title, string name, const ValueRanges table)
+    {
+        tableFile.writeln("/**");
+        tableFile.writeln(title);
+        tableFile.writeln("Entries: ", table.count);
+        tableFile.writeln("*/");
+        writeTable(name, table);
+    }
+
+    emit("Least restrictive with both Start and Continue", "LeastRestrictive_OfAll", combinedSet);
+    tableFile.writeln;
+
+    emit("Least restrictive Start", "LeastRestrictive_Start", startSet);
+    tableFile.writeln;
+
+    emit("Least restrictive Continue", "LeastRestrictive_Continue", continueSet);
+}