diff --git a/changelog/dmd.hexstring-cast.dd b/changelog/dmd.hexstring-cast.dd new file mode 100644 index 0000000000..9036dd7f52 --- /dev/null +++ b/changelog/dmd.hexstring-cast.dd @@ -0,0 +1,20 @@ +Hex strings can now be cast to integer arrays + +Hex strings are the most efficient way to embed binary data into source files. +However, they couldn't easily be used to initialize a `short[]`, `int[]` or `long[]` because re-interpret casting arrays is not allowed during CTFE. +Now, hex strings can be cast to integer arrays with element types larger than `byte`. +A big endian byte order is assumed, consistent with how integer literals are written. + +--- +immutable uint[] data = cast(immutable uint[]) x"AABBCCDD"; + +static assert(data[0] == 0xAABBCCDD); +--- + +When the hex string has a character postfix, or its length is not a multiple of the element size, it is an error: + +--- +auto e = cast(immutable ushort[]) x"AABBCC"; // Error, 3 bytes is not a multiple of `ushort.sizeof` + +auto f = cast(immutable ushort[]) x"AABB"w; // Error, hex string has wide character postfix +--- diff --git a/compiler/src/dmd/constfold.d b/compiler/src/dmd/constfold.d index cee1f6364b..41fed9cae4 100644 --- a/compiler/src/dmd/constfold.d +++ b/compiler/src/dmd/constfold.d @@ -711,7 +711,7 @@ UnionExp Equal(EXP op, const ref Loc loc, Type type, Expression e1, Expression e cmp = 1; // if dim1 winds up being 0 foreach (i; 0 .. 
dim1) { - uinteger_t c = es1.getCodeUnit(i); + uinteger_t c = es1.getIndex(i); auto ee2 = es2[i]; if (ee2.isConst() != 1) { @@ -1119,7 +1119,7 @@ UnionExp Index(Type type, Expression e1, Expression e2, bool indexIsInBounds) } else { - emplaceExp!(IntegerExp)(&ue, loc, es1.getCodeUnit(cast(size_t) i), type); + emplaceExp!(IntegerExp)(&ue, loc, es1.getIndex(cast(size_t) i), type); } } else if (e1.type.toBasetype().ty == Tsarray && e2.op == EXP.int64) @@ -1282,7 +1282,7 @@ void sliceAssignArrayLiteralFromString(ArrayLiteralExp existingAE, const StringE Type elemType = existingAE.type.nextOf(); foreach (j; 0 .. len) { - const val = newval.getCodeUnit(j); + const val = newval.getIndex(j); (*existingAE.elements)[j + firstIndex] = new IntegerExp(newval.loc, val, elemType); } } diff --git a/compiler/src/dmd/ctfeexpr.d b/compiler/src/dmd/ctfeexpr.d index 5fe1e7dccb..af83aad554 100644 --- a/compiler/src/dmd/ctfeexpr.d +++ b/compiler/src/dmd/ctfeexpr.d @@ -568,6 +568,9 @@ StringExp createBlockDuplicatedStringLiteral(UnionExp* pue, const ref Loc loc, T case 4: (cast(dchar*)s)[elemi] = value; break; + case 8: + (cast(ulong*)s)[elemi] = value; + break; default: assert(0); } @@ -1494,7 +1497,7 @@ Expression ctfeIndex(UnionExp* pue, const ref Loc loc, Type type, Expression e1, error(loc, "string index %llu is out of bounds `[0 .. %llu]`", indx, cast(ulong)es1.len); return CTFEExp.cantexp; } - emplaceExp!IntegerExp(pue, loc, es1.getCodeUnit(cast(size_t) indx), type); + emplaceExp!IntegerExp(pue, loc, es1.getIndex(cast(size_t) indx), type); return pue.exp(); } @@ -1704,7 +1707,7 @@ Expression changeArrayLiteralLength(UnionExp* pue, const ref Loc loc, TypeArray void* s = mem.xcalloc(newlen + 1, oldse.sz); const data = oldse.peekData(); memcpy(s, data.ptr, copylen * oldse.sz); - const defaultValue = cast(uint)defaultElem.toInteger(); + const defaultValue = cast(ulong)defaultElem.toInteger(); foreach (size_t elemi; copylen .. 
newlen)
         {
             switch (oldse.sz)
@@ -1718,6 +1721,9 @@ Expression changeArrayLiteralLength(UnionExp* pue, const ref Loc loc, TypeArray
             case 4:
                 (cast(dchar*)s)[cast(size_t)(indxlo + elemi)] = cast(dchar)defaultValue;
                 break;
+            case 8:
+                (cast(ulong*)s)[cast(size_t)(indxlo + elemi)] = cast(ulong)defaultValue;
+                break;
             default:
                 assert(0);
             }
diff --git a/compiler/src/dmd/dinterpret.d b/compiler/src/dmd/dinterpret.d
index ea66051ef2..d8069c63a5 100644
--- a/compiler/src/dmd/dinterpret.d
+++ b/compiler/src/dmd/dinterpret.d
@@ -6098,11 +6098,41 @@ public:
                 result.type = e.to;
                 return;
             }
+
             // Disallow array type painting, except for conversions between built-in
             // types of identical size.
             if ((e.to.ty == Tsarray || e.to.ty == Tarray) && (e1.type.ty == Tsarray || e1.type.ty == Tarray) && !isSafePointerCast(e1.type.nextOf(), e.to.nextOf()))
             {
+                auto se = e1.isStringExp();
+                auto tn = e.to.nextOf();
+                // Allow casting a hex string literal to short[], int[] or long[].
+                // The element type must be integral and at most 8 bytes wide: those are
+                // the only sizes `arrayCastBigEndian` and `StringExp.getIndex` handle,
+                // so anything else (float[], struct arrays, ...) falls through to the
+                // generic "not supported" error below instead of tripping `assert(0)`.
+                if (se && se.hexString && se.postfix == StringExp.NoPostfix &&
+                    tn.isintegral() && tn.size <= 8)
+                {
+                    const sz = cast(size_t) tn.size;
+                    if ((se.len % sz) != 0)
+                    {
+                        error(e.loc, "hex string length %d must be a multiple of %d to cast to `%s`",
+                            cast(int) se.len, cast(int) sz, e.to.toChars());
+                        result = CTFEExp.cantexp;
+                        return;
+                    }
+
+                    auto str = arrayCastBigEndian((cast(const ubyte[]) se.peekString()), sz);
+                    emplaceExp!(StringExp)(pue, e1.loc, str, se.len / sz, cast(ubyte) sz);
+                    result = pue.exp();
+                    result.type = e.to;
+                    return;
+                }
                 error(e.loc, "array cast from `%s` to `%s` is not supported at compile time", e1.type.toChars(), e.to.toChars());
+                if (se && se.hexString && se.postfix != StringExp.NoPostfix)
+                    errorSupplemental(e.loc, "perhaps remove postfix `%s` from hex string",
+                        (cast(char) se.postfix ~ "\0").ptr);
+
                 result = CTFEExp.cantexp;
                 return;
             }
@@ -7720,3 +7750,45 @@ private void removeHookTraceImpl(ref CallExp ce, ref FuncDeclaration fd)
     if (global.params.v.verbose)
         message("strip %s =>\n %s", oldCE.toChars(), ce.toChars());
 }
+
+/**
+ * Cast a `ubyte[]` to an array of larger integers as if we are on a big endian architecture
+ * Params:
+ *  data = array with big endian data; trailing bytes that do not fill a whole
+ *         element are ignored, so the caller is expected to check divisibility first
+ *  size = 1 for ubyte[], 2 for ushort[], 4 for uint[], 8 for ulong[]; any other value asserts
+ * Returns: copy of `data`, with bytes shuffled if compiled for `version(LittleEndian)`
+ */
+ubyte[] arrayCastBigEndian(const ubyte[] data, size_t size)
+{
+    ubyte[] impl(T)()
+    {
+        auto result = new T[](data.length / T.sizeof);
+        foreach (i; 0 .. result.length)
+        {
+            result[i] = 0;
+            foreach (j; 0 .. T.sizeof)
+            {
+                result[i] |= T(data[i * T.sizeof + j]) << ((T.sizeof - 1 - j) * 8); // byte 0 lands in the top bits
+            }
+        }
+        return cast(ubyte[]) result;
+    }
+    switch (size)
+    {
+        case 1: return data.dup;
+        case 2: return impl!ushort;
+        case 4: return impl!uint;
+        case 8: return impl!ulong;
+        default: assert(0);
+    }
+}
+
+unittest
+{
+    ubyte[] data = [0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22];
+    assert(cast(ulong[]) arrayCastBigEndian(data, 8) == [0xAABBCCDDEEFF1122]);
+    assert(cast(uint[]) arrayCastBigEndian(data, 4) == [0xAABBCCDD, 0xEEFF1122]);
+    assert(cast(ushort[]) arrayCastBigEndian(data, 2) == [0xAABB, 0xCCDD, 0xEEFF, 0x1122]);
+    assert(cast(ubyte[]) arrayCastBigEndian(data, 1) == data);
+}
diff --git a/compiler/src/dmd/dmangle.d b/compiler/src/dmd/dmangle.d
index b46ce50d84..baf05c61fc 100644
--- a/compiler/src/dmd/dmangle.d
+++ b/compiler/src/dmd/dmangle.d
@@ -502,6 +502,20 @@ public:
         toBuffer(*buf, id.toString(), s);
     }
 
+    void mangleInteger(dinteger_t v)
+    {
+        if (cast(sinteger_t) v < 0) // negative when viewed as signed: 'N' followed by the magnitude
+        {
+            buf.writeByte('N');
+            buf.print(-v);
+        }
+        else // otherwise 'i' followed by the value
+        {
+            buf.writeByte('i');
+            buf.print(v);
+        }
+    }
+
     ////////////////////////////////////////////////////////////////////////////
     void mangleDecl(Declaration sthis)
     {
@@ -909,17 +923,7 @@ public:
 
     override void visit(IntegerExp e)
     {
-        const v = e.toInteger();
-        if (cast(sinteger_t)v < 0)
-        {
-            buf.writeByte('N');
-            buf.print(-v);
-        }
-        else
-        {
-            buf.writeByte('i');
-            buf.print(v);
-        }
+        mangleInteger(e.toInteger());
     }
 
     override void visit(RealExp 
e)
@@ -946,6 +950,7 @@ public:
         char m;
         OutBuffer tmp;
         const(char)[] q;
+
         /* Write string in UTF-8 format
          */
         switch (e.sz)
@@ -983,7 +988,15 @@ public:
                 q = tmp[];
                 break;
             }
-
+        case 8:
+            // String of size 8 has to be hexstring cast to long[], mangle as array literal
+            buf.writeByte('A');
+            buf.print(e.len);
+            foreach (i; 0 .. e.len)
+            {
+                mangleInteger(e.getIndex(i));
+            }
+            return;
         default:
             assert(0);
         }
diff --git a/compiler/src/dmd/dstruct.d b/compiler/src/dmd/dstruct.d
index 5683d5fb1d..7546fb6146 100644
--- a/compiler/src/dmd/dstruct.d
+++ b/compiler/src/dmd/dstruct.d
@@ -616,7 +616,7 @@ bool _isZeroInit(Expression exp)
 
         foreach (i; 0 .. se.len)
         {
-            if (se.getCodeUnit(i))
+            if (se.getIndex(i) != 0)
                 return false;
         }
         return true;
diff --git a/compiler/src/dmd/expression.d b/compiler/src/dmd/expression.d
index 9de3c322ed..9e6c257903 100644
--- a/compiler/src/dmd/expression.d
+++ b/compiler/src/dmd/expression.d
@@ -1500,6 +1500,7 @@ extern (C++) final class StringExp : Expression
         char* string; // if sz == 1
         wchar* wstring; // if sz == 2
         dchar* dstring; // if sz == 4
+        ulong* lstring; // if sz == 8
     } // (const if ownedByCtfe == OwnedBy.code)
     size_t len; // number of code units
     ubyte sz = 1; // 1: char, 2: wchar, 4: dchar
@@ -1662,6 +1663,13 @@ extern (C++) final class StringExp : Expression
      * code unit at index i
      */
     dchar getCodeUnit(size_t i) const pure
+    {
+        assert(this.sz <= dchar.sizeof);
+        return cast(dchar) getIndex(i);
+    }
+
+    /// Returns: integer at index `i`
+    ulong getIndex(size_t i) const pure
     {
         assert(i < len);
         final switch (sz)
@@ -1672,6 +1680,8 @@ extern (C++) final class StringExp : Expression
             return wstring[i];
         case 4:
             return dstring[i];
+        case 8:
+            return lstring[i];
         }
     }
 
@@ -1682,6 +1692,12 @@ extern (C++) final class StringExp : Expression
      * c = code unit to set it to
      */
     extern (D) void setCodeUnit(size_t i, dchar c)
+    {
+        return setIndex(i, c);
+    }
+
+    /// Sets the integer at index `i` to `c`; unsigned like `getIndex`, narrowed by a cast for 2- and 4-byte elements
+    extern (D) void setIndex(size_t i, ulong c)
     {
         assert(i < len);
         final switch (sz)
@@ -1693,7 +1709,10 @@ extern (C++) final class StringExp : Expression
             wstring[i] = cast(wchar)c;
             break;
         case 4:
-            dstring[i] = c;
+            dstring[i] = cast(dchar) c;
+            break;
+        case 8:
+            lstring[i] = c;
             break;
         }
     }
diff --git a/compiler/src/dmd/expression.h b/compiler/src/dmd/expression.h
index 954a6db10f..449940d231 100644
--- a/compiler/src/dmd/expression.h
+++ b/compiler/src/dmd/expression.h
@@ -354,7 +354,7 @@ class StringExp final : public Expression
 public:
     utf8_t postfix; // 'c', 'w', 'd'
     OwnedBy ownedByCtfe;
-    void *string; // char, wchar, or dchar data
+    void *string; // char, wchar, dchar, or long data
     size_t len; // number of chars, wchars, or dchars
     unsigned char sz; // 1: char, 2: wchar, 4: dchar
     d_bool committed; // if type is committed
diff --git a/compiler/src/dmd/frontend.h b/compiler/src/dmd/frontend.h
index 99a0d81c40..456bae7c62 100644
--- a/compiler/src/dmd/frontend.h
+++ b/compiler/src/dmd/frontend.h
@@ -3366,6 +3366,7 @@ public:
         char* string;
         char16_t* wstring;
         char32_t* dstring;
+        uint64_t* lstring;
     };
     size_t len;
     uint8_t sz;
@@ -3379,6 +3380,7 @@ public:
     size_t numberOfCodeUnits(int32_t tynto = 0) const;
     void writeTo(void* dest, bool zero, int32_t tyto = 0) const;
     char32_t getCodeUnit(size_t i) const;
+    uint64_t getIndex(size_t i) const;
     StringExp* toStringExp() override;
     int32_t compare(const StringExp* const se2) const;
     Optional toBool() override;
diff --git a/compiler/test/fail_compilation/hexstring.d b/compiler/test/fail_compilation/hexstring.d
index de83db9c5e..87f00f1dfa 100644
--- a/compiler/test/fail_compilation/hexstring.d
+++ b/compiler/test/fail_compilation/hexstring.d
@@ -1,18 +1,39 @@
-/**
-TEST_OUTPUT:
----
-fail_compilation\hexstring.d(16): Error: cannot implicitly convert expression `"123F"` of type `string` to `immutable(ubyte[])`
-fail_compilation\hexstring.d(17): Error: cannot implicitly convert expression `"\x12?"c` of type `string` to `immutable(ubyte[])`
-fail_compilation\hexstring.d(18): Error: cannot implicitly convert expression `"\x12?"` of 
type `string` to `immutable(ubyte[])` -fail_compilation\hexstring.d(15): Error: cannot implicitly convert expression `"\x12?"` of type `string` to `ubyte[]` ---- -*/ -immutable ubyte[] s0 = x"123F"; -static assert(s0[0] == 0x12); -static assert(s0[1] == 0x3F); -immutable byte[] s1 = x"123F"; - -ubyte[] f1 = x"123F"; -immutable ubyte[] f2 = "123F"; -immutable ubyte[] f3 = x"123F"c; -immutable ubyte[] f4 = cast(string) x"123F"; +/** +TEST_OUTPUT: +--- +fail_compilation/hexstring.d(29): Error: cannot implicitly convert expression `"123F"` of type `string` to `immutable(ubyte[])` +fail_compilation/hexstring.d(30): Error: cannot implicitly convert expression `"\x12?"c` of type `string` to `immutable(ubyte[])` +fail_compilation/hexstring.d(31): Error: cannot implicitly convert expression `"\x12?"` of type `string` to `immutable(ubyte[])` +fail_compilation/hexstring.d(33): Error: hex string length 1 must be a multiple of 2 to cast to `immutable(ushort[])` +fail_compilation/hexstring.d(34): Error: hex string length 3 must be a multiple of 4 to cast to `immutable(uint[])` +fail_compilation/hexstring.d(35): Error: hex string length 5 must be a multiple of 8 to cast to `immutable(ulong[])` +fail_compilation/hexstring.d(36): Error: array cast from `wstring` to `immutable(ulong[])` is not supported at compile time +fail_compilation/hexstring.d(36): perhaps remove postfix `w` from hex string +fail_compilation/hexstring.d(37): Error: array cast from `string` to `immutable(uint[])` is not supported at compile time +fail_compilation/hexstring.d(38): Error: array cast from `string` to `immutable(ushort[])` is not supported at compile time +fail_compilation/hexstring.d(39): Error: array cast from `string` to `immutable(uint[])` is not supported at compile time +fail_compilation/hexstring.d(39): perhaps remove postfix `c` from hex string +fail_compilation/hexstring.d(28): Error: cannot implicitly convert expression `"\x12?"` of type `string` to `ubyte[]` +--- +*/ + +immutable ubyte[] 
s0 = x"123F";
+static assert(s0[0] == 0x12);
+static assert(s0[1] == 0x3F);
+immutable byte[] s1 = x"123F";
+
+enum E(X) = cast(X[]) x"AABBCCDD";
+static assert(E!int[0] == 0xAABBCCDD);
+
+ubyte[] f1 = x"123F";
+immutable ubyte[] f2 = "123F";
+immutable ubyte[] f3 = x"123F"c;
+immutable ubyte[] f4 = cast(string) x"123F";
+
+immutable ushort[] f5 = cast(immutable ushort[]) x"11";
+immutable uint[] f6 = cast(immutable uint[]) x"112233";
+immutable ulong[] f7 = cast(immutable ulong[]) x"1122334455";
+immutable ulong[] f8 = cast(immutable ulong[]) x"1122334455"w;
+immutable uint[] f9 = cast(immutable uint[]) "ABCD";
+immutable ushort[] f10 = cast(immutable ushort[]) (x"1122" ~ "");
+immutable uint[] f11 = cast(immutable uint[]) x"AABBCCDD"c;
diff --git a/compiler/test/runnable/literal.d b/compiler/test/runnable/literal.d
index 99b177759b..af2029e904 100644
--- a/compiler/test/runnable/literal.d
+++ b/compiler/test/runnable/literal.d
@@ -241,6 +241,23 @@ void test12950()
     assert(0b00_00_00_01UL.op12950() == 12951);
 }
 
+void testHexstring() // hex string literals cast to uint[] / ulong[]
+{
+    static immutable uint[] x = cast(immutable uint[]) x"FFAADDEE"; // 4 bytes -> one uint, first byte most significant
+    static assert(x[0] == 0xFFAADDEE); // value as seen at compile time
+    assert(x[0] == 0xFFAADDEE); // same value read back at run time
+
+    static immutable ulong[] y = cast(immutable ulong[]) x"1122334455667788AABBCCDDEEFF0099"; // 16 bytes -> two ulongs
+    static assert(y[0] == 0x1122334455667788);
+    static assert(y[1] == 0xAABBCCDDEEFF0099);
+    assert(y[0] == 0x1122334455667788);
+    assert(y[1] == 0xAABBCCDDEEFF0099);
+
+    // Test that mangling of StringExp with size 8 is the same as array literal mangling:
+    void f(immutable ulong[] a)() {} // template taking the array as a value parameter, so it appears in the mangled name
+    static assert(f!y.mangleof == f!([0x1122334455667788, 0xAABBCCDDEEFF0099]).mangleof);
+}
+
 /***************************************************/
 
 int main()
@@ -249,6 +266,7 @@ int main()
     test2();
     test13907();
     test12950();
+    testHexstring(); // run the hex string cast checks with the other literal tests
 
     printf("Success\n");
     return 0;