Allow casting hexstring to int array (#16079)

This commit is contained in:
Dennis 2024-01-25 10:02:04 +01:00 committed by GitHub
parent e93b26e259
commit ba5402e7ac
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 201 additions and 38 deletions

View file

@ -0,0 +1,20 @@
Hex strings can now be cast to integer arrays
Hex strings are the most efficient way to embed binary data into source files.
However, they couldn't easily be used to initialize a `short[]`, `int[]` or `long[]`, because reinterpret-casting arrays is not allowed during CTFE.
Now, hex strings can be cast to integer arrays with element types larger than `byte`.
A big endian byte order is assumed, consistent with how integer literals are written.
---
immutable uint[] data = cast(immutable uint[]) x"AABBCCDD";
static assert(data[0] == 0xAABBCCDD);
---
When the hex string has a character postfix, or its length is not a multiple of the element size, it is an error:
---
auto e = cast(immutable ushort[]) x"AABBCC"; // Error, 3 bytes is not a multiple of `ushort.sizeof`
auto f = cast(immutable ushort[]) x"AABB"w; // Error, hex string has wide character postfix
---

View file

@ -711,7 +711,7 @@ UnionExp Equal(EXP op, const ref Loc loc, Type type, Expression e1, Expression e
cmp = 1; // if dim1 winds up being 0
foreach (i; 0 .. dim1)
{
uinteger_t c = es1.getCodeUnit(i);
uinteger_t c = es1.getIndex(i);
auto ee2 = es2[i];
if (ee2.isConst() != 1)
{
@ -1119,7 +1119,7 @@ UnionExp Index(Type type, Expression e1, Expression e2, bool indexIsInBounds)
}
else
{
emplaceExp!(IntegerExp)(&ue, loc, es1.getCodeUnit(cast(size_t) i), type);
emplaceExp!(IntegerExp)(&ue, loc, es1.getIndex(cast(size_t) i), type);
}
}
else if (e1.type.toBasetype().ty == Tsarray && e2.op == EXP.int64)
@ -1282,7 +1282,7 @@ void sliceAssignArrayLiteralFromString(ArrayLiteralExp existingAE, const StringE
Type elemType = existingAE.type.nextOf();
foreach (j; 0 .. len)
{
const val = newval.getCodeUnit(j);
const val = newval.getIndex(j);
(*existingAE.elements)[j + firstIndex] = new IntegerExp(newval.loc, val, elemType);
}
}

View file

@ -568,6 +568,9 @@ StringExp createBlockDuplicatedStringLiteral(UnionExp* pue, const ref Loc loc, T
case 4:
(cast(dchar*)s)[elemi] = value;
break;
case 8:
(cast(ulong*)s)[elemi] = value;
break;
default:
assert(0);
}
@ -1494,7 +1497,7 @@ Expression ctfeIndex(UnionExp* pue, const ref Loc loc, Type type, Expression e1,
error(loc, "string index %llu is out of bounds `[0 .. %llu]`", indx, cast(ulong)es1.len);
return CTFEExp.cantexp;
}
emplaceExp!IntegerExp(pue, loc, es1.getCodeUnit(cast(size_t) indx), type);
emplaceExp!IntegerExp(pue, loc, es1.getIndex(cast(size_t) indx), type);
return pue.exp();
}
@ -1704,7 +1707,7 @@ Expression changeArrayLiteralLength(UnionExp* pue, const ref Loc loc, TypeArray
void* s = mem.xcalloc(newlen + 1, oldse.sz);
const data = oldse.peekData();
memcpy(s, data.ptr, copylen * oldse.sz);
const defaultValue = cast(uint)defaultElem.toInteger();
const defaultValue = cast(ulong)defaultElem.toInteger();
foreach (size_t elemi; copylen .. newlen)
{
switch (oldse.sz)
@ -1718,6 +1721,9 @@ Expression changeArrayLiteralLength(UnionExp* pue, const ref Loc loc, TypeArray
case 4:
(cast(dchar*)s)[cast(size_t)(indxlo + elemi)] = cast(dchar)defaultValue;
break;
case 8:
(cast(ulong*)s)[cast(size_t)(indxlo + elemi)] = cast(ulong)defaultValue;
break;
default:
assert(0);
}

View file

@ -6098,11 +6098,35 @@ public:
result.type = e.to;
return;
}
// Disallow array type painting, except for conversions between built-in
// types of identical size.
if ((e.to.ty == Tsarray || e.to.ty == Tarray) && (e1.type.ty == Tsarray || e1.type.ty == Tarray) && !isSafePointerCast(e1.type.nextOf(), e.to.nextOf()))
{
auto se = e1.isStringExp();
// Allow casting a hex string literal to short[], int[] or long[]
if (se && se.hexString && se.postfix == StringExp.NoPostfix)
{
const sz = cast(size_t) e.to.nextOf().size;
if ((se.len % sz) != 0)
{
error(e.loc, "hex string length %d must be a multiple of %d to cast to `%s`",
cast(int) se.len, cast(int) sz, e.to.toChars());
result = CTFEExp.cantexp;
return;
}
auto str = arrayCastBigEndian((cast(const ubyte[]) se.peekString()), sz);
emplaceExp!(StringExp)(pue, e1.loc, str, se.len / sz, cast(ubyte) sz);
result = pue.exp();
result.type = e.to;
return;
}
error(e.loc, "array cast from `%s` to `%s` is not supported at compile time", e1.type.toChars(), e.to.toChars());
if (se && se.hexString && se.postfix != StringExp.NoPostfix)
errorSupplemental(e.loc, "perhaps remove postfix `%s` from hex string",
(cast(char) se.postfix ~ "\0").ptr);
result = CTFEExp.cantexp;
return;
}
@ -7720,3 +7744,44 @@ private void removeHookTraceImpl(ref CallExp ce, ref FuncDeclaration fd)
if (global.params.v.verbose)
message("strip %s =>\n %s", oldCE.toChars(), ce.toChars());
}
/**
 * Reinterpret a big endian byte sequence as an array of wider integers.
 *
 * Every group of `size` consecutive bytes of `data` is combined, most
 * significant byte first, into a single integer value. Bytes left over after
 * the last whole group are not part of the result.
 *
 * Params:
 *  data = bytes in big endian order
 *  size = element width in bytes: 1 (ubyte[]), 2 (ushort[]), 4 (uint[]) or 8 (ulong[]);
 *         any other value trips `assert(0)`
 * Returns: newly allocated array with the converted elements, typed as `ubyte[]`
 */
ubyte[] arrayCastBigEndian(const ubyte[] data, size_t size)
{
    // Assemble one `Word` out of every `Word.sizeof` input bytes.
    ubyte[] assemble(Word)()
    {
        enum width = Word.sizeof;
        auto words = new Word[](data.length / width);
        foreach (n, ref word; words)
        {
            Word acc = 0;
            // Shift in one byte at a time, so the first byte ends up most significant
            foreach (octet; data[n * width .. (n + 1) * width])
                acc = cast(Word) ((acc << 8) | octet);
            word = acc;
        }
        return cast(ubyte[]) words;
    }

    switch (size)
    {
        case 8: return assemble!ulong;
        case 4: return assemble!uint;
        case 2: return assemble!ushort;
        case 1: return data.dup; // single bytes have no byte order, a plain copy suffices
        default: assert(0);
    }
}
unittest
{
    // Eight bytes, so the input divides evenly into every supported element size
    const ubyte[] bytes = [0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x22];

    // Size 1 yields an unchanged copy
    assert(cast(ubyte[]) arrayCastBigEndian(bytes, 1) == bytes);
    // Wider elements take the first byte of each group as the most significant one
    assert(cast(ushort[]) arrayCastBigEndian(bytes, 2) == [0xAABB, 0xCCDD, 0xEEFF, 0x1122]);
    assert(cast(uint[]) arrayCastBigEndian(bytes, 4) == [0xAABBCCDD, 0xEEFF1122]);
    assert(cast(ulong[]) arrayCastBigEndian(bytes, 8) == [0xAABBCCDDEEFF1122]);
}

View file

@ -502,6 +502,20 @@ public:
toBuffer(*buf, id.toString(), s);
}
/// Write integer `v` to `buf`: prefix `N` followed by the negated value when
/// `v` is negative as a signed integer, prefix `i` followed by `v` otherwise.
void mangleInteger(dinteger_t v)
{
    const isNegative = cast(sinteger_t) v < 0;
    buf.writeByte(isNegative ? 'N' : 'i');
    buf.print(isNegative ? -v : v);
}
////////////////////////////////////////////////////////////////////////////
void mangleDecl(Declaration sthis)
{
@ -909,17 +923,7 @@ public:
override void visit(IntegerExp e)
{
const v = e.toInteger();
if (cast(sinteger_t)v < 0)
{
buf.writeByte('N');
buf.print(-v);
}
else
{
buf.writeByte('i');
buf.print(v);
}
mangleInteger(e.toInteger());
}
override void visit(RealExp e)
@ -946,6 +950,7 @@ public:
char m;
OutBuffer tmp;
const(char)[] q;
/* Write string in UTF-8 format
*/
switch (e.sz)
@ -983,7 +988,15 @@ public:
q = tmp[];
break;
}
case 8:
// String of size 8 has to be hexstring cast to long[], mangle as array literal
buf.writeByte('A');
buf.print(e.len);
foreach (i; 0 .. e.len)
{
mangleInteger(e.getIndex(i));
}
return;
default:
assert(0);
}

View file

@ -616,7 +616,7 @@ bool _isZeroInit(Expression exp)
foreach (i; 0 .. se.len)
{
if (se.getCodeUnit(i))
if (se.getIndex(i) != 0)
return false;
}
return true;

View file

@ -1500,6 +1500,7 @@ extern (C++) final class StringExp : Expression
char* string; // if sz == 1
wchar* wstring; // if sz == 2
dchar* dstring; // if sz == 4
ulong* lstring; // if sz == 8
} // (const if ownedByCtfe == OwnedBy.code)
size_t len; // number of code units
ubyte sz = 1; // 1: char, 2: wchar, 4: dchar, 8: ulong (hex string cast to long[])
@ -1662,6 +1663,13 @@ extern (C++) final class StringExp : Expression
* code unit at index i
*/
dchar getCodeUnit(size_t i) const pure
{
    // Elements wider than `dchar` do not fit the return type; `getIndex` handles those
    assert(this.sz <= dchar.sizeof);
    const ulong unit = getIndex(i);
    return cast(dchar) unit;
}
/// Returns: integer at index `i`
ulong getIndex(size_t i) const pure
{
assert(i < len);
final switch (sz)
@ -1672,6 +1680,8 @@ extern (C++) final class StringExp : Expression
return wstring[i];
case 4:
return dstring[i];
case 8:
return lstring[i];
}
}
@ -1682,6 +1692,11 @@ extern (C++) final class StringExp : Expression
* c = code unit to set it to
*/
extern (D) void setCodeUnit(size_t i, dchar c)
{
    // Thin wrapper: the actual store is performed by `setIndex`
    setIndex(i, c);
}
extern (D) void setIndex(size_t i, long c)
{
assert(i < len);
final switch (sz)
@ -1693,7 +1708,10 @@ extern (C++) final class StringExp : Expression
wstring[i] = cast(wchar)c;
break;
case 4:
dstring[i] = c;
dstring[i] = cast(dchar) c;
break;
case 8:
lstring[i] = c;
break;
}
}

View file

@ -354,7 +354,7 @@ class StringExp final : public Expression
public:
utf8_t postfix; // 'c', 'w', 'd'
OwnedBy ownedByCtfe;
void *string; // char, wchar, or dchar data
void *string; // char, wchar, dchar, or long data
size_t len; // number of chars, wchars, dchars, or longs
unsigned char sz; // 1: char, 2: wchar, 4: dchar, 8: long
d_bool committed; // if type is committed

View file

@ -3366,6 +3366,7 @@ public:
char* string;
char16_t* wstring;
char32_t* dstring;
uint64_t* lstring;
};
size_t len;
uint8_t sz;
@ -3379,6 +3380,7 @@ public:
size_t numberOfCodeUnits(int32_t tynto = 0) const;
void writeTo(void* dest, bool zero, int32_t tyto = 0) const;
char32_t getCodeUnit(size_t i) const;
uint64_t getIndex(size_t i) const;
StringExp* toStringExp() override;
int32_t compare(const StringExp* const se2) const;
Optional<bool > toBool() override;

View file

@ -1,18 +1,39 @@
/**
TEST_OUTPUT:
---
fail_compilation\hexstring.d(16): Error: cannot implicitly convert expression `"123F"` of type `string` to `immutable(ubyte[])`
fail_compilation\hexstring.d(17): Error: cannot implicitly convert expression `"\x12?"c` of type `string` to `immutable(ubyte[])`
fail_compilation\hexstring.d(18): Error: cannot implicitly convert expression `"\x12?"` of type `string` to `immutable(ubyte[])`
fail_compilation\hexstring.d(15): Error: cannot implicitly convert expression `"\x12?"` of type `string` to `ubyte[]`
fail_compilation/hexstring.d(29): Error: cannot implicitly convert expression `"123F"` of type `string` to `immutable(ubyte[])`
fail_compilation/hexstring.d(30): Error: cannot implicitly convert expression `"\x12?"c` of type `string` to `immutable(ubyte[])`
fail_compilation/hexstring.d(31): Error: cannot implicitly convert expression `"\x12?"` of type `string` to `immutable(ubyte[])`
fail_compilation/hexstring.d(33): Error: hex string length 1 must be a multiple of 2 to cast to `immutable(ushort[])`
fail_compilation/hexstring.d(34): Error: hex string length 3 must be a multiple of 4 to cast to `immutable(uint[])`
fail_compilation/hexstring.d(35): Error: hex string length 5 must be a multiple of 8 to cast to `immutable(ulong[])`
fail_compilation/hexstring.d(36): Error: array cast from `wstring` to `immutable(ulong[])` is not supported at compile time
fail_compilation/hexstring.d(36): perhaps remove postfix `w` from hex string
fail_compilation/hexstring.d(37): Error: array cast from `string` to `immutable(uint[])` is not supported at compile time
fail_compilation/hexstring.d(38): Error: array cast from `string` to `immutable(ushort[])` is not supported at compile time
fail_compilation/hexstring.d(39): Error: array cast from `string` to `immutable(uint[])` is not supported at compile time
fail_compilation/hexstring.d(39): perhaps remove postfix `c` from hex string
fail_compilation/hexstring.d(28): Error: cannot implicitly convert expression `"\x12?"` of type `string` to `ubyte[]`
---
*/
immutable ubyte[] s0 = x"123F";
static assert(s0[0] == 0x12);
static assert(s0[1] == 0x3F);
immutable byte[] s1 = x"123F";
enum E(X) = cast(X[]) x"AABBCCDD";
static assert(E!int[0] == 0xAABBCCDD);
ubyte[] f1 = x"123F";
immutable ubyte[] f2 = "123F";
immutable ubyte[] f3 = x"123F"c;
immutable ubyte[] f4 = cast(string) x"123F";
immutable ushort[] f5 = cast(immutable ushort[]) x"11";
immutable uint[] f6 = cast(immutable uint[]) x"112233";
immutable ulong[] f7 = cast(immutable ulong[]) x"1122334455";
immutable ulong[] f8 = cast(immutable ulong[]) x"1122334455"w;
immutable uint[] f9 = cast(immutable uint[]) "ABCD";
immutable ushort[] f10 = cast(immutable ushort[]) (x"1122" ~ "");
immutable uint[] f11 = cast(immutable uint[]) x"AABBCCDD"c;

View file

@ -241,6 +241,23 @@ void test12950()
assert(0b00_00_00_01UL.op12950() == 12951);
}
/// Checks hex strings cast to `uint[]` / `ulong[]`, both at compile time and at run time.
void testHexstring()
{
    // 4-byte elements: the whole literal forms a single big endian word
    static immutable uint[] words = cast(immutable uint[]) x"FFAADDEE";
    static assert(words[0] == 0xFFAADDEE);
    assert(words[0] == 0xFFAADDEE);

    // 8-byte elements: sixteen bytes give two values
    static immutable ulong[] quads = cast(immutable ulong[]) x"1122334455667788AABBCCDDEEFF0099";
    static assert(quads[0] == 0x1122334455667788);
    static assert(quads[1] == 0xAABBCCDDEEFF0099);
    assert(quads[0] == 0x1122334455667788);
    assert(quads[1] == 0xAABBCCDDEEFF0099);

    // Test that mangling of StringExp with size 8 is the same as array literal mangling:
    void probe(immutable ulong[] arr)() {}
    static assert(probe!quads.mangleof == probe!([0x1122334455667788, 0xAABBCCDDEEFF0099]).mangleof);
}
/***************************************************/
int main()
@ -249,6 +266,7 @@ int main()
test2();
test13907();
test12950();
testHexstring();
printf("Success\n");
return 0;