From d8dcb940ab9b165be1a6b1bc228720e4b463c05f Mon Sep 17 00:00:00 2001 From: "Adam D. Ruppe" Date: Sat, 20 Jan 2024 01:21:40 -0500 Subject: [PATCH] Enhanced version of dip1036 implementation (#15715) This implements the Enhanced Interpolated Expression Sequence proposal: i"" or iq{} or i`` with a $(expression) in the middle are converted to a tuple of druntime types for future processing by library code. --- changelog/dmd.ies.dd | 18 + compiler/src/dmd/astbase.d | 20 + compiler/src/dmd/doc.d | 3 +- compiler/src/dmd/expression.d | 24 ++ compiler/src/dmd/expression.h | 12 + compiler/src/dmd/expressionsem.d | 78 ++++ compiler/src/dmd/frontend.h | 379 ++++++++++-------- compiler/src/dmd/hdrgen.d | 32 ++ compiler/src/dmd/id.d | 6 + compiler/src/dmd/lexer.d | 166 +++++++- compiler/src/dmd/parse.d | 9 + compiler/src/dmd/parsetimevisitor.d | 1 + compiler/src/dmd/strictvisitor.d | 1 + compiler/src/dmd/tokens.d | 33 +- compiler/src/dmd/tokens.h | 9 +- compiler/src/dmd/visitor.h | 2 + .../interpolatedexpressionsequence_postfix.d | 13 + .../runnable/interpolatedexpressionsequence.d | 51 +++ compiler/test/unit/lexer/location_offset.d | 1 + druntime/mak/COPY | 1 + druntime/mak/DOCS | 1 + druntime/mak/SRCS | 1 + druntime/src/core/interpolation.d | 156 +++++++ 23 files changed, 827 insertions(+), 190 deletions(-) create mode 100644 changelog/dmd.ies.dd create mode 100644 compiler/test/fail_compilation/interpolatedexpressionsequence_postfix.d create mode 100644 compiler/test/runnable/interpolatedexpressionsequence.d create mode 100644 druntime/src/core/interpolation.d diff --git a/changelog/dmd.ies.dd b/changelog/dmd.ies.dd new file mode 100644 index 0000000000..f39e280c2b --- /dev/null +++ b/changelog/dmd.ies.dd @@ -0,0 +1,18 @@ +Add support for Interpolated Expression Sequences + +Interpolated Expression Sequences are a way to implement things like string interpolation in library code. 
Three forms of literals are added: + +``` +i"Content $(a + 4)" +i`Content $(a + 4)` +iq{Content $(a + 4)} +``` + +all provide the same thing: a tuple that can be passed to other functions, like `writeln` from `std.stdio` and `text` from `std.conv`: + +``` +int a = 6; +writeln(i"Content $(a + 4)"); // prints "Content 10" +``` + +You can also pass them to other functions which understand the types in the new `core.interpolation` module. Numerous examples can be found in the documentation of that module or in this repository: https://github.com/adamdruppe/interpolation-examples/ diff --git a/compiler/src/dmd/astbase.d b/compiler/src/dmd/astbase.d index c224801ab9..4336c473d4 100644 --- a/compiler/src/dmd/astbase.d +++ b/compiler/src/dmd/astbase.d @@ -4575,6 +4575,7 @@ struct ASTBase inout(SuperExp) isSuperExp() { return op == EXP.super_ ? cast(typeof(return))this : null; } inout(NullExp) isNullExp() { return op == EXP.null_ ? cast(typeof(return))this : null; } inout(StringExp) isStringExp() { return op == EXP.string_ ? cast(typeof(return))this : null; } + inout(InterpExp) isInterpExp() { return op == EXP.interpolated ? cast(typeof(return))this : null; } inout(TupleExp) isTupleExp() { return op == EXP.tuple ? cast(typeof(return))this : null; } inout(ArrayLiteralExp) isArrayLiteralExp() { return op == EXP.arrayLiteral ? cast(typeof(return))this : null; } inout(AssocArrayLiteralExp) isAssocArrayLiteralExp() { return op == EXP.assocArrayLiteral ? 
cast(typeof(return))this : null; } @@ -4907,6 +4908,25 @@ struct ASTBase } } + extern (C++) final class InterpExp : Expression + { + InterpolatedSet* interpolatedSet; + char postfix = 0; // 'c', 'w', 'd' + + extern (D) this(const ref Loc loc, InterpolatedSet* interpolatedSet, char postfix = 0) + { + super(loc, EXP.interpolated, __traits(classInstanceSize, InterpExp)); + this.interpolatedSet = interpolatedSet; + this.postfix = postfix; + } + + override void accept(Visitor v) + { + v.visit(this); + } + } + + extern (C++) final class StringExp : Expression { union diff --git a/compiler/src/dmd/doc.d b/compiler/src/dmd/doc.d index ec3cc91409..bcf358cb5e 100644 --- a/compiler/src/dmd/doc.d +++ b/compiler/src/dmd/doc.d @@ -5204,6 +5204,7 @@ void highlightCode2(Scope* sc, Dsymbols* a, ref OutBuffer buf, size_t offset) highlight = "$(D_COMMENT "; break; case TOK.string_: + case TOK.interpolated: highlight = "$(D_STRING "; break; default: @@ -5216,7 +5217,7 @@ void highlightCode2(Scope* sc, Dsymbols* a, ref OutBuffer buf, size_t offset) res.writestring(highlight); size_t o = res.length; highlightCode3(sc, res, tok.ptr, lex.p); - if (tok.value == TOK.comment || tok.value == TOK.string_) + if (tok.value == TOK.comment || tok.value == TOK.string_ || tok.value == TOK.interpolated) /* https://issues.dlang.org/show_bug.cgi?id=7656 * https://issues.dlang.org/show_bug.cgi?id=7715 * https://issues.dlang.org/show_bug.cgi?id=10519 diff --git a/compiler/src/dmd/expression.d b/compiler/src/dmd/expression.d index cbf01186f9..9de3c322ed 100644 --- a/compiler/src/dmd/expression.d +++ b/compiler/src/dmd/expression.d @@ -721,6 +721,7 @@ extern (C++) abstract class Expression : ASTNode inout(SuperExp) isSuperExp() { return op == EXP.super_ ? cast(typeof(return))this : null; } inout(NullExp) isNullExp() { return op == EXP.null_ ? cast(typeof(return))this : null; } inout(StringExp) isStringExp() { return op == EXP.string_ ? 
cast(typeof(return))this : null; } + inout(InterpExp) isInterpExp() { return op == EXP.interpolated ? cast(typeof(return))this : null; } inout(TupleExp) isTupleExp() { return op == EXP.tuple ? cast(typeof(return))this : null; } inout(ArrayLiteralExp) isArrayLiteralExp() { return op == EXP.arrayLiteral ? cast(typeof(return))this : null; } inout(AssocArrayLiteralExp) isAssocArrayLiteralExp() { return op == EXP.assocArrayLiteral ? cast(typeof(return))this : null; } @@ -1847,6 +1848,28 @@ extern (C++) final class StringExp : Expression } } +extern (C++) final class InterpExp : Expression +{ + char postfix = NoPostfix; // 'c', 'w', 'd' + OwnedBy ownedByCtfe = OwnedBy.code; + InterpolatedSet* interpolatedSet; + + enum char NoPostfix = 0; + + extern (D) this(const ref Loc loc, InterpolatedSet* set, char postfix = NoPostfix) scope + { + super(loc, EXP.interpolated); + this.interpolatedSet = set; + this.postfix = postfix; + } + + override void accept(Visitor v) + { + v.visit(this); + } +} + + /*********************************************************** * A sequence of expressions * @@ -5494,6 +5517,7 @@ private immutable ubyte[EXP.max+1] expSize = [ EXP.preMinusMinus: __traits(classInstanceSize, PreExp), EXP.identifier: __traits(classInstanceSize, IdentifierExp), EXP.string_: __traits(classInstanceSize, StringExp), + EXP.interpolated: __traits(classInstanceSize, InterpExp), EXP.this_: __traits(classInstanceSize, ThisExp), EXP.super_: __traits(classInstanceSize, SuperExp), EXP.halt: __traits(classInstanceSize, HaltExp), diff --git a/compiler/src/dmd/expression.h b/compiler/src/dmd/expression.h index f57f6a4029..954a6db10f 100644 --- a/compiler/src/dmd/expression.h +++ b/compiler/src/dmd/expression.h @@ -38,6 +38,7 @@ class TemplateDeclaration; class ClassDeclaration; class OverloadSet; class StringExp; +class InterpExp; class LoweredAssignExp; #ifdef IN_GCC typedef union tree_node Symbol; @@ -129,6 +130,7 @@ public: SuperExp* isSuperExp(); NullExp* isNullExp(); StringExp* 
isStringExp(); + InterpExp* isInterpExp(); TupleExp* isTupleExp(); ArrayLiteralExp* isArrayLiteralExp(); AssocArrayLiteralExp* isAssocArrayLiteralExp(); @@ -370,6 +372,16 @@ public: void writeTo(void* dest, bool zero, int tyto = 0) const; }; +class InterpExp final : public Expression +{ +public: + utf8_t postfix; // 'c', 'w', 'd' + OwnedBy ownedByCtfe; + void* interpolatedSet; + + void accept(Visitor* v) override { v->visit(this); } +}; + // Tuple class TupleExp final : public Expression diff --git a/compiler/src/dmd/expressionsem.d b/compiler/src/dmd/expressionsem.d index 1d46d6e13e..79e88d5367 100644 --- a/compiler/src/dmd/expressionsem.d +++ b/compiler/src/dmd/expressionsem.d @@ -4145,6 +4145,84 @@ private extern (C++) final class ExpressionSemanticVisitor : Visitor result = e; } + override void visit(InterpExp e) + { + // the lexer breaks up into an odd/even array of literals and expression code + // we need to turn that into: + /+ + tuple( + .object.imported!"core.interpolation".InterpolationHeader(), + ... + .object.imported!"core.interpolation".InterpolationFooter() + ) + + There the ... loops through them all, making the even ones + .object.imported!"core.interpolation".InterpolatedLiteral!str() + and making the odd ones + .object.imported!"core.interpolation".InterpolatedExpression!str(), + the code represented by str + + Empty string literals are skipped as they provide no additional information. 
+ +/ + + if (e.postfix) + error(e.loc, "String postfixes on interpolated expression sequences are not allowed."); + + Expression makeNonTemplateItem(Identifier which) { + Expression id = new IdentifierExp(e.loc, Id.empty); + id = new DotIdExp(e.loc, id, Id.object); + auto moduleNameArgs = new Objects(); + moduleNameArgs.push(new StringExp(e.loc, "core.interpolation")); + id = new DotTemplateInstanceExp(e.loc, id, Id.imported, moduleNameArgs); + id = new DotIdExp(e.loc, id, which); + id = new CallExp(e.loc, id, new Expressions()); + return id; + } + + Expression makeTemplateItem(Identifier which, string arg) { + Expression id = new IdentifierExp(e.loc, Id.empty); + id = new DotIdExp(e.loc, id, Id.object); + auto moduleNameArgs = new Objects(); + moduleNameArgs.push(new StringExp(e.loc, "core.interpolation")); + id = new DotTemplateInstanceExp(e.loc, id, Id.imported, moduleNameArgs); + auto tiargs = new Objects(); + auto templateStringArg = new StringExp(e.loc, arg); + // banning those instead of forwarding them + // templateStringArg.postfix = e.postfix; // forward the postfix to these literals + tiargs.push(templateStringArg); + id = new DotTemplateInstanceExp(e.loc, id, which, tiargs); + id = new CallExp(e.loc, id, new Expressions()); + return id; + } + + auto arguments = new Expressions(); + arguments.push(makeNonTemplateItem(Id.InterpolationHeader)); + + foreach (idx, str; e.interpolatedSet.parts) + { + if (idx % 2 == 0) + { + if (str.length > 0) + arguments.push(makeTemplateItem(Id.InterpolatedLiteral, str)); + } + else + { + arguments.push(makeTemplateItem(Id.InterpolatedExpression, str)); + Expressions* mix = new Expressions(); + mix.push(new StringExp(e.loc, str)); + // FIXME: i'd rather not use MixinExp but idk how to do it lol + arguments.push(new MixinExp(e.loc, mix)); + } + } + + arguments.push(makeNonTemplateItem(Id.InterpolationFooter)); + + auto loweredTo = new TupleExp(e.loc, arguments); + visit(loweredTo); + + result = loweredTo; + } + override void 
visit(StringExp e) { static if (LOGSEMANTIC) diff --git a/compiler/src/dmd/frontend.h b/compiler/src/dmd/frontend.h index ec654ed103..d59f38f423 100644 --- a/compiler/src/dmd/frontend.h +++ b/compiler/src/dmd/frontend.h @@ -183,6 +183,7 @@ class DsymbolExp; class ThisExp; class SuperExp; class NullExp; +class InterpExp; class TupleExp; class ArrayLiteralExp; class AssocArrayLiteralExp; @@ -285,6 +286,7 @@ class ThrownExceptionExp; class UnaExp; class BinExp; class BinAssignExp; +struct InterpolatedSet; struct ContractInfo; struct ObjcSelector; class PeelStatement; @@ -1091,6 +1093,7 @@ public: virtual void visit(typename AST::TypeidExp e); virtual void visit(typename AST::TraitsExp e); virtual void visit(typename AST::StringExp e); + virtual void visit(typename AST::InterpExp e); virtual void visit(typename AST::NewExp e); virtual void visit(typename AST::AssocArrayLiteralExp e); virtual void visit(typename AST::ArrayLiteralExp e); @@ -2049,44 +2052,45 @@ enum class EXP : uint8_t preMinusMinus = 86u, identifier = 87u, string_ = 88u, - this_ = 89u, - super_ = 90u, - halt = 91u, - tuple = 92u, - error = 93u, - void_ = 94u, - int64 = 95u, - float64 = 96u, - complex80 = 97u, - import_ = 98u, - delegate_ = 99u, - function_ = 100u, - mixin_ = 101u, - in_ = 102u, - break_ = 103u, - continue_ = 104u, - goto_ = 105u, - scope_ = 106u, - traits = 107u, - overloadSet = 108u, - line = 109u, - file = 110u, - fileFullPath = 111u, - moduleString = 112u, - functionString = 113u, - prettyFunction = 114u, - pow = 115u, - powAssign = 116u, - vector = 117u, - voidExpression = 118u, - cantExpression = 119u, - showCtfeContext = 120u, - objcClassReference = 121u, - vectorArray = 122u, - compoundLiteral = 123u, - _Generic_ = 124u, - interval = 125u, - loweredAssignExp = 126u, + interpolated = 89u, + this_ = 90u, + super_ = 91u, + halt = 92u, + tuple = 93u, + error = 94u, + void_ = 95u, + int64 = 96u, + float64 = 97u, + complex80 = 98u, + import_ = 99u, + delegate_ = 100u, + function_ = 
101u, + mixin_ = 102u, + in_ = 103u, + break_ = 104u, + continue_ = 105u, + goto_ = 106u, + scope_ = 107u, + traits = 108u, + overloadSet = 109u, + line = 110u, + file = 111u, + fileFullPath = 112u, + moduleString = 113u, + functionString = 114u, + prettyFunction = 115u, + pow = 116u, + powAssign = 117u, + vector = 118u, + voidExpression = 119u, + cantExpression = 120u, + showCtfeContext = 121u, + objcClassReference = 122u, + vectorArray = 123u, + compoundLiteral = 124u, + _Generic_ = 125u, + interval = 126u, + loweredAssignExp = 127u, }; struct complex_t final @@ -2154,6 +2158,7 @@ public: SuperExp* isSuperExp(); NullExp* isNullExp(); StringExp* isStringExp(); + InterpExp* isInterpExp(); TupleExp* isTupleExp(); ArrayLiteralExp* isArrayLiteralExp(); AssocArrayLiteralExp* isAssocArrayLiteralExp(); @@ -2792,143 +2797,144 @@ enum class TOK : uint8_t dcharLiteral = 85u, identifier = 86u, string_ = 87u, - hexadecimalString = 88u, - this_ = 89u, - super_ = 90u, - error = 91u, - void_ = 92u, - int8 = 93u, - uns8 = 94u, - int16 = 95u, - uns16 = 96u, - int32 = 97u, - uns32 = 98u, - int64 = 99u, - uns64 = 100u, - int128 = 101u, - uns128 = 102u, - float32 = 103u, - float64 = 104u, - float80 = 105u, - imaginary32 = 106u, - imaginary64 = 107u, - imaginary80 = 108u, - complex32 = 109u, - complex64 = 110u, - complex80 = 111u, - char_ = 112u, - wchar_ = 113u, - dchar_ = 114u, - bool_ = 115u, - struct_ = 116u, - class_ = 117u, - interface_ = 118u, - union_ = 119u, - enum_ = 120u, - import_ = 121u, - alias_ = 122u, - override_ = 123u, - delegate_ = 124u, - function_ = 125u, - mixin_ = 126u, - align_ = 127u, - extern_ = 128u, - private_ = 129u, - protected_ = 130u, - public_ = 131u, - export_ = 132u, - static_ = 133u, - final_ = 134u, - const_ = 135u, - abstract_ = 136u, - debug_ = 137u, - deprecated_ = 138u, - in_ = 139u, - out_ = 140u, - inout_ = 141u, - lazy_ = 142u, - auto_ = 143u, - package_ = 144u, - immutable_ = 145u, - if_ = 146u, - else_ = 147u, - while_ = 148u, - for_ = 
149u, - do_ = 150u, - switch_ = 151u, - case_ = 152u, - default_ = 153u, - break_ = 154u, - continue_ = 155u, - with_ = 156u, - synchronized_ = 157u, - return_ = 158u, - goto_ = 159u, - try_ = 160u, - catch_ = 161u, - finally_ = 162u, - asm_ = 163u, - foreach_ = 164u, - foreach_reverse_ = 165u, - scope_ = 166u, - onScopeExit = 167u, - onScopeFailure = 168u, - onScopeSuccess = 169u, - invariant_ = 170u, - unittest_ = 171u, - argumentTypes = 172u, - ref_ = 173u, - macro_ = 174u, - parameters = 175u, - traits = 176u, - pure_ = 177u, - nothrow_ = 178u, - gshared = 179u, - line = 180u, - file = 181u, - fileFullPath = 182u, - moduleString = 183u, - functionString = 184u, - prettyFunction = 185u, - shared_ = 186u, - at = 187u, - pow = 188u, - powAssign = 189u, - goesTo = 190u, - vector = 191u, - pound = 192u, - arrow = 193u, - colonColon = 194u, - wchar_tLiteral = 195u, - endOfLine = 196u, - whitespace = 197u, - inline_ = 198u, - register_ = 199u, - restrict_ = 200u, - signed_ = 201u, - sizeof_ = 202u, - typedef_ = 203u, - unsigned_ = 204u, - volatile_ = 205u, - _Alignas_ = 206u, - _Alignof_ = 207u, - _Atomic_ = 208u, - _Bool_ = 209u, - _Complex_ = 210u, - _Generic_ = 211u, - _Imaginary_ = 212u, - _Noreturn_ = 213u, - _Static_assert_ = 214u, - _Thread_local_ = 215u, - _assert_ = 216u, - _import_ = 217u, - __cdecl_ = 218u, - __declspec_ = 219u, - __stdcall_ = 220u, - __thread_ = 221u, - __pragma_ = 222u, - __int128_ = 223u, - __attribute___ = 224u, + interpolated = 88u, + hexadecimalString = 89u, + this_ = 90u, + super_ = 91u, + error = 92u, + void_ = 93u, + int8 = 94u, + uns8 = 95u, + int16 = 96u, + uns16 = 97u, + int32 = 98u, + uns32 = 99u, + int64 = 100u, + uns64 = 101u, + int128 = 102u, + uns128 = 103u, + float32 = 104u, + float64 = 105u, + float80 = 106u, + imaginary32 = 107u, + imaginary64 = 108u, + imaginary80 = 109u, + complex32 = 110u, + complex64 = 111u, + complex80 = 112u, + char_ = 113u, + wchar_ = 114u, + dchar_ = 115u, + bool_ = 116u, + struct_ = 117u, + 
class_ = 118u, + interface_ = 119u, + union_ = 120u, + enum_ = 121u, + import_ = 122u, + alias_ = 123u, + override_ = 124u, + delegate_ = 125u, + function_ = 126u, + mixin_ = 127u, + align_ = 128u, + extern_ = 129u, + private_ = 130u, + protected_ = 131u, + public_ = 132u, + export_ = 133u, + static_ = 134u, + final_ = 135u, + const_ = 136u, + abstract_ = 137u, + debug_ = 138u, + deprecated_ = 139u, + in_ = 140u, + out_ = 141u, + inout_ = 142u, + lazy_ = 143u, + auto_ = 144u, + package_ = 145u, + immutable_ = 146u, + if_ = 147u, + else_ = 148u, + while_ = 149u, + for_ = 150u, + do_ = 151u, + switch_ = 152u, + case_ = 153u, + default_ = 154u, + break_ = 155u, + continue_ = 156u, + with_ = 157u, + synchronized_ = 158u, + return_ = 159u, + goto_ = 160u, + try_ = 161u, + catch_ = 162u, + finally_ = 163u, + asm_ = 164u, + foreach_ = 165u, + foreach_reverse_ = 166u, + scope_ = 167u, + onScopeExit = 168u, + onScopeFailure = 169u, + onScopeSuccess = 170u, + invariant_ = 171u, + unittest_ = 172u, + argumentTypes = 173u, + ref_ = 174u, + macro_ = 175u, + parameters = 176u, + traits = 177u, + pure_ = 178u, + nothrow_ = 179u, + gshared = 180u, + line = 181u, + file = 182u, + fileFullPath = 183u, + moduleString = 184u, + functionString = 185u, + prettyFunction = 186u, + shared_ = 187u, + at = 188u, + pow = 189u, + powAssign = 190u, + goesTo = 191u, + vector = 192u, + pound = 193u, + arrow = 194u, + colonColon = 195u, + wchar_tLiteral = 196u, + endOfLine = 197u, + whitespace = 198u, + inline_ = 199u, + register_ = 200u, + restrict_ = 201u, + signed_ = 202u, + sizeof_ = 203u, + typedef_ = 204u, + unsigned_ = 205u, + volatile_ = 206u, + _Alignas_ = 207u, + _Alignof_ = 208u, + _Atomic_ = 209u, + _Bool_ = 210u, + _Complex_ = 211u, + _Generic_ = 212u, + _Imaginary_ = 213u, + _Noreturn_ = 214u, + _Static_assert_ = 215u, + _Thread_local_ = 216u, + _assert_ = 217u, + _import_ = 218u, + __cdecl_ = 219u, + __declspec_ = 220u, + __stdcall_ = 221u, + __thread_ = 222u, + __pragma_ = 223u, + 
__int128_ = 224u, + __attribute___ = 225u, }; class FuncExp final : public Expression @@ -3013,6 +3019,17 @@ public: static IntegerExp* createBool(bool b); }; +class InterpExp final : public Expression +{ +public: + char postfix; + OwnedBy ownedByCtfe; + InterpolatedSet* interpolatedSet; + enum : char { NoPostfix = 0u }; + + void accept(Visitor* v) override; +}; + class IntervalExp final : public Expression { public: @@ -5730,6 +5747,7 @@ struct ASTCodegen final using InExp = ::InExp; using IndexExp = ::IndexExp; using IntegerExp = ::IntegerExp; + using InterpExp = ::InterpExp; using IntervalExp = ::IntervalExp; using IsExp = ::IsExp; using LineInitExp = ::LineInitExp; @@ -8662,6 +8680,11 @@ struct Id final static Identifier* _d_arraysetassign; static Identifier* _d_arrayassign_l; static Identifier* _d_arrayassign_r; + static Identifier* imported; + static Identifier* InterpolationHeader; + static Identifier* InterpolationFooter; + static Identifier* InterpolatedLiteral; + static Identifier* InterpolatedExpression; static Identifier* Pinline; static Identifier* lib; static Identifier* linkerDirective; @@ -8889,7 +8912,11 @@ struct Token final _d_real floatvalue; struct { - const char* ustring; + union + { + const char* ustring; + InterpolatedSet* interpolatedSet; + }; uint32_t len; uint8_t postfix; }; diff --git a/compiler/src/dmd/hdrgen.d b/compiler/src/dmd/hdrgen.d index 570c662624..4827c4b8a0 100644 --- a/compiler/src/dmd/hdrgen.d +++ b/compiler/src/dmd/hdrgen.d @@ -2247,6 +2247,37 @@ private void expressionPrettyPrint(Expression e, ref OutBuffer buf, ref HdrGenSt buf.writeByte(e.postfix); } + void visitInterpolation(InterpExp e) + { + buf.writeByte('i'); + buf.writeByte('"'); + const o = buf.length; + + foreach (idx, str; e.interpolatedSet.parts) + { + if (idx % 2 == 0) + { + foreach(ch; str) + writeCharLiteral(buf, ch); + } + else + { + buf.writeByte('$'); + buf.writeByte('('); + foreach(ch; str) + buf.writeByte(ch); + buf.writeByte(')'); + } + } + + if 
(hgs.ddoc) + escapeDdocString(buf, o); + buf.writeByte('"'); + if (e.postfix) + buf.writeByte(e.postfix); + + } + void visitArrayLiteral(ArrayLiteralExp e) { buf.writeByte('['); @@ -2827,6 +2858,7 @@ private void expressionPrettyPrint(Expression e, ref OutBuffer buf, ref HdrGenSt case EXP.super_: return visitSuper(e.isSuperExp()); case EXP.null_: return visitNull(e.isNullExp()); case EXP.string_: return visitString(e.isStringExp()); + case EXP.interpolated: return visitInterpolation(e.isInterpExp()); case EXP.arrayLiteral: return visitArrayLiteral(e.isArrayLiteralExp()); case EXP.assocArrayLiteral: return visitAssocArrayLiteral(e.isAssocArrayLiteralExp()); case EXP.structLiteral: return visitStructLiteral(e.isStructLiteralExp()); diff --git a/compiler/src/dmd/id.d b/compiler/src/dmd/id.d index a66f2af467..5ad324d605 100644 --- a/compiler/src/dmd/id.d +++ b/compiler/src/dmd/id.d @@ -335,6 +335,12 @@ immutable Msgtable[] msgtable = { "_d_arrayassign_l" }, { "_d_arrayassign_r" }, + { "imported" }, + { "InterpolationHeader" }, + { "InterpolationFooter" }, + { "InterpolatedLiteral" }, + { "InterpolatedExpression" }, + // For pragma's { "Pinline", "inline" }, { "lib" }, diff --git a/compiler/src/dmd/lexer.d b/compiler/src/dmd/lexer.d index 5eadd721f6..937597cdf8 100644 --- a/compiler/src/dmd/lexer.d +++ b/compiler/src/dmd/lexer.d @@ -506,6 +506,29 @@ class Lexer } else goto case_ident; + case 'i': + if (Ccompile) + goto case_ident; + if (p[1] == '"') + { + p++; // skip the i + escapeStringConstant(t, true); + return; + } + else if (p[1] == '`') + { + p++; // skip the i + wysiwygStringConstant(t, true); + return; + } + else if (p[1] == 'q' && p[2] == '{') + { + p += 2; // skip the i and q + tokenStringConstant(t, true); + return; + } + else + goto case_ident; case '"': escapeStringConstant(t); return; @@ -517,7 +540,7 @@ class Lexer case 'f': case 'g': case 'h': - case 'i': + /*case 'i':*/ case 'j': case 'k': case 'l': @@ -1429,9 +1452,18 @@ class Lexer Params: result = 
pointer to the token that accepts the result */ - private void wysiwygStringConstant(Token* result) + private void wysiwygStringConstant(Token* result, bool supportInterpolation = false) { - result.value = TOK.string_; + if (supportInterpolation) + { + result.value = TOK.interpolated; + result.interpolatedSet = null; + } + else + { + result.value = TOK.string_; + } + Loc start = loc(); auto terminator = p[0]; p++; @@ -1451,6 +1483,14 @@ class Lexer c = '\n'; // treat EndOfLine as \n character endOfLine(); break; + case '$': + if (!supportInterpolation) + goto default; + + if (!handleInterpolatedSegment(result, start)) + goto default; + + continue; case 0: case 0x1A: error("unterminated string constant starting at %s", start.toChars()); @@ -1461,7 +1501,11 @@ class Lexer default: if (c == terminator) { - result.setString(stringbuffer); + if (supportInterpolation) + result.appendInterpolatedPart(stringbuffer); + else + result.setString(stringbuffer); + stringPostfix(result); return; } @@ -1736,13 +1780,21 @@ class Lexer Params: result = pointer to the token that accepts the result */ - private void tokenStringConstant(Token* result) + private void tokenStringConstant(Token* result, bool supportInterpolation = false) { - result.value = TOK.string_; + if (supportInterpolation) + { + result.value = TOK.interpolated; + result.interpolatedSet = null; + } + else + { + result.value = TOK.string_; + } uint nest = 1; const start = loc(); - const pstart = ++p; + auto pstart = ++p; inTokenStringConstant++; scope(exit) inTokenStringConstant--; while (1) @@ -1757,10 +1809,28 @@ class Lexer case TOK.rightCurly: if (--nest == 0) { - result.setString(pstart, p - 1 - pstart); + if (supportInterpolation) + result.appendInterpolatedPart(pstart, p - 1 - pstart); + else + result.setString(pstart, p - 1 - pstart); + stringPostfix(result); return; } + continue; + case TOK.dollar: + if (!supportInterpolation) + goto default; + + stringbuffer.setsize(0); + stringbuffer.write(pstart, p - 1 - 
pstart); + if (!handleInterpolatedSegment(result, start)) + goto default; + + stringbuffer.setsize(0); + + pstart = p; + continue; case TOK.endOfFile: error("unterminated token string constant starting at %s", start.toChars()); @@ -1772,6 +1842,52 @@ class Lexer } } + // returns true if it got special treatment as an interpolated segment + // otherwise returns false, indicating to treat it as just part of a normal string + private bool handleInterpolatedSegment(Token* token, Loc start) + { + switch(*p) + { + case '(': + // expression, at this level we need to scan until the closing ')' + + // always put the string part in first + token.appendInterpolatedPart(stringbuffer); + stringbuffer.setsize(0); + + int openParenCount = 1; + p++; // skip the first open paren + auto pstart = p; + while (openParenCount > 0) + { + // need to scan with the lexer to support embedded strings and other complex cases + Token tok; + scan(&tok); + if (tok.value == TOK.leftParenthesis) + openParenCount++; + if (tok.value == TOK.rightParenthesis) + openParenCount--; + if (tok.value == TOK.endOfFile) + { + // FIXME: make this error better, it spams a lot + error("unterminated interpolated string constant starting at %s", start.toChars()); + return false; + } + } + + // then put the interpolated string segment + token.appendInterpolatedPart(pstart[0 .. p - 1 - pstart]); + + stringbuffer.setsize(0); // make sure this is reset from the last token scan + // otherwise something like i"$(func("thing")) stuff" can still include it + + return true; + default: + // nothing special + return false; + } + } + /** Scan a quoted string while building the processed string value by handling escape sequences. The result is returned in the given `t` token. 
@@ -1783,9 +1899,17 @@ class Lexer * D https://dlang.org/spec/lex.html#double_quoted_strings * ImportC C11 6.4.5 */ - private void escapeStringConstant(Token* t) + private void escapeStringConstant(Token* t, bool supportInterpolation = false) { - t.value = TOK.string_; + if (supportInterpolation) + { + t.value = TOK.interpolated; + t.interpolatedSet = null; + } + else + { + t.value = TOK.string_; + } const start = loc(); const tc = *p++; // opening quote @@ -1813,11 +1937,28 @@ class Lexer c = escapeSequence(c2); stringbuffer.writeUTF8(c); continue; + case '$': + if (supportInterpolation) + { + p++; // skip escaped $ + stringbuffer.writeByte('$'); + continue; + } + else + goto default; default: c = escapeSequence(c2); break; } break; + case '$': + if (!supportInterpolation) + goto default; + + if (!handleInterpolatedSegment(t, start)) + goto default; + + continue; case '\n': endOfLine(); if (Ccompile) @@ -1835,7 +1976,10 @@ class Lexer case '"': if (c != tc) goto default; - t.setString(stringbuffer); + if (supportInterpolation) + t.appendInterpolatedPart(stringbuffer); + else + t.setString(stringbuffer); if (!Ccompile) stringPostfix(t); return; diff --git a/compiler/src/dmd/parse.d b/compiler/src/dmd/parse.d index 268622a82f..0dc54ffe76 100644 --- a/compiler/src/dmd/parse.d +++ b/compiler/src/dmd/parse.d @@ -2015,6 +2015,7 @@ class Parser(AST, Lexer = dmd.lexer.Lexer) : Lexer case TOK.wcharLiteral: case TOK.dcharLiteral: case TOK.string_: + case TOK.interpolated: case TOK.hexadecimalString: case TOK.file: case TOK.fileFullPath: @@ -5820,6 +5821,7 @@ class Parser(AST, Lexer = dmd.lexer.Lexer) : Lexer case TOK.true_: case TOK.false_: case TOK.string_: + case TOK.interpolated: case TOK.hexadecimalString: case TOK.leftParenthesis: case TOK.cast_: @@ -7313,6 +7315,7 @@ class Parser(AST, Lexer = dmd.lexer.Lexer) : Lexer case TOK.wcharLiteral: case TOK.dcharLiteral: case TOK.string_: + case TOK.interpolated: case TOK.hexadecimalString: case TOK.file: case 
TOK.fileFullPath: @@ -8177,6 +8180,11 @@ class Parser(AST, Lexer = dmd.lexer.Lexer) : Lexer nextToken(); break; + case TOK.interpolated: + e = new AST.InterpExp(loc, token.interpolatedSet, token.postfix); + nextToken(); + break; + case TOK.string_: case TOK.hexadecimalString: const bool hexString = token.value == TOK.hexadecimalString; @@ -8810,6 +8818,7 @@ class Parser(AST, Lexer = dmd.lexer.Lexer) : Lexer case TOK.wcharLiteral: case TOK.dcharLiteral: case TOK.string_: + case TOK.interpolated: case TOK.function_: case TOK.delegate_: case TOK.typeof_: diff --git a/compiler/src/dmd/parsetimevisitor.d b/compiler/src/dmd/parsetimevisitor.d index 3d0a585462..422c1c8c0e 100644 --- a/compiler/src/dmd/parsetimevisitor.d +++ b/compiler/src/dmd/parsetimevisitor.d @@ -183,6 +183,7 @@ public: void visit(AST.TypeidExp e) { visit(cast(AST.Expression)e); } void visit(AST.TraitsExp e) { visit(cast(AST.Expression)e); } void visit(AST.StringExp e) { visit(cast(AST.Expression)e); } + void visit(AST.InterpExp e) { visit(cast(AST.Expression)e); } void visit(AST.NewExp e) { visit(cast(AST.Expression)e); } void visit(AST.AssocArrayLiteralExp e) { visit(cast(AST.Expression)e); } void visit(AST.ArrayLiteralExp e) { visit(cast(AST.Expression)e); } diff --git a/compiler/src/dmd/strictvisitor.d b/compiler/src/dmd/strictvisitor.d index 39fcb68f0b..ab87b3f9d7 100644 --- a/compiler/src/dmd/strictvisitor.d +++ b/compiler/src/dmd/strictvisitor.d @@ -138,6 +138,7 @@ extern(C++) class StrictVisitor(AST) : ParseTimeVisitor!AST override void visit(AST.TypeidExp) { assert(0); } override void visit(AST.TraitsExp) { assert(0); } override void visit(AST.StringExp) { assert(0); } + override void visit(AST.InterpExp) { assert(0); } override void visit(AST.NewExp) { assert(0); } override void visit(AST.AssocArrayLiteralExp) { assert(0); } override void visit(AST.ArrayLiteralExp) { assert(0); } diff --git a/compiler/src/dmd/tokens.d b/compiler/src/dmd/tokens.d index 589bc2b53c..da4a3ee209 100644 --- 
a/compiler/src/dmd/tokens.d +++ b/compiler/src/dmd/tokens.d @@ -124,6 +124,7 @@ enum TOK : ubyte // Leaf operators identifier, string_, + interpolated, hexadecimalString, this_, super_, @@ -380,6 +381,7 @@ enum EXP : ubyte // Leaf operators identifier, string_, + interpolated, this_, super_, halt, @@ -623,6 +625,10 @@ static immutable TOK[TOK.max + 1] Ckeywords = } } (); +struct InterpolatedSet { + // all strings in the parts are zero terminated at length+1 + string[] parts; +} /*********************************************************** */ @@ -645,7 +651,11 @@ extern (C++) struct Token struct { - const(char)* ustring; // UTF8 string + union + { + const(char)* ustring; // UTF8 string + InterpolatedSet* interpolatedSet; + } uint len; ubyte postfix; // 'c', 'w', 'd' } @@ -833,6 +843,7 @@ extern (C++) struct Token // For debugging TOK.error: "error", TOK.string_: "string", + TOK.interpolated: "interpolated string", TOK.onScopeExit: "scope(exit)", TOK.onScopeSuccess: "scope(success)", TOK.onScopeFailure: "scope(failure)", @@ -910,6 +921,24 @@ nothrow: return 0; } + extern(D) void appendInterpolatedPart(const ref OutBuffer buf) { + appendInterpolatedPart(cast(const(char)*)buf[].ptr, buf.length); + } + extern(D) void appendInterpolatedPart(const(char)[] str) { + appendInterpolatedPart(str.ptr, str.length); + } + extern(D) void appendInterpolatedPart(const(char)* ptr, size_t length) { + assert(value == TOK.interpolated); + if (interpolatedSet is null) + interpolatedSet = new InterpolatedSet; + + auto s = cast(char*)mem.xmalloc_noscan(length + 1); + memcpy(s, ptr, length); + s[length] = 0; + + interpolatedSet.parts ~= cast(string) s[0 .. 
length]; + } + /**** * Set to contents of ptr[0..length] * Params: @@ -918,6 +947,7 @@ nothrow: */ void setString(const(char)* ptr, size_t length) { + value = TOK.string_; auto s = cast(char*)mem.xmalloc_noscan(length + 1); memcpy(s, ptr, length); s[length] = 0; @@ -941,6 +971,7 @@ nothrow: */ void setString() { + value = TOK.string_; ustring = ""; len = 0; postfix = 0; diff --git a/compiler/src/dmd/tokens.h b/compiler/src/dmd/tokens.h index f944663e43..ef91001a99 100644 --- a/compiler/src/dmd/tokens.h +++ b/compiler/src/dmd/tokens.h @@ -133,6 +133,7 @@ enum class TOK : unsigned char // Leaf operators identifier, string_, + interpolated, hexadecimalString, this_, super_, @@ -390,6 +391,7 @@ enum class EXP : unsigned char // Leaf operators identifier, string_, + interpolated, this_, super_, halt, @@ -461,7 +463,12 @@ struct Token real_t floatvalue; struct - { utf8_t *ustring; // UTF8 string + { + union + { + utf8_t *ustring; // UTF8 string + void *interpolatedSet; + }; unsigned len; unsigned char postfix; // 'c', 'w', 'd' }; diff --git a/compiler/src/dmd/visitor.h b/compiler/src/dmd/visitor.h index 7fa08cb0e8..6e3d315188 100644 --- a/compiler/src/dmd/visitor.h +++ b/compiler/src/dmd/visitor.h @@ -195,6 +195,7 @@ class ThisExp; class SuperExp; class NullExp; class StringExp; +class InterpExp; class TupleExp; class ArrayLiteralExp; class AssocArrayLiteralExp; @@ -480,6 +481,7 @@ public: virtual void visit(TypeidExp *e) { visit((Expression *)e); } virtual void visit(TraitsExp *e) { visit((Expression *)e); } virtual void visit(StringExp *e) { visit((Expression *)e); } + virtual void visit(InterpExp *e) { visit((Expression *)e); } virtual void visit(NewExp *e) { visit((Expression *)e); } virtual void visit(AssocArrayLiteralExp *e) { visit((Expression *)e); } virtual void visit(ArrayLiteralExp *e) { visit((Expression *)e); } diff --git a/compiler/test/fail_compilation/interpolatedexpressionsequence_postfix.d 
b/compiler/test/fail_compilation/interpolatedexpressionsequence_postfix.d new file mode 100644 index 0000000000..c915c446b4 --- /dev/null +++ b/compiler/test/fail_compilation/interpolatedexpressionsequence_postfix.d @@ -0,0 +1,13 @@ +/* TEST_OUTPUT: +--- +fail_compilation/interpolatedexpressionsequence_postfix.d(10): Error: String postfixes on interpolated expression sequences are not allowed. +fail_compilation/interpolatedexpressionsequence_postfix.d(11): Error: String postfixes on interpolated expression sequences are not allowed. +fail_compilation/interpolatedexpressionsequence_postfix.d(12): Error: String postfixes on interpolated expression sequences are not allowed. +--- +*/ +void main() { + // all postfixes are banned + auto c = i"foo"c; + auto w = i"foo"w; + auto d = i"foo"d; +} diff --git a/compiler/test/runnable/interpolatedexpressionsequence.d b/compiler/test/runnable/interpolatedexpressionsequence.d new file mode 100644 index 0000000000..831150792f --- /dev/null +++ b/compiler/test/runnable/interpolatedexpressionsequence.d @@ -0,0 +1,51 @@ +import core.interpolation; + +alias AliasSeq(T...) = T; + +string simpleToString(T...)(T thing) { + string s; + foreach(item; thing) + // all the items provided by core.interpolation have + // toString to return an appropriate value + // + // then this particular example only has embedded strings + // and chars, so we can append them directly + static if(__traits(hasMember, item, "toString")) + s ~= item.toString(); + else + s ~= item; + + return s; +} + +void main() { + int a = 1; + string b = "one"; + // parser won't permit alias = i".." directly; i"..." is meant to + // be used as a function/template parameter at this time. 
+ alias expr = AliasSeq!i"$(a) $(b)"; + // elements from the source code are available at compile time, so + // we static assert those, but the values, of course, are different + static assert(expr[0] == InterpolationHeader()); + static assert(expr[1] == InterpolatedExpression!"a"()); + assert(expr[2] == a); // actual value not available at compile time + static assert(expr[3] == InterpolatedLiteral!" "()); + // the parens around the expression are not included + static assert(expr[4] == InterpolatedExpression!"b"()); + assert(expr[5] == b); // actual value not available at compile time + static assert(expr[6] == InterpolationFooter()); + + // it does currently allow `auto` to be used, it creates a value tuple + // you can embed any D expressions inside the parentheses, and the + // token is not ended until you get the *outer* ) and ". + auto thing = i"$(b) $("$" ~ ')' ~ `"`)"; + assert(simpleToString(thing) == "one $)\""); + + assert(simpleToString(i"$b") == "$b"); // support for $ident removed by popular demand + + // i`` and iq{} should also work + assert(simpleToString(i` $(b) is $(b)!`) == " one is one!"); + assert(simpleToString(iq{ $(b) is $(b)!}) == " one is one!"); + assert(simpleToString(i`\$('$')`) == "\\$"); // no \ escape there + assert(simpleToString(iq{{$('$')}}) == "{$}"); // {} needs to work +} diff --git a/compiler/test/unit/lexer/location_offset.d b/compiler/test/unit/lexer/location_offset.d index b873214688..21266276d2 100644 --- a/compiler/test/unit/lexer/location_offset.d +++ b/compiler/test/unit/lexer/location_offset.d @@ -515,6 +515,7 @@ enum ignoreTokens showCtfeContext, objcClassReference, vectorArray, + interpolated, wchar_tLiteral, endOfLine, diff --git a/druntime/mak/COPY b/druntime/mak/COPY index 36410e0c3c..920b4f4971 100644 --- a/druntime/mak/COPY +++ b/druntime/mak/COPY @@ -17,6 +17,7 @@ COPY=\ $(IMPDIR)\core\exception.d \ $(IMPDIR)\core\factory.d \ $(IMPDIR)\core\int128.d \ + $(IMPDIR)\core\interpolation.d \ 
$(IMPDIR)\core\lifetime.d \ $(IMPDIR)\core\math.d \ $(IMPDIR)\core\memory.d \ diff --git a/druntime/mak/DOCS b/druntime/mak/DOCS index 1697fd80f3..81de39c5ed 100644 --- a/druntime/mak/DOCS +++ b/druntime/mak/DOCS @@ -5,6 +5,7 @@ DOCS=\ $(DOCDIR)\core_checkedint.html \ $(DOCDIR)\core_exception.html \ $(DOCDIR)\core_int128.html \ + $(DOCDIR)\core_interpolation.html \ $(DOCDIR)\core_math.html \ $(DOCDIR)\core_vararg.html \ $(DOCDIR)\core_volatile.html \ diff --git a/druntime/mak/SRCS b/druntime/mak/SRCS index 3eb7bb7e77..1182c69d83 100644 --- a/druntime/mak/SRCS +++ b/druntime/mak/SRCS @@ -11,6 +11,7 @@ SRCS=\ src\core\exception.d \ src\core\factory.d \ src\core\int128.d \ + src\core\interpolation.d \ src\core\lifetime.d \ src\core\math.d \ src\core\memory.d \ diff --git a/druntime/src/core/interpolation.d b/druntime/src/core/interpolation.d new file mode 100644 index 0000000000..0d45fe7908 --- /dev/null +++ b/druntime/src/core/interpolation.d @@ -0,0 +1,156 @@ +/++ + This module provides definitions to support D's + interpolated expression sequence literal, sometimes + called string interpolation. + + + --- + string str; + int num; + // the compiler uses this module to implement the + // i"..." literal used here. + auto a = i"$​(str) has $​(num) items."; + --- + + The variable `a` is a sequence of expressions: + + --- + a[0] == InterpolationHeader() + a[$-1] == InterpolationFooter() + --- + + First and last, you see the header and footer, to + clearly indicate where interpolation begins and ends. + Note that there may be nested interpolated sequences too, + each with their own header and footer. Think of them + as a set of balanced parenthesis around the contents. + + Inside, you will find three general categories of + content: `InterpolatedLiteral!"string"` for string + expressions, `InterpolatedExpression!"code"` for code + expressions, and then the values themselves as their + own type. 
+ + In the example: + --- + auto a = i"$​(str) has $​(num) items."; + --- + + We will find: + --- + a[0] == InterpolationHeader() + a[1] == InterpolatedExpression!"str" + a[2] == str + a[3] == InterpolatedLiteral!" has "; + a[4] == InterpolatedExpression!"num"; + a[5] == num + a[6] == InterpolatedLiteral!" items."; + a[7] == InterpolationFooter() + a.length == 8; + --- + + You can see the correspondence with the original + input: when you write `$​(expression)`, the string of the + expression is passed as `InterpolatedExpression!ThatString`, + (excluding any parenthesis around the expression), + and everything else is passed as `InterpolatedLiteral!str`, + in the same sequence as they appeared in the source. + + After an `InterpolatedExpression!...`, you will find the + actual value(s) in the tuple. (If the expression expanded + to multiple values - for example, if it was itself a tuple, + there will be multiple values for a single expression.) + + Library functions should NOT attempt to mixin the code + from an `InterpolatedExpression` themselves. Doing so + will fail, since it is coming from a different scope anyway. + The string is provided to you only for informational purposes + and as a sentinel to separate things the user wrote. + + Your code should be able to handle an empty code string + in `InterpolatedExpression` or even an entirely missing + `InterpolatedExpression`, in case an implementation decides to + not emit these. + + The `toString` members on these return `null`, except for + the `InterpolatedLiteral`, which returns the literal string. + This is to ease processing by generic functions like + `std.stdio.write` or `std.conv.text`, making them effectively + transparently skipped. + + To extract the string from an `InterpolatedLiteral`, you can + use an `is` expression or the `.toString` method. + + To extract the string from a `InterpolatedExpression`, you can + use an `is` expression or the `.expression` member. 
+ + None of these structures have runtime state. + + History: + Added in dmd 2.10x frontend, released in late 2023. ++/ +module core.interpolation; + +/++ + Sentinel values to indicate the beginning and end of an + interpolated expression sequence. + + Note that these can nest, so while processing a sequence, + it may be helpful to keep a nesting count if that knowledge + is important to your application. ++/ +struct InterpolationHeader { + /++ + Returns `null` for easy compatibility with existing functions + like `std.stdio.writeln` and `std.conv.text`. + +/ + string toString() const @nogc pure nothrow @safe { + return null; + } +} + +/// ditto +struct InterpolationFooter { + /++ + Returns `null` for easy compatibility with existing functions + like `std.stdio.writeln` and `std.conv.text`. + +/ + string toString() const @nogc pure nothrow @safe { + return null; + } +} + +/++ + Represents a fragment of a string literal in between expressions + passed as part of an interpolated expression sequence. ++/ +struct InterpolatedLiteral(string text) { + /++ + Returns the text of the interpolated string literal for this + segment of the tuple, for easy access and compatibility with + existing functions like `std.stdio.writeln` and `std.conv.text`. + +/ + string toString() const @nogc pure nothrow @safe { + return text; + } +} + +/++ + Represents the source code of an expression passed as part of an + interpolated expression sequence. ++/ +struct InterpolatedExpression(string text) { + /++ + Returns the text of an interpolated expression used in the + original literal, if provided by the implementation. + +/ + enum expression = text; + + /++ + Returns `null` for easy compatibility with existing functions + like `std.stdio.writeln` and `std.conv.text`. + +/ + string toString() const @nogc pure nothrow @safe { + return null; + } +}