From a8d4b00f98134f30665622d44b9f7cc29bc56f6f Mon Sep 17 00:00:00 2001 From: Witold Baryluk Date: Wed, 19 May 2021 17:20:15 +0000 Subject: [PATCH] Fix issue 21926 - Allow leadings zeros in std.conv.octal First of all, the DDoc currently says that: "Leading zero is allowed, but not required." The current implementation doesn't respect that. D lexer allows leading zero(s) also. And when considering octal from string, there is no possibility of confusion, as the comments in the code claim. Disallowing leading zeros for octal conversion from string, does not help with anything actually. So lets allow leading zeros. Allowing leading zeros, does help when implementing various APIs (i.e. glibc, Linux kernel), where a lot of octal constant, are defined with multiple leading zeros, for readability and alignment, in C headers. Having to manually or automatically special case these when porting such API definitions, is counter-productive. Example from a Linux kernel (`include/uapi/linux/stat.h`): ```c #define S_IFMT 00170000 #define S_IFSOCK 0140000 #define S_IFLNK 0120000 #define S_IFREG 0100000 #define S_IFBLK 0060000 #define S_IFDIR 0040000 #define S_IFCHR 0020000 #define S_IFIFO 0010000 #define S_ISUID 0004000 #define S_ISGID 0002000 #define S_ISVTX 0001000 ``` With this patch, now it is trivial and easier to convert these to D: ```d ... enum S_ISVTX = octal!"0001000"; ``` while being close to original. That helps with readability, and long term maintenance. In fact the run-time version provided by `parse!(int)(string, 8)` also supports leading zeros already. So this makes `octal` more consistent `parse`. --- std/conv.d | 95 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 30 deletions(-) diff --git a/std/conv.d b/std/conv.d index 4eb9da7cc..539ffb972 100644 --- a/std/conv.d +++ b/std/conv.d @@ -2884,6 +2884,7 @@ do assert(parse!int(s = "0011001101101", 2) == 0b0011001101101); assert(parse!int(s = "765", 8) == octal!765); + assert(parse!int(s = "000135", 8) == octal!"135"); assert(parse!int(s = "fCDe", 16) == 0xfcde); // https://issues.dlang.org/show_bug.cgi?id=6609 @@ -4841,12 +4842,14 @@ if (is(typeof(decimalInteger)) && isIntegral!(typeof(decimalInteger))) /// @safe unittest { - // same as 0177 - auto x = octal!177; + // Same as 0177 + auto a = octal!177; // octal is a compile-time device - enum y = octal!160; + enum b = octal!160; // Create an unsigned octal - auto z = octal!"1_000_000u"; + auto c = octal!"1_000_000u"; + // Leading zeros are allowed when converting from a string + auto d = octal!"0001_200_000"; } /* @@ -4877,6 +4880,9 @@ private T octal(T)(const string num) { int a = octal!int("10"); assert(a == 8); + + int b = octal!int("000137"); + assert(b == 95); } /* @@ -4896,12 +4902,41 @@ private template octalFitsInInt(string octalNum) private string strippedOctalLiteral(string original) { string stripped = ""; + bool leading_zeros = true; foreach (c; original) - if (c >= '0' && c <= '7') - stripped ~= c; + { + if (!('0' <= c && c <= '7')) + continue; + if (c == '0') + { + if (leading_zeros) + continue; + } + else + { + leading_zeros = false; + } + stripped ~= c; + } + if (stripped.length == 0) + { + assert(leading_zeros); + return "0"; + } return stripped; } +@safe unittest +{ + static assert(strippedOctalLiteral("7") == "7"); + static assert(strippedOctalLiteral("123") == "123"); + static assert(strippedOctalLiteral("00123") == "123"); + static assert(strippedOctalLiteral("01230") == "1230"); + static assert(strippedOctalLiteral("0") == "0"); + static assert(strippedOctalLiteral("00_000") == "0"); + static assert(strippedOctalLiteral("000_000_12_300") == "12300"); +} + private template literalIsLong(string num) { static if (num.length > 1) @@ -4925,8 +4960,8 @@ private template literalIsUnsigned(string num) /* Returns if the given string is a correctly formatted octal literal. -The format is specified in spec/lex.html. The leading zero is allowed, but -not required. +The format is specified in spec/lex.html. The leading zeros are allowed, +but not required. */ @safe pure nothrow @nogc private bool isOctalLiteral(const string num) @@ -4934,34 +4969,30 @@ private bool isOctalLiteral(const string num) if (num.length == 0) return false; - // Must start with a number. To avoid confusion, literals that - // start with a '0' are not allowed - if (num[0] == '0' && num.length > 1) - return false; + // Must start with a digit. if (num[0] < '0' || num[0] > '7') return false; foreach (i, c; num) { - if ((c < '0' || c > '7') && c != '_') // not a legal character + if (('0' <= c && c <= '7') || c == '_') // a legal character + continue; + + if (i < num.length - 2) + return false; + + // gotta check for those suffixes + if (c != 'U' && c != 'u' && c != 'L') + return false; + if (i != num.length - 1) { - if (i < num.length - 2) - return false; - else // gotta check for those suffixes - { - if (c != 'U' && c != 'u' && c != 'L') - return false; - if (i != num.length - 1) - { - // if we're not the last one, the next one must - // also be a suffix to be valid - char c2 = num[$-1]; - if (c2 != 'U' && c2 != 'u' && c2 != 'L') - return false; // spam at the end of the string - if (c2 == c) - return false; // repeats are disallowed - } - } + // if we're not the last one, the next one must + // also be a suffix to be valid + char c2 = num[$-1]; + if (c2 != 'U' && c2 != 'u' && c2 != 'L') + return false; // spam at the end of the string + if (c2 == c) + return false; // repeats are disallowed } } @@ -4981,6 +5012,9 @@ private bool isOctalLiteral(const string num) static assert(octal!"7" == 7); static assert(octal!"10" == 8); static assert(octal!"666" == 438); + static assert(octal!"0004001" == 2049); + static assert(octal!"00" == 0); + static assert(octal!"0_0" == 0); static assert(octal!45 == 37); static assert(octal!0 == 0); @@ -4989,6 +5023,7 @@ private bool isOctalLiteral(const string num) static assert(octal!666 == 438); static assert(octal!"66_6" == 438); + static assert(octal!"0_0_66_6" == 438); static assert(octal!2520046213 == 356535435); static assert(octal!"2520046213" == 356535435);