mirror of
https://github.com/dlang/phobos.git
synced 2025-04-29 22:50:38 +03:00
new std.uni module
This commit is contained in:
parent
1bd22b2e8b
commit
9a053d97c2
8 changed files with 12979 additions and 2681 deletions
|
@ -202,7 +202,7 @@ EXTRA_MODULES += $(EXTRA_DOCUMENTABLES) $(addprefix \
|
|||
std/internal/digest/, sha_SSSE3 ) $(addprefix \
|
||||
std/internal/math/, biguintcore biguintnoasm biguintx86 \
|
||||
gammafunction errorfunction) $(addprefix std/internal/, \
|
||||
processinit uni uni_tab)
|
||||
processinit uni uni_tab unicode_tables)
|
||||
|
||||
# Aggregate all D modules relevant to this build
|
||||
D_MODULES = crc32 $(STD_MODULES) $(EXTRA_MODULES) $(STD_NET_MODULES) $(STD_DIGEST_MODULES)
|
||||
|
|
5777
std/internal/unicode_tables.d
Normal file
5777
std/internal/unicode_tables.d
Normal file
File diff suppressed because one or more lines are too long
|
@ -596,7 +596,8 @@ unittest
|
|||
val = parseJSON(`"\u2660\u2666"`);
|
||||
assert(toJSON(&val) == "\"\♠\♦\"");
|
||||
|
||||
assertNotThrown(parseJSON(`{ "foo": "` ~ "\u007F" ~ `"}`));
|
||||
//0x7F is a control character (see Unicode spec)
|
||||
assertThrown(parseJSON(`{ "foo": "` ~ "\u007F" ~ `"}`));
|
||||
|
||||
with(parseJSON(`""`))
|
||||
assert(str == "" && str !is null);
|
||||
|
|
|
@ -225,8 +225,9 @@ module std.regex;
|
|||
import std.internal.uni, std.internal.uni_tab;//unicode property tables
|
||||
import std.array, std.algorithm, std.range,
|
||||
std.conv, std.exception, std.traits, std.typetuple,
|
||||
std.uni, std.utf, std.format, std.typecons, std.bitmanip,
|
||||
std.utf, std.format, std.typecons, std.bitmanip,
|
||||
std.functional, std.exception;
|
||||
|
||||
import core.bitop, core.stdc.string, core.stdc.stdlib;
|
||||
static import ascii = std.ascii;
|
||||
import std.string : representation;
|
||||
|
@ -234,6 +235,9 @@ import std.string : representation;
|
|||
debug import std.stdio;
|
||||
|
||||
private:
|
||||
|
||||
import std.uni : isAlpha, isWhite;
|
||||
|
||||
@safe:
|
||||
|
||||
//uncomment to get a barrage of debug info
|
||||
|
|
395
std/string.d
395
std/string.d
|
@ -71,85 +71,15 @@ class StringException : Exception
|
|||
/++
|
||||
Compares two ranges of characters lexicographically. The comparison is
|
||||
case insensitive. Use $(XREF algorithm, cmp) for a case sensitive
|
||||
comparison. icmp works like $(XREF algorithm, cmp) except that it
|
||||
converts characters to lowercase prior to applying $(D pred). Technically,
|
||||
$(D icmp(r1, r2)) is equivalent to
|
||||
$(D cmp!"std.uni.toLower(a) < std.uni.toLower(b)"(r1, r2)).
|
||||
comparison. For details see $(XREF uni, icmp).
|
||||
|
||||
$(BOOKTABLE,
|
||||
$(TR $(TD $(D < 0)) $(TD $(D s1 < s2) ))
|
||||
$(TR $(TD $(D = 0)) $(TD $(D s1 == s2)))
|
||||
$(TR $(TD $(D > 0)) $(TD $(D s1 > s2)))
|
||||
)
|
||||
+/
|
||||
int icmp(alias pred = "a < b", S1, S2)(S1 s1, S2 s2)
|
||||
if (isSomeString!S1 && isSomeString!S2)
|
||||
{
|
||||
static if (is(typeof(pred) : string))
|
||||
enum isLessThan = pred == "a < b";
|
||||
else
|
||||
enum isLessThan = false;
|
||||
|
||||
size_t i, j;
|
||||
while (i < s1.length && j < s2.length)
|
||||
{
|
||||
immutable c1 = std.uni.toLower(decode(s1, i));
|
||||
immutable c2 = std.uni.toLower(decode(s2, j));
|
||||
|
||||
static if (isLessThan)
|
||||
{
|
||||
if (c1 != c2)
|
||||
{
|
||||
if (c1 < c2) return -1;
|
||||
if (c1 > c2) return 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (binaryFun!pred(c1, c2)) return -1;
|
||||
if (binaryFun!pred(c2, c1)) return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (i < s1.length) return 1;
|
||||
if (j < s2.length) return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int icmp(alias pred = "a < b", S1, S2)(S1 s1, S2 s2)
|
||||
if (!(isSomeString!S1 && isSomeString!S2) &&
|
||||
isForwardRange!S1 && is(Unqual!(ElementType!S1) == dchar) &&
|
||||
isForwardRange!S2 && is(Unqual!(ElementType!S2) == dchar))
|
||||
{
|
||||
static if (is(typeof(pred) : string))
|
||||
enum isLessThan = pred == "a < b";
|
||||
else
|
||||
enum isLessThan = false;
|
||||
|
||||
for (;; s1.popFront(), s2.popFront())
|
||||
{
|
||||
if (s1.empty) return s2.empty ? 0 : -1;
|
||||
if (s2.empty) return 1;
|
||||
|
||||
immutable c1 = std.uni.toLower(s1.front);
|
||||
immutable c2 = std.uni.toLower(s2.front);
|
||||
|
||||
static if (isLessThan)
|
||||
{
|
||||
if (c1 != c2)
|
||||
{
|
||||
if(c1 < c2) return -1;
|
||||
if(c1 > c2) return 1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (binaryFun!pred(c1, c2)) return -1;
|
||||
if (binaryFun!pred(c2, c1)) return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
+/
|
||||
alias icmp = std.uni.icmp;
|
||||
|
||||
unittest
|
||||
{
|
||||
|
@ -786,318 +716,35 @@ unittest
|
|||
|
||||
/++
|
||||
Returns a string which is identical to $(D s) except that all of its
|
||||
characters are lowercase (in unicode, not just ASCII). If $(D s) does not
|
||||
have any uppercase characters, then $(D s) is returned.
|
||||
characters are converted to lowercase (by preforming Unicode lowercase mapping).
|
||||
If none of $(D s) characters were affected, then $(D s) itself is returned.
|
||||
+/
|
||||
S toLower(S)(S s) @trusted pure
|
||||
if (isSomeString!S)
|
||||
{
|
||||
foreach (i, dchar cOuter; s)
|
||||
{
|
||||
if (!std.uni.isUpper(cOuter))
|
||||
continue;
|
||||
auto result = s[0.. i].dup;
|
||||
foreach (dchar c; s[i .. $])
|
||||
{
|
||||
if (std.uni.isUpper(c))
|
||||
{
|
||||
c = std.uni.toLower(c);
|
||||
}
|
||||
result ~= c;
|
||||
}
|
||||
return cast(S) result;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
unittest
|
||||
{
|
||||
debug(string) printf("string.toLower.unittest\n");
|
||||
|
||||
assertCTFEable!(
|
||||
{
|
||||
foreach (S; TypeTuple!(string, wstring, dstring, char[], wchar[], dchar[]))
|
||||
{
|
||||
S s = cast(S)"hello world\u0101";
|
||||
assert(toLower(s) is s);
|
||||
const S sc = "hello world\u0101";
|
||||
assert(toLower(sc) is sc);
|
||||
immutable S si = "hello world\u0101";
|
||||
assert(toLower(si) is si);
|
||||
|
||||
S t = cast(S)"Hello World\u0100";
|
||||
assert(toLower(t) == s);
|
||||
const S tc = "hello world\u0101";
|
||||
assert(toLower(tc) == s);
|
||||
immutable S ti = "hello world\u0101";
|
||||
assert(toLower(ti) == s);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
alias toLower = std.uni.toLower;
|
||||
/++
|
||||
Converts $(D s) to lowercase (in unicode, not just ASCII) in place.
|
||||
Converts $(D s) to lowercase (by performing Unicode lowercase mapping) in place.
|
||||
For a few characters string length may increase after the transformation,
|
||||
in such a case the function reallocates exactly once.
|
||||
If $(D s) does not have any uppercase characters, then $(D s) is unaltered.
|
||||
+/
|
||||
void toLowerInPlace(C)(ref C[] s)
|
||||
if (is(C == char) || is(C == wchar))
|
||||
{
|
||||
for (size_t i = 0; i < s.length; )
|
||||
{
|
||||
immutable c = s[i];
|
||||
if (std.ascii.isUpper(c))
|
||||
{
|
||||
s[i++] = cast(C) (c + (cast(C)'a' - 'A'));
|
||||
}
|
||||
else if (!std.ascii.isASCII(c))
|
||||
{
|
||||
// wide character
|
||||
size_t j = i;
|
||||
dchar dc = decode(s, j);
|
||||
assert(j > i);
|
||||
if (!std.uni.isUpper(dc))
|
||||
{
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
auto toAdd = to!(C[])(std.uni.toLower(dc));
|
||||
s = s[0 .. i] ~ toAdd ~ s[j .. $];
|
||||
i += toAdd.length;
|
||||
}
|
||||
else
|
||||
{
|
||||
++i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void toLowerInPlace(C)(ref C[] s) @safe pure nothrow
|
||||
if (is(C == dchar))
|
||||
{
|
||||
foreach (ref c; s)
|
||||
{
|
||||
if (std.uni.isUpper(c))
|
||||
c = std.uni.toLower(c);
|
||||
}
|
||||
}
|
||||
|
||||
unittest
|
||||
{
|
||||
debug(string) printf("string.toLowerInPlace.unittest\n");
|
||||
|
||||
assertCTFEable!(
|
||||
{
|
||||
foreach (S; TypeTuple!(char[], wchar[], dchar[]))
|
||||
{
|
||||
S s = to!S("hello world\u0101");
|
||||
toLowerInPlace(s);
|
||||
assert(s == "hello world\u0101");
|
||||
|
||||
S t = to!S("Hello World\u0100");
|
||||
toLowerInPlace(t);
|
||||
assert(t == "hello world\u0101");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
unittest
|
||||
{
|
||||
debug(string) printf("string.toLower/toLowerInPlace.unittest\n");
|
||||
|
||||
assertCTFEable!(
|
||||
{
|
||||
string s1 = "FoL";
|
||||
string s2 = toLower(s1);
|
||||
assert(cmp(s2, "fol") == 0, s2);
|
||||
assert(s2 != s1);
|
||||
|
||||
char[] s3 = s1.dup;
|
||||
toLowerInPlace(s3);
|
||||
assert(s3 == s2, s3);
|
||||
|
||||
s1 = "A\u0100B\u0101d";
|
||||
s2 = toLower(s1);
|
||||
s3 = s1.dup;
|
||||
assert(cmp(s2, "a\u0101b\u0101d") == 0);
|
||||
assert(s2 !is s1);
|
||||
toLowerInPlace(s3);
|
||||
assert(s3 == s2, s3);
|
||||
|
||||
s1 = "A\u0460B\u0461d";
|
||||
s2 = toLower(s1);
|
||||
s3 = s1.dup;
|
||||
assert(cmp(s2, "a\u0461b\u0461d") == 0);
|
||||
assert(s2 !is s1);
|
||||
toLowerInPlace(s3);
|
||||
assert(s3 == s2, s3);
|
||||
|
||||
s1 = "\u0130";
|
||||
s2 = toLower(s1);
|
||||
s3 = s1.dup;
|
||||
assert(s2 == "i");
|
||||
assert(s2 !is s1);
|
||||
toLowerInPlace(s3);
|
||||
assert(s3 == s2, s3);
|
||||
|
||||
// Test on wchar and dchar strings.
|
||||
assert(toLower("Some String"w) == "some string"w);
|
||||
assert(toLower("Some String"d) == "some string"d);
|
||||
});
|
||||
}
|
||||
|
||||
alias toLowerInPlace = std.uni.toLowerInPlace;
|
||||
|
||||
/++
|
||||
Returns a string which is identical to $(D s) except that all of its
|
||||
characters are uppercase (in unicode, not just ASCII). If $(D s) does not
|
||||
have any lowercase characters, then $(D s) is returned.
|
||||
characters are converted to uppercase (by preforming Unicode uppercase mapping).
|
||||
If none of $(D s) characters were affected, then $(D s) itself is returned.
|
||||
+/
|
||||
S toUpper(S)(S s) @trusted pure
|
||||
if (isSomeString!S)
|
||||
{
|
||||
foreach (i, dchar cOuter; s)
|
||||
{
|
||||
if (!std.uni.isLower(cOuter))
|
||||
continue;
|
||||
auto result = s[0.. i].dup;
|
||||
foreach (dchar c; s[i .. $])
|
||||
{
|
||||
if (std.uni.isLower(c))
|
||||
{
|
||||
c = std.uni.toUpper(c);
|
||||
}
|
||||
result ~= c;
|
||||
}
|
||||
return cast(S) result;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
unittest
|
||||
{
|
||||
debug(string) printf("string.toUpper.unittest\n");
|
||||
|
||||
assertCTFEable!(
|
||||
{
|
||||
foreach (S; TypeTuple!(string, wstring, dstring, char[], wchar[], dchar[]))
|
||||
{
|
||||
S s = cast(S)"HELLO WORLD\u0100";
|
||||
assert(toUpper(s) is s);
|
||||
const S sc = "HELLO WORLD\u0100";
|
||||
assert(toUpper(sc) is sc);
|
||||
immutable S si = "HELLO WORLD\u0100";
|
||||
assert(toUpper(si) is si);
|
||||
|
||||
S t = cast(S)"hello world\u0101";
|
||||
assert(toUpper(t) == s);
|
||||
const S tc = "HELLO WORLD\u0100";
|
||||
assert(toUpper(tc) == s);
|
||||
immutable S ti = "HELLO WORLD\u0100";
|
||||
assert(toUpper(ti) == s);
|
||||
}
|
||||
});
|
||||
}
|
||||
alias toUpper = std.uni.toUpper;
|
||||
|
||||
/++
|
||||
Converts $(D s) to uppercase (in unicode, not just ASCII) in place.
|
||||
Converts $(D s) to uppercase (by performing Unicode uppercase mapping) in place.
|
||||
For a few characters string length may increase after the transformation,
|
||||
in such a case the function reallocates exactly once.
|
||||
If $(D s) does not have any lowercase characters, then $(D s) is unaltered.
|
||||
+/
|
||||
void toUpperInPlace(C)(ref C[] s)
|
||||
if (isSomeChar!C &&
|
||||
(is(C == char) || is(C == wchar)))
|
||||
{
|
||||
for (size_t i = 0; i < s.length; )
|
||||
{
|
||||
immutable c = s[i];
|
||||
if ('a' <= c && c <= 'z')
|
||||
{
|
||||
s[i++] = cast(C) (c - (cast(C)'a' - 'A'));
|
||||
}
|
||||
else if (!std.ascii.isASCII(c))
|
||||
{
|
||||
// wide character
|
||||
size_t j = i;
|
||||
dchar dc = decode(s, j);
|
||||
assert(j > i);
|
||||
if (!std.uni.isLower(dc))
|
||||
{
|
||||
i = j;
|
||||
continue;
|
||||
}
|
||||
auto toAdd = to!(C[])(std.uni.toUpper(dc));
|
||||
s = s[0 .. i] ~ toAdd ~ s[j .. $];
|
||||
i += toAdd.length;
|
||||
}
|
||||
else
|
||||
{
|
||||
++i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void toUpperInPlace(C)(ref C[] s) @safe pure nothrow
|
||||
if (is(C == dchar))
|
||||
{
|
||||
foreach (ref c; s)
|
||||
{
|
||||
if (std.uni.isLower(c))
|
||||
c = std.uni.toUpper(c);
|
||||
}
|
||||
}
|
||||
|
||||
unittest
|
||||
{
|
||||
debug(string) printf("string.toUpperInPlace.unittest\n");
|
||||
|
||||
assertCTFEable!(
|
||||
{
|
||||
foreach (S; TypeTuple!(char[], wchar[], dchar[]))
|
||||
{
|
||||
S s = to!S("HELLO WORLD\u0100");
|
||||
toUpperInPlace(s);
|
||||
assert(s == "HELLO WORLD\u0100");
|
||||
|
||||
S t = to!S("Hello World\u0101");
|
||||
toUpperInPlace(t);
|
||||
assert(t == "HELLO WORLD\u0100");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
unittest
|
||||
{
|
||||
debug(string) printf("string.toUpper/toUpperInPlace.unittest\n");
|
||||
|
||||
assertCTFEable!(
|
||||
{
|
||||
string s1 = "FoL";
|
||||
string s2;
|
||||
char[] s3;
|
||||
|
||||
s2 = toUpper(s1);
|
||||
s3 = s1.dup; toUpperInPlace(s3);
|
||||
assert(s3 == s2, s3);
|
||||
assert(cmp(s2, "FOL") == 0);
|
||||
assert(s2 !is s1);
|
||||
|
||||
s1 = "a\u0100B\u0101d";
|
||||
s2 = toUpper(s1);
|
||||
s3 = s1.dup; toUpperInPlace(s3);
|
||||
assert(s3 == s2);
|
||||
assert(cmp(s2, "A\u0100B\u0100D") == 0);
|
||||
assert(s2 !is s1);
|
||||
|
||||
s1 = "a\u0460B\u0461d";
|
||||
s2 = toUpper(s1);
|
||||
s3 = s1.dup; toUpperInPlace(s3);
|
||||
assert(s3 == s2);
|
||||
assert(cmp(s2, "A\u0460B\u0460D") == 0);
|
||||
assert(s2 !is s1);
|
||||
});
|
||||
}
|
||||
|
||||
alias toUpperInPlace = std.uni.toUpperInPlace;
|
||||
|
||||
/++
|
||||
Capitalize the first character of $(D s) and conver the rest of $(D s)
|
||||
Capitalize the first character of $(D s) and convert the rest of $(D s)
|
||||
to lowercase.
|
||||
+/
|
||||
S capitalize(S)(S s) @trusted pure
|
||||
|
@ -1138,8 +785,6 @@ S capitalize(S)(S s) @trusted pure
|
|||
|
||||
unittest
|
||||
{
|
||||
debug(string) printf("string.capitalize.unittest\n");
|
||||
|
||||
assertCTFEable!(
|
||||
{
|
||||
foreach (S; TypeTuple!(string, wstring, dstring, char[], wchar[], dchar[]))
|
||||
|
@ -1159,10 +804,9 @@ unittest
|
|||
s2 = capitalize(s1);
|
||||
assert(cmp(s2, "Fol") == 0);
|
||||
assert(s2 !is s1);
|
||||
|
||||
s1 = to!S("\u0131 \u0130");
|
||||
s2 = capitalize(s1);
|
||||
assert(cmp(s2, "\u0049 \u0069") == 0);
|
||||
assert(cmp(s2, "I \u0130") == 0);
|
||||
assert(s2 !is s1);
|
||||
|
||||
s1 = to!S("\u017F \u0049");
|
||||
|
@ -1173,7 +817,6 @@ unittest
|
|||
});
|
||||
}
|
||||
|
||||
|
||||
/++
|
||||
Split $(D s) into an array of lines using $(D '\r'), $(D '\n'),
|
||||
$(D "\r\n"), $(XREF uni, lineSep), and $(XREF uni, paraSep) as delimiters.
|
||||
|
|
|
@ -191,7 +191,7 @@ SRC_STD_C_OSX= std\c\osx\socket.d
|
|||
|
||||
SRC_STD_C_FREEBSD= std\c\freebsd\socket.d
|
||||
|
||||
SRC_STD_INTERNAL= std\internal\processinit.d std\internal\uni.d std\internal\uni_tab.d
|
||||
SRC_STD_INTERNAL= std\internal\processinit.d std\internal\uni.d std\internal\uni_tab.d std\internal\unicode_tables.d
|
||||
|
||||
SRC_STD_INTERNAL_DIGEST= std\internal\digest\sha_SSSE3.d
|
||||
|
||||
|
|
|
@ -210,7 +210,7 @@ SRC_STD_C_OSX= std\c\osx\socket.d
|
|||
|
||||
SRC_STD_C_FREEBSD= std\c\freebsd\socket.d
|
||||
|
||||
SRC_STD_INTERNAL= std\internal\processinit.d std\internal\uni.d std\internal\uni_tab.d
|
||||
SRC_STD_INTERNAL= std\internal\processinit.d std\internal\uni.d std\internal\uni_tab.d std\internal\unicode_tables.d
|
||||
|
||||
SRC_STD_INTERNAL_DIGEST= std\internal\digest\sha_SSSE3.d
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue