mirror of
https://github.com/dlang/phobos.git
synced 2025-04-27 13:40:20 +03:00
2954 lines
53 KiB
D
2954 lines
53 KiB
D
/*
|
|
* Written by Walter Bright
|
|
* Digital Mars
|
|
* www.digitalmars.com
|
|
* Placed into Public Domain.
|
|
*/
|
|
|
|
// String handling functions.
|
|
//
|
|
// To copy or not to copy?
|
|
//
|
|
// When a function takes a string as a parameter, and returns a string,
|
|
// is that string the same as the input string, modified in place, or
|
|
// is it a modified copy of the input string? The D array convention is
|
|
// "copy-on-write". This means that if no modifications are done, the
|
|
// original string (or slices of it) can be returned. If any modifications
|
|
// are done, the returned string is a copy.
|
|
//
|
|
// The code is not optimized for speed, that will have to wait
|
|
// until the design is solidified.
|
|
|
|
module std.string;
|
|
|
|
//debug=string; // uncomment to turn on debugging printf's
|
|
|
|
private import std.stdio;
|
|
private import std.c.stdio;
|
|
private import std.c.stdlib;
|
|
private import std.utf;
|
|
private import std.uni;
|
|
private import std.array;
|
|
private import std.format;
|
|
private import std.ctype;
|
|
|
|
extern (C)
|
|
{
|
|
// Functions from the C library.
|
|
int strlen(char *);
|
|
int strcmp(char *, char *);
|
|
char* strcat(char *, char *);
|
|
int memcmp(void *, void *, uint);
|
|
int memicmp(char *, char *, uint);
|
|
char *strcpy(char *, char *);
|
|
char *strstr(char *, char *);
|
|
char *strchr(char *, char);
|
|
char *strrchr(char *, char);
|
|
char *memchr(char *, char, uint);
|
|
void *memcpy(void *, void *, uint);
|
|
void *memmove(void *, void *, uint);
|
|
void *memset(void *, uint, uint);
|
|
char* strerror(int);
|
|
|
|
int wcslen(wchar *);
|
|
int wcscmp(wchar *, wchar *);
|
|
}
|
|
|
|
/************** Exceptions ****************/
|
|
|
|
class StringException : Exception
|
|
{
|
|
this(char[] msg)
|
|
{
|
|
super(msg);
|
|
}
|
|
}
|
|
|
|
/************** Constants ****************/
|
|
|
|
const char[16] hexdigits = "0123456789ABCDEF";
|
|
const char[10] digits = "0123456789";
|
|
const char[8] octdigits = "01234567";
|
|
const char[26] lowercase = "abcdefghijklmnopqrstuvwxyz";
|
|
const char[26] uppercase = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
|
|
const char[52] letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
|
"abcdefghijklmnopqrstuvwxyz";
|
|
const char[6] whitespace = " \t\v\r\n\f";
|
|
|
|
/**********************************
|
|
* Returns !=0 if c is whitespace
|
|
*/
|
|
|
|
int iswhite(dchar c)
|
|
{
|
|
return find(whitespace, c) != -1;
|
|
}
|
|
|
|
/*********************************
|
|
* Convert string to integer / real.
|
|
*/
|
|
|
|
long atoi(char[] s)
|
|
{
|
|
return std.c.stdlib.atoi(toStringz(s));
|
|
}
|
|
|
|
/*************************************
|
|
* Convert string to float
|
|
*/
|
|
|
|
real atof(char[] s)
|
|
{ char* endptr;
|
|
real result;
|
|
|
|
result = strtold(toStringz(s), &endptr);
|
|
return result;
|
|
}
|
|
|
|
/**********************************
|
|
* Compare two strings.
|
|
* Returns:
|
|
* <0 s1 < s2
|
|
* =0 s1 == s2
|
|
* >0 s1 > s2
|
|
*/
|
|
|
|
int cmp(char[] s1, char[] s2)
|
|
{
|
|
uint len = s1.length;
|
|
int result;
|
|
|
|
//printf("cmp('%.*s', '%.*s')\n", s1, s2);
|
|
if (s2.length < len)
|
|
len = s2.length;
|
|
result = memcmp(s1, s2, len);
|
|
if (result == 0)
|
|
result = cast(int)s1.length - cast(int)s2.length;
|
|
return result;
|
|
}
|
|
|
|
/*********************************
|
|
* Same as cmp() but case insensitive.
|
|
*/
|
|
|
|
int icmp(char[] s1, char[] s2)
|
|
{
|
|
uint len = s1.length;
|
|
int result;
|
|
|
|
if (s2.length < len)
|
|
len = s2.length;
|
|
version (Win32)
|
|
{
|
|
result = memicmp(s1, s2, len);
|
|
}
|
|
version (linux)
|
|
{
|
|
for (int i = 0; i < len; i++)
|
|
{
|
|
if (s1[i] != s2[i])
|
|
{
|
|
char c1 = s1[i];
|
|
char c2 = s2[i];
|
|
|
|
if (c1 >= 'A' && c1 <= 'Z')
|
|
c1 += cast(int)'a' - cast(int)'A';
|
|
if (c2 >= 'A' && c2 <= 'Z')
|
|
c2 += cast(int)'a' - cast(int)'A';
|
|
result = cast(int)c1 - cast(int)c2;
|
|
if (result)
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (result == 0)
|
|
result = cast(int)s1.length - cast(int)s2.length;
|
|
return result;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
int result;
|
|
|
|
debug(string) printf("string.cmp.unittest\n");
|
|
result = cmp("abc", "abc");
|
|
assert(result == 0);
|
|
result = cmp(null, null);
|
|
assert(result == 0);
|
|
result = cmp("", "");
|
|
assert(result == 0);
|
|
result = cmp("abc", "abcd");
|
|
assert(result < 0);
|
|
result = cmp("abcd", "abc");
|
|
assert(result > 0);
|
|
result = cmp("abc", "abd");
|
|
assert(result < 0);
|
|
result = cmp("bbc", "abc");
|
|
assert(result > 0);
|
|
}
|
|
|
|
/*********************************
|
|
* Converts a D array of chars to a C-style 0 terminated string.
|
|
*/
|
|
|
|
deprecated char* toCharz(char[] string)
|
|
{
|
|
return toStringz(string);
|
|
}
|
|
|
|
char* toStringz(char[] string)
|
|
in
|
|
{
|
|
if (string)
|
|
{
|
|
// No embedded 0's
|
|
for (uint i = 0; i < string.length; i++)
|
|
assert(string[i] != 0);
|
|
}
|
|
}
|
|
out (result)
|
|
{
|
|
if (result)
|
|
{ assert(strlen(result) == string.length);
|
|
assert(memcmp(result, string, string.length) == 0);
|
|
}
|
|
}
|
|
body
|
|
{
|
|
char[] copy;
|
|
|
|
if (string.length == 0)
|
|
return "";
|
|
|
|
/+ Unfortunately, this isn't reliable.
|
|
We could make this work if string literals are put
|
|
in read-only memory and we test if string[] is pointing into
|
|
that.
|
|
|
|
/* Peek past end of string[], if it's 0, no conversion necessary.
|
|
* Note that the compiler will put a 0 past the end of static
|
|
* strings, and the storage allocator will put a 0 past the end
|
|
* of newly allocated char[]'s.
|
|
*/
|
|
char* p = &string[0] + string.length;
|
|
if (*p == 0)
|
|
return string;
|
|
+/
|
|
|
|
// Need to make a copy
|
|
copy = new char[string.length + 1];
|
|
copy[0..string.length] = string;
|
|
copy[string.length] = 0;
|
|
return copy;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.toStringz.unittest\n");
|
|
|
|
char* p = toStringz("foo");
|
|
assert(strlen(p) == 3);
|
|
char foo[] = "abbzxyzzy";
|
|
p = toStringz(foo[3..5]);
|
|
assert(strlen(p) == 2);
|
|
|
|
char[] test = "";
|
|
p = toStringz(test);
|
|
assert(*p == 0);
|
|
}
|
|
|
|
/******************************************
|
|
* Find first occurrance of c in string s.
|
|
* Return index in s where it is found.
|
|
* Return -1 if not found.
|
|
*/
|
|
|
|
int find(char[] s, dchar c)
|
|
{
|
|
char* p;
|
|
|
|
if (c <= 0x7F)
|
|
{ // Plain old ASCII
|
|
p = memchr(s, c, s.length);
|
|
if (p)
|
|
return p - cast(char *)s;
|
|
else
|
|
return -1;
|
|
}
|
|
|
|
// c is a universal character
|
|
foreach (int i, dchar c2; s)
|
|
{
|
|
if (c == c2)
|
|
return i;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.find.unittest\n");
|
|
|
|
int i;
|
|
|
|
i = find(null, cast(dchar)'a');
|
|
assert(i == -1);
|
|
i = find("def", cast(dchar)'a');
|
|
assert(i == -1);
|
|
i = find("abba", cast(dchar)'a');
|
|
assert(i == 0);
|
|
i = find("def", cast(dchar)'f');
|
|
assert(i == 2);
|
|
}
|
|
|
|
|
|
/******************************************
|
|
* Case insensitive version of find().
|
|
*/
|
|
|
|
int ifind(char[] s, dchar c)
|
|
{
|
|
char* p;
|
|
|
|
if (c <= 0x7F)
|
|
{ // Plain old ASCII
|
|
char c1 = std.ctype.tolower(c);
|
|
|
|
foreach (int i, char c2; s)
|
|
{
|
|
c2 = std.ctype.tolower(c2);
|
|
if (c1 == c2)
|
|
return i;
|
|
}
|
|
}
|
|
else
|
|
{ // c is a universal character
|
|
dchar c1 = std.uni.toUniLower(c);
|
|
|
|
foreach (int i, dchar c2; s)
|
|
{
|
|
c2 = std.uni.toUniLower(c2);
|
|
if (c1 == c2)
|
|
return i;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.ifind.unittest\n");
|
|
|
|
int i;
|
|
|
|
i = ifind(null, cast(dchar)'a');
|
|
assert(i == -1);
|
|
i = ifind("def", cast(dchar)'a');
|
|
assert(i == -1);
|
|
i = ifind("Abba", cast(dchar)'a');
|
|
assert(i == 0);
|
|
i = ifind("def", cast(dchar)'F');
|
|
assert(i == 2);
|
|
|
|
char[] sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
|
|
|
|
i = ifind("def", cast(char)'f');
|
|
assert(i == 2);
|
|
|
|
i = ifind(sPlts, cast(char)'P');
|
|
assert(i == 23);
|
|
i = ifind(sPlts, cast(char)'R');
|
|
assert(i == 2);
|
|
}
|
|
|
|
|
|
/******************************************
|
|
* Find last occurrance of c in string s.
|
|
* Return index in s where it is found.
|
|
* Return -1 if not found.
|
|
*/
|
|
|
|
int rfind(char[] s, dchar c)
|
|
{
|
|
int i;
|
|
|
|
if (c <= 0x7F)
|
|
{ // Plain old ASCII
|
|
for (i = s.length; i-- > 0;)
|
|
{
|
|
if (s[i] == c)
|
|
break;
|
|
}
|
|
return i;
|
|
}
|
|
|
|
// c is a universal character
|
|
char[4] buf;
|
|
char[] t;
|
|
t = std.utf.toUTF8(buf, c);
|
|
return rfind(s, t);
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.rfind.unittest\n");
|
|
|
|
int i;
|
|
|
|
i = rfind(null, cast(dchar)'a');
|
|
assert(i == -1);
|
|
i = rfind("def", cast(dchar)'a');
|
|
assert(i == -1);
|
|
i = rfind("abba", cast(dchar)'a');
|
|
assert(i == 3);
|
|
i = rfind("def", cast(dchar)'f');
|
|
assert(i == 2);
|
|
}
|
|
|
|
/******************************************
|
|
* Case insensitive version of rfind().
|
|
*/
|
|
|
|
int irfind(char[] s, dchar c)
|
|
{
|
|
size_t i;
|
|
|
|
if (c <= 0x7F)
|
|
{ // Plain old ASCII
|
|
char c1 = std.ctype.tolower(c);
|
|
|
|
for (i = s.length; i-- > 0;)
|
|
{ char c2 = s[i];
|
|
|
|
c2 = std.ctype.tolower(c2);
|
|
if (c1 == c2)
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{ // c is a universal character
|
|
dchar c1 = std.uni.toUniLower(c);
|
|
|
|
for (i = s.length; i-- > 0;)
|
|
{ char cx = s[i];
|
|
|
|
if (cx <= 0x7F)
|
|
continue; // skip, since c is not ASCII
|
|
if ((cx & 0xC0) == 0x80)
|
|
continue; // skip non-starting UTF-8 chars
|
|
|
|
size_t j = i;
|
|
dchar c2 = std.utf.decode(s, j);
|
|
c2 = std.uni.toUniLower(c2);
|
|
if (c1 == c2)
|
|
break;
|
|
}
|
|
}
|
|
return i;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.irfind.unittest\n");
|
|
|
|
int i;
|
|
|
|
i = irfind(null, cast(dchar)'a');
|
|
assert(i == -1);
|
|
i = irfind("def", cast(dchar)'a');
|
|
assert(i == -1);
|
|
i = irfind("AbbA", cast(dchar)'a');
|
|
assert(i == 3);
|
|
i = irfind("def", cast(dchar)'F');
|
|
assert(i == 2);
|
|
|
|
char[] sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
|
|
|
|
i = irfind("def", cast(char)'f');
|
|
assert(i == 2);
|
|
|
|
i = irfind(sPlts, cast(char)'M');
|
|
assert(i == 34);
|
|
i = irfind(sPlts, cast(char)'S');
|
|
assert(i == 40);
|
|
}
|
|
|
|
|
|
/*************************************
|
|
* Find first occurrance of sub[] in string s[].
|
|
* Return index in s[] where it is found.
|
|
* Return -1 if not found.
|
|
*/
|
|
|
|
int find(char[] s, char[] sub)
|
|
out (result)
|
|
{
|
|
if (result == -1)
|
|
{
|
|
}
|
|
else
|
|
{
|
|
assert(0 <= result && result < s.length - sub.length + 1);
|
|
assert(memcmp(&s[result], sub, sub.length) == 0);
|
|
}
|
|
}
|
|
body
|
|
{
|
|
int sublength = sub.length;
|
|
|
|
if (sublength == 0)
|
|
return 0;
|
|
|
|
char c = sub[0];
|
|
if (sublength == 1)
|
|
{
|
|
char *p = memchr(s, c, s.length);
|
|
if (p)
|
|
return p - &s[0];
|
|
}
|
|
else
|
|
{
|
|
int imax = s.length - sublength + 1;
|
|
|
|
// Remainder of sub[]
|
|
char *q = &sub[1];
|
|
sublength--;
|
|
|
|
for (int i = 0; i < imax; i++)
|
|
{
|
|
char *p = memchr(&s[i], c, imax - i);
|
|
if (!p)
|
|
break;
|
|
i = p - &s[0];
|
|
if (memcmp(p + 1, q, sublength) == 0)
|
|
return i;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.find.unittest\n");
|
|
|
|
int i;
|
|
|
|
i = find(null, "a");
|
|
assert(i == -1);
|
|
i = find("def", "a");
|
|
assert(i == -1);
|
|
i = find("abba", "a");
|
|
assert(i == 0);
|
|
i = find("def", "f");
|
|
assert(i == 2);
|
|
i = find("dfefffg", "fff");
|
|
assert(i == 3);
|
|
i = find("dfeffgfff", "fff");
|
|
assert(i == 6);
|
|
}
|
|
|
|
/*************************************
|
|
* Case insensitive version of find().
|
|
*/
|
|
|
|
int ifind(char[] s, char[] sub)
|
|
out (result)
|
|
{
|
|
if (result == -1)
|
|
{
|
|
}
|
|
else
|
|
{
|
|
assert(0 <= result && result < s.length - sub.length + 1);
|
|
assert(icmp(s[result .. result + sub.length], sub) == 0);
|
|
}
|
|
}
|
|
body
|
|
{
|
|
int sublength = sub.length;
|
|
int i;
|
|
|
|
if (sublength == 0)
|
|
return 0;
|
|
|
|
if (s.length < sublength)
|
|
return -1;
|
|
|
|
char c = sub[0];
|
|
if (sublength == 1)
|
|
{
|
|
i = ifind(s, c);
|
|
}
|
|
else if (c <= 0x7F)
|
|
{
|
|
int imax = s.length - sublength + 1;
|
|
|
|
// Remainder of sub[]
|
|
char[] subn = sub[1 .. sublength];
|
|
|
|
for (i = 0; i < imax; i++)
|
|
{
|
|
int j = ifind(s[i .. imax], c);
|
|
if (j == -1)
|
|
return -1;
|
|
i += j;
|
|
if (icmp(s[i + 1 .. i + sublength], subn) == 0)
|
|
return i;
|
|
}
|
|
i = -1;
|
|
}
|
|
else
|
|
{
|
|
int imax = s.length - sublength;
|
|
|
|
for (i = 0; i < imax; i++)
|
|
{
|
|
if (icmp(s[i .. i + sublength], sub) == 0)
|
|
return i;
|
|
}
|
|
i = -1;
|
|
}
|
|
return i;
|
|
}
|
|
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.ifind.unittest\n");
|
|
|
|
int i;
|
|
|
|
i = ifind(null, "a");
|
|
assert(i == -1);
|
|
i = ifind("def", "a");
|
|
assert(i == -1);
|
|
i = ifind("abba", "a");
|
|
assert(i == 0);
|
|
i = ifind("def", "f");
|
|
assert(i == 2);
|
|
i = ifind("dfefffg", "fff");
|
|
assert(i == 3);
|
|
i = ifind("dfeffgfff", "fff");
|
|
assert(i == 6);
|
|
|
|
char[] sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
|
|
char[] sMars = "Who\'s \'My Favorite Maritian?\'";
|
|
|
|
i = ifind(sMars, "MY fAVe");
|
|
assert(i == -1);
|
|
i = ifind(sMars, "mY fAVOriTe");
|
|
assert(i == 7);
|
|
i = ifind(sPlts, "mArS:");
|
|
assert(i == 0);
|
|
i = ifind(sPlts, "rOcK");
|
|
assert(i == 17);
|
|
i = ifind(sPlts, "Un.");
|
|
assert(i == 41);
|
|
i = ifind(sPlts, sPlts);
|
|
assert(i == 0);
|
|
|
|
// Thanks to Carlos Santander B. and zwang
|
|
i = ifind("sus mejores cortesanos. Se embarcaron en el puerto de Dubai y",
|
|
"page-break-before");
|
|
assert(i == -1);
|
|
}
|
|
|
|
/*************************************
|
|
* Find last occurrance of sub in string s.
|
|
* Return index in s where it is found.
|
|
* Return -1 if not found.
|
|
*/
|
|
|
|
int rfind(char[] s, char[] sub)
|
|
out (result)
|
|
{
|
|
if (result == -1)
|
|
{
|
|
}
|
|
else
|
|
{
|
|
assert(0 <= result && result < s.length - sub.length + 1);
|
|
assert(memcmp(&s[0] + result, sub, sub.length) == 0);
|
|
}
|
|
}
|
|
body
|
|
{
|
|
char c;
|
|
|
|
if (sub.length == 0)
|
|
return s.length;
|
|
c = sub[0];
|
|
if (sub.length == 1)
|
|
return rfind(s, c);
|
|
for (int i = s.length - sub.length; i >= 0; i--)
|
|
{
|
|
if (s[i] == c)
|
|
{
|
|
if (memcmp(&s[i + 1], &sub[1], sub.length - 1) == 0)
|
|
return i;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
int i;
|
|
|
|
debug(string) printf("string.rfind.unittest\n");
|
|
i = rfind("abcdefcdef", "c");
|
|
assert(i == 6);
|
|
i = rfind("abcdefcdef", "cd");
|
|
assert(i == 6);
|
|
i = rfind("abcdefcdef", "x");
|
|
assert(i == -1);
|
|
i = rfind("abcdefcdef", "xy");
|
|
assert(i == -1);
|
|
i = rfind("abcdefcdef", "");
|
|
assert(i == 10);
|
|
}
|
|
|
|
|
|
/*************************************
|
|
* Case insensitive version of rfind().
|
|
*/
|
|
|
|
int irfind(char[] s, char[] sub)
|
|
out (result)
|
|
{
|
|
if (result == -1)
|
|
{
|
|
}
|
|
else
|
|
{
|
|
assert(0 <= result && result < s.length - sub.length + 1);
|
|
assert(icmp(s[result .. result + sub.length], sub) == 0);
|
|
}
|
|
}
|
|
body
|
|
{
|
|
dchar c;
|
|
|
|
if (sub.length == 0)
|
|
return s.length;
|
|
c = sub[0];
|
|
if (sub.length == 1)
|
|
return irfind(s, c);
|
|
if (c <= 0x7F)
|
|
{
|
|
c = std.ctype.tolower(c);
|
|
for (int i = s.length - sub.length; i >= 0; i--)
|
|
{
|
|
if (std.ctype.tolower(s[i]) == c)
|
|
{
|
|
if (icmp(s[i + 1 .. i + sub.length], sub[1 .. sub.length]) == 0)
|
|
return i;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int i = s.length - sub.length; i >= 0; i--)
|
|
{
|
|
if (icmp(s[i .. i + sub.length], sub) == 0)
|
|
return i;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
int i;
|
|
|
|
debug(string) printf("string.irfind.unittest\n");
|
|
i = irfind("abcdefCdef", "c");
|
|
assert(i == 6);
|
|
i = irfind("abcdefCdef", "cD");
|
|
assert(i == 6);
|
|
i = irfind("abcdefcdef", "x");
|
|
assert(i == -1);
|
|
i = irfind("abcdefcdef", "xy");
|
|
assert(i == -1);
|
|
i = irfind("abcdefcdef", "");
|
|
assert(i == 10);
|
|
|
|
char[] sPlts = "Mars: the fourth Rock (Planet) from the Sun.";
|
|
char[] sMars = "Who\'s \'My Favorite Maritian?\'";
|
|
|
|
i = irfind("abcdefcdef", "c");
|
|
assert(i == 6);
|
|
i = irfind("abcdefcdef", "cd");
|
|
assert(i == 6);
|
|
i = irfind( "abcdefcdef", "def" );
|
|
assert(i == 7);
|
|
|
|
i = irfind(sMars, "RiTE maR");
|
|
assert(i == 14);
|
|
i = irfind(sPlts, "FOuRTh");
|
|
assert(i == 10);
|
|
i = irfind(sMars, "whO\'s \'MY");
|
|
assert(i == 0);
|
|
i = irfind(sMars, sMars);
|
|
assert(i == 0);
|
|
}
|
|
|
|
|
|
/************************************
|
|
* Convert string to lower case.
|
|
*/
|
|
|
|
char[] tolower(char[] s)
|
|
{
|
|
int changed;
|
|
int i;
|
|
char[] r = s;
|
|
|
|
changed = 0;
|
|
for (i = 0; i < s.length; i++)
|
|
{
|
|
char c = s[i];
|
|
if ('A' <= c && c <= 'Z')
|
|
{
|
|
if (!changed)
|
|
{ r = s.dup;
|
|
changed = 1;
|
|
}
|
|
r[i] = c + (cast(char)'a' - 'A');
|
|
}
|
|
else if (c >= 0x7F)
|
|
{
|
|
foreach (size_t j, dchar dc; s[i .. length])
|
|
{
|
|
if (!changed)
|
|
{
|
|
if (!std.uni.isUniUpper(dc))
|
|
continue;
|
|
|
|
r = s[0 .. i + j].dup;
|
|
changed = 1;
|
|
}
|
|
dc = std.uni.toUniLower(dc);
|
|
std.utf.encode(r, dc);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
return r;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.tolower.unittest\n");
|
|
|
|
char[] s1 = "FoL";
|
|
char[] s2;
|
|
|
|
s2 = tolower(s1);
|
|
assert(cmp(s2, "fol") == 0);
|
|
assert(s2 != s1);
|
|
}
|
|
|
|
/************************************
|
|
* Convert string to upper case.
|
|
*/
|
|
|
|
char[] toupper(char[] s)
|
|
{
|
|
int changed;
|
|
int i;
|
|
char[] r = s;
|
|
|
|
changed = 0;
|
|
for (i = 0; i < s.length; i++)
|
|
{
|
|
char c = s[i];
|
|
if ('a' <= c && c <= 'z')
|
|
{
|
|
if (!changed)
|
|
{ r = s.dup;
|
|
changed = 1;
|
|
}
|
|
r[i] = c - (cast(char)'a' - 'A');
|
|
}
|
|
else if (c >= 0x7F)
|
|
{
|
|
foreach (size_t j, dchar dc; s[i .. length])
|
|
{
|
|
if (!changed)
|
|
{
|
|
if (!std.uni.isUniLower(dc))
|
|
continue;
|
|
|
|
r = s[0 .. i + j].dup;
|
|
changed = 1;
|
|
}
|
|
dc = std.uni.toUniUpper(dc);
|
|
std.utf.encode(r, dc);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
return r;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.toupper.unittest\n");
|
|
|
|
char[] s1 = "FoL";
|
|
char[] s2;
|
|
|
|
s2 = toupper(s1);
|
|
assert(cmp(s2, "FOL") == 0);
|
|
assert(s2 !== s1);
|
|
}
|
|
|
|
|
|
/********************************************
|
|
* Capitalize first character of string, convert rest of string
|
|
* to lower case.
|
|
*/
|
|
|
|
char[] capitalize(char[] s)
|
|
{
|
|
int changed;
|
|
int i;
|
|
char[] r = s;
|
|
|
|
changed = 0;
|
|
|
|
foreach (size_t i, dchar c; s)
|
|
{ dchar c2;
|
|
|
|
if (i == 0)
|
|
{
|
|
c2 = std.uni.toUniUpper(c);
|
|
if (c != c2)
|
|
{
|
|
changed = 1;
|
|
r = null;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
c2 = std.uni.toUniLower(c);
|
|
if (c != c2)
|
|
{
|
|
if (!changed)
|
|
{ changed = 1;
|
|
r = s[0 .. i].dup;
|
|
}
|
|
}
|
|
}
|
|
if (changed)
|
|
std.utf.encode(r, c2);
|
|
}
|
|
return r;
|
|
}
|
|
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.toupper.capitalize\n");
|
|
|
|
char[] s1 = "FoL";
|
|
char[] s2;
|
|
|
|
s2 = capitalize(s1);
|
|
assert(cmp(s2, "Fol") == 0);
|
|
assert(s2 !== s1);
|
|
|
|
s2 = capitalize(s1[0 .. 2]);
|
|
assert(cmp(s2, "Fo") == 0);
|
|
assert(s2.ptr == s1.ptr);
|
|
|
|
s1 = "fOl";
|
|
s2 = capitalize(s1);
|
|
assert(cmp(s2, "Fol") == 0);
|
|
assert(s2 !== s1);
|
|
}
|
|
|
|
|
|
/********************************************
|
|
* Capitalize all words in string.
|
|
* Remove leading and trailing whitespace.
|
|
* Replace all sequences of whitespace with a single space.
|
|
*/
|
|
|
|
char[] capwords(char[] s)
|
|
{
|
|
char[] r;
|
|
int inword;
|
|
int i;
|
|
int istart;
|
|
|
|
istart = 0;
|
|
inword = 0;
|
|
for (i = 0; i < s.length; i++)
|
|
{
|
|
switch (s[i])
|
|
{
|
|
case ' ':
|
|
case '\t':
|
|
case '\f':
|
|
case '\r':
|
|
case '\n':
|
|
case '\v':
|
|
if (inword)
|
|
{
|
|
r ~= capitalize(s[istart .. i]);
|
|
inword = 0;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
if (!inword)
|
|
{
|
|
if (r.length)
|
|
r ~= ' ';
|
|
istart = i;
|
|
inword = 1;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
if (inword)
|
|
{
|
|
r ~= capitalize(s[istart .. i]);
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.capwords.unittest\n");
|
|
|
|
char[] s1 = "\tfoo abc(aD)* \t (q PTT ";
|
|
char[] s2;
|
|
|
|
s2 = capwords(s1);
|
|
//writefln("s2 = '%s'", s2);
|
|
assert(cmp(s2, "Foo Abc(ad)* (q Ptt") == 0);
|
|
}
|
|
|
|
/********************************************
|
|
* Return a string that consists of s[] repeated n times.
|
|
*/
|
|
|
|
char[] repeat(char[] s, size_t n)
|
|
{
|
|
if (n == 0)
|
|
return null;
|
|
if (n == 1)
|
|
return s;
|
|
char[] r = new char[n * s.length];
|
|
if (s.length == 1)
|
|
r[] = s[0];
|
|
else
|
|
{ size_t len = s.length;
|
|
|
|
for (size_t i = 0; i < n * len; i += len)
|
|
{
|
|
r[i .. i + len] = s[];
|
|
}
|
|
}
|
|
return r;
|
|
}
|
|
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.repeat.unittest\n");
|
|
|
|
char[] s;
|
|
|
|
s = repeat("1234", 0);
|
|
assert(s is null);
|
|
s = repeat("1234", 1);
|
|
assert(cmp(s, "1234") == 0);
|
|
s = repeat("1234", 2);
|
|
assert(cmp(s, "12341234") == 0);
|
|
s = repeat("1", 4);
|
|
assert(cmp(s, "1111") == 0);
|
|
s = repeat(null, 4);
|
|
assert(s is null);
|
|
}
|
|
|
|
|
|
/********************************************
|
|
* Concatenate all the strings together into one
|
|
* string; use sep[] as the separator.
|
|
*/
|
|
|
|
char[] join(char[][] words, char[] sep)
|
|
{
|
|
uint len;
|
|
uint seplen;
|
|
uint i;
|
|
uint j;
|
|
char[] result;
|
|
|
|
if (words.length)
|
|
{
|
|
len = 0;
|
|
for (i = 0; i < words.length; i++)
|
|
len += words[i].length;
|
|
|
|
seplen = sep.length;
|
|
len += (words.length - 1) * seplen;
|
|
|
|
result = new char[len];
|
|
|
|
i = 0;
|
|
while (true)
|
|
{
|
|
uint wlen = words[i].length;
|
|
|
|
result[j .. j + wlen] = words[i];
|
|
j += wlen;
|
|
i++;
|
|
if (i >= words.length)
|
|
break;
|
|
result[j .. j + seplen] = sep;
|
|
j += seplen;
|
|
}
|
|
assert(j == len);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.join.unittest\n");
|
|
|
|
char[] word1 = "peter";
|
|
char[] word2 = "paul";
|
|
char[] word3 = "jerry";
|
|
char[][3] words;
|
|
char[] r;
|
|
int i;
|
|
|
|
words[0] = word1;
|
|
words[1] = word2;
|
|
words[2] = word3;
|
|
r = join(words, ",");
|
|
i = cmp(r, "peter,paul,jerry");
|
|
assert(i == 0);
|
|
}
|
|
|
|
|
|
/**************************************
|
|
* Split s[] into an array of words,
|
|
* using whitespace as the delimiter.
|
|
*/
|
|
|
|
char[][] split(char[] s)
|
|
{
|
|
uint i;
|
|
uint istart;
|
|
int inword;
|
|
char[][] words;
|
|
|
|
inword = 0;
|
|
for (i = 0; i < s.length; i++)
|
|
{
|
|
switch (s[i])
|
|
{
|
|
case ' ':
|
|
case '\t':
|
|
case '\f':
|
|
case '\r':
|
|
case '\n':
|
|
case '\v':
|
|
if (inword)
|
|
{
|
|
words ~= s[istart .. i];
|
|
inword = 0;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
if (!inword)
|
|
{ istart = i;
|
|
inword = 1;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
if (inword)
|
|
words ~= s[istart .. i];
|
|
return words;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.join.split1\n");
|
|
|
|
char[] s = " peter paul\tjerry ";
|
|
char[][] words;
|
|
int i;
|
|
|
|
words = split(s);
|
|
assert(words.length == 3);
|
|
i = cmp(words[0], "peter");
|
|
assert(i == 0);
|
|
i = cmp(words[1], "paul");
|
|
assert(i == 0);
|
|
i = cmp(words[2], "jerry");
|
|
assert(i == 0);
|
|
}
|
|
|
|
|
|
/**************************************
|
|
* Split s[] into an array of words,
|
|
* using delim[] as the delimiter.
|
|
*/
|
|
|
|
char[][] split(char[] s, char[] delim)
|
|
in
|
|
{
|
|
assert(delim.length > 0);
|
|
}
|
|
body
|
|
{
|
|
uint i;
|
|
uint j;
|
|
char[][] words;
|
|
|
|
i = 0;
|
|
if (s.length)
|
|
{
|
|
if (delim.length == 1)
|
|
{ char c = delim[0];
|
|
uint nwords = 0;
|
|
char *p = &s[0];
|
|
char *pend = p + s.length;
|
|
|
|
while (true)
|
|
{
|
|
nwords++;
|
|
p = memchr(p, c, pend - p);
|
|
if (!p)
|
|
break;
|
|
p++;
|
|
if (p == pend)
|
|
{ nwords++;
|
|
break;
|
|
}
|
|
}
|
|
words.length = nwords;
|
|
|
|
int wordi = 0;
|
|
i = 0;
|
|
while (true)
|
|
{
|
|
p = memchr(&s[i], c, s.length - i);
|
|
if (!p)
|
|
{
|
|
words[wordi] = s[i .. s.length];
|
|
break;
|
|
}
|
|
j = p - &s[0];
|
|
words[wordi] = s[i .. j];
|
|
wordi++;
|
|
i = j + 1;
|
|
if (i == s.length)
|
|
{
|
|
words[wordi] = "";
|
|
break;
|
|
}
|
|
}
|
|
assert(wordi + 1 == nwords);
|
|
}
|
|
else
|
|
{ uint nwords = 0;
|
|
|
|
while (true)
|
|
{
|
|
nwords++;
|
|
j = find(s[i .. s.length], delim);
|
|
if (j == -1)
|
|
break;
|
|
i += j + delim.length;
|
|
if (i == s.length)
|
|
{ nwords++;
|
|
break;
|
|
}
|
|
assert(i < s.length);
|
|
}
|
|
words.length = nwords;
|
|
|
|
int wordi = 0;
|
|
i = 0;
|
|
while (true)
|
|
{
|
|
j = find(s[i .. s.length], delim);
|
|
if (j == -1)
|
|
{
|
|
words[wordi] = s[i .. s.length];
|
|
break;
|
|
}
|
|
words[wordi] = s[i .. i + j];
|
|
wordi++;
|
|
i += j + delim.length;
|
|
if (i == s.length)
|
|
{
|
|
words[wordi] = "";
|
|
break;
|
|
}
|
|
assert(i < s.length);
|
|
}
|
|
assert(wordi + 1 == nwords);
|
|
}
|
|
}
|
|
return words;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.join.split2\n");
|
|
|
|
char[] s = ",peter,paul,jerry,";
|
|
char[][] words;
|
|
int i;
|
|
|
|
words = split(s, ",");
|
|
assert(words.length == 5);
|
|
i = cmp(words[0], "");
|
|
assert(i == 0);
|
|
i = cmp(words[1], "peter");
|
|
assert(i == 0);
|
|
i = cmp(words[2], "paul");
|
|
assert(i == 0);
|
|
i = cmp(words[3], "jerry");
|
|
assert(i == 0);
|
|
i = cmp(words[4], "");
|
|
assert(i == 0);
|
|
|
|
s = s[0 .. s.length - 1]; // lop off trailing ','
|
|
words = split(s, ",");
|
|
assert(words.length == 4);
|
|
i = cmp(words[3], "jerry");
|
|
assert(i == 0);
|
|
|
|
s = s[1 .. s.length]; // lop off leading ','
|
|
words = split(s, ",");
|
|
assert(words.length == 3);
|
|
i = cmp(words[0], "peter");
|
|
assert(i == 0);
|
|
|
|
char[] s2 = ",,peter,,paul,,jerry,,";
|
|
|
|
words = split(s2, ",,");
|
|
//printf("words.length = %d\n", words.length);
|
|
assert(words.length == 5);
|
|
i = cmp(words[0], "");
|
|
assert(i == 0);
|
|
i = cmp(words[1], "peter");
|
|
assert(i == 0);
|
|
i = cmp(words[2], "paul");
|
|
assert(i == 0);
|
|
i = cmp(words[3], "jerry");
|
|
assert(i == 0);
|
|
i = cmp(words[4], "");
|
|
assert(i == 0);
|
|
|
|
s2 = s2[0 .. s2.length - 2]; // lop off trailing ',,'
|
|
words = split(s2, ",,");
|
|
assert(words.length == 4);
|
|
i = cmp(words[3], "jerry");
|
|
assert(i == 0);
|
|
|
|
s2 = s2[2 .. s2.length]; // lop off leading ',,'
|
|
words = split(s2, ",,");
|
|
assert(words.length == 3);
|
|
i = cmp(words[0], "peter");
|
|
assert(i == 0);
|
|
}
|
|
|
|
|
|
/**************************************
|
|
* Split s[] into an array of lines,
|
|
* using CR, LF, or CR-LF as the delimiter.
|
|
* The delimiter is not included in the line.
|
|
*/
|
|
|
|
char[][] splitlines(char[] s)
|
|
{
|
|
uint i;
|
|
uint istart;
|
|
uint nlines;
|
|
char[][] lines;
|
|
|
|
nlines = 0;
|
|
for (i = 0; i < s.length; i++)
|
|
{ char c;
|
|
|
|
c = s[i];
|
|
if (c == '\r' || c == '\n')
|
|
{
|
|
nlines++;
|
|
istart = i + 1;
|
|
if (c == '\r' && i + 1 < s.length && s[i + 1] == '\n')
|
|
{
|
|
i++;
|
|
istart++;
|
|
}
|
|
}
|
|
}
|
|
if (istart != i)
|
|
nlines++;
|
|
|
|
lines = new char[][nlines];
|
|
nlines = 0;
|
|
istart = 0;
|
|
for (i = 0; i < s.length; i++)
|
|
{ char c;
|
|
|
|
c = s[i];
|
|
if (c == '\r' || c == '\n')
|
|
{
|
|
lines[nlines] = s[istart .. i];
|
|
nlines++;
|
|
istart = i + 1;
|
|
if (c == '\r' && i + 1 < s.length && s[i + 1] == '\n')
|
|
{
|
|
i++;
|
|
istart++;
|
|
}
|
|
}
|
|
}
|
|
if (istart != i)
|
|
{ lines[nlines] = s[istart .. i];
|
|
nlines++;
|
|
}
|
|
|
|
assert(nlines == lines.length);
|
|
return lines;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.join.splitlines\n");
|
|
|
|
char[] s = "\rpeter\n\rpaul\r\njerry\n";
|
|
char[][] lines;
|
|
int i;
|
|
|
|
lines = splitlines(s);
|
|
//printf("lines.length = %d\n", lines.length);
|
|
assert(lines.length == 5);
|
|
//printf("lines[0] = %llx, '%.*s'\n", lines[0], lines[0]);
|
|
assert(lines[0].length == 0);
|
|
i = cmp(lines[1], "peter");
|
|
assert(i == 0);
|
|
assert(lines[2].length == 0);
|
|
i = cmp(lines[3], "paul");
|
|
assert(i == 0);
|
|
i = cmp(lines[4], "jerry");
|
|
assert(i == 0);
|
|
|
|
s = s[0 .. s.length - 1]; // lop off trailing \n
|
|
lines = splitlines(s);
|
|
//printf("lines.length = %d\n", lines.length);
|
|
assert(lines.length == 5);
|
|
i = cmp(lines[4], "jerry");
|
|
assert(i == 0);
|
|
}
|
|
|
|
|
|
/*****************************************
|
|
* Strips leading or trailing whitespace, or both.
|
|
*/
|
|
|
|
char[] stripl(char[] s)
|
|
{
|
|
uint i;
|
|
|
|
for (i = 0; i < s.length; i++)
|
|
{
|
|
if (!std.ctype.isspace(s[i]))
|
|
break;
|
|
}
|
|
return s[i .. s.length];
|
|
}
|
|
|
|
char[] stripr(char[] s)
|
|
{
|
|
uint i;
|
|
|
|
for (i = s.length; i > 0; i--)
|
|
{
|
|
if (!std.ctype.isspace(s[i - 1]))
|
|
break;
|
|
}
|
|
return s[0 .. i];
|
|
}
|
|
|
|
char[] strip(char[] s)
|
|
{
|
|
return stripr(stripl(s));
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.strip.unittest\n");
|
|
char[] s;
|
|
int i;
|
|
|
|
s = strip(" foo\t ");
|
|
i = cmp(s, "foo");
|
|
assert(i == 0);
|
|
}
|
|
|
|
/*******************************************
|
|
* Returns s[] sans trailing delimiter[], if any.
|
|
* If delimiter[] is null, removes trailing CR, LF, or CRLF, if any.
|
|
*/
|
|
|
|
char[] chomp(char[] s, char[] delimiter = null)
|
|
{
|
|
if (delimiter is null)
|
|
{ size_t len = s.length;
|
|
|
|
if (len)
|
|
{ char c = s[len - 1];
|
|
|
|
if (c == '\r') // if ends in CR
|
|
len--;
|
|
else if (c == '\n') // if ends in LF
|
|
{
|
|
len--;
|
|
if (len && s[len - 1] == '\r')
|
|
len--; // remove CR-LF
|
|
}
|
|
}
|
|
return s[0 .. len];
|
|
}
|
|
else if (s.length >= delimiter.length)
|
|
{
|
|
if (s[length - delimiter.length .. length] == delimiter)
|
|
return s[0 .. length - delimiter.length];
|
|
}
|
|
return s;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.chomp.unittest\n");
|
|
char[] s;
|
|
|
|
s = chomp(null);
|
|
assert(s is null);
|
|
s = chomp("hello");
|
|
assert(s == "hello");
|
|
s = chomp("hello\n");
|
|
assert(s == "hello");
|
|
s = chomp("hello\r");
|
|
assert(s == "hello");
|
|
s = chomp("hello\r\n");
|
|
assert(s == "hello");
|
|
s = chomp("hello\n\r");
|
|
assert(s == "hello\n");
|
|
s = chomp("hello\n\n");
|
|
assert(s == "hello\n");
|
|
s = chomp("hello\r\r");
|
|
assert(s == "hello\r");
|
|
s = chomp("hello\nxxx\n");
|
|
assert(s == "hello\nxxx");
|
|
|
|
s = chomp(null, null);
|
|
assert(s is null);
|
|
s = chomp("hello", "o");
|
|
assert(s == "hell");
|
|
s = chomp("hello", "p");
|
|
assert(s == "hello");
|
|
s = chomp("hello", null);
|
|
assert(s == "hello");
|
|
s = chomp("hello", "llo");
|
|
assert(s == "he");
|
|
}
|
|
|
|
|
|
/***********************************************
|
|
* Returns s[] sans trailing character, if there is one.
|
|
* If last two characters are CR-LF, then both are removed.
|
|
*/
|
|
|
|
char[] chop(char[] s)
|
|
{ size_t len = s.length;
|
|
|
|
if (len)
|
|
{
|
|
if (len >= 2 && s[len - 1] == '\n' && s[len - 2] == '\r')
|
|
return s[0 .. len - 2];
|
|
|
|
// If we're in a tail of a UTF-8 sequence, back up
|
|
while ((s[len - 1] & 0xC0) == 0x80)
|
|
{
|
|
len--;
|
|
if (len == 0)
|
|
throw new std.utf.UtfError("invalid UTF sequence", 0);
|
|
}
|
|
|
|
return s[0 .. len - 1];
|
|
}
|
|
return s;
|
|
}
|
|
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.chop.unittest\n");
|
|
char[] s;
|
|
|
|
s = chop(null);
|
|
assert(s is null);
|
|
s = chop("hello");
|
|
assert(s == "hell");
|
|
s = chop("hello\r\n");
|
|
assert(s == "hello");
|
|
s = chop("hello\n\r");
|
|
assert(s == "hello\n");
|
|
}
|
|
|
|
|
|
/*******************************************
|
|
* Left justify, right justify, or center string
|
|
* in field width chars wide.
|
|
*/
|
|
|
|
char[] ljustify(char[] s, int width)
|
|
{
|
|
if (s.length >= width)
|
|
return s;
|
|
char[] r = new char[width];
|
|
r[0..s.length] = s;
|
|
r[s.length .. width] = cast(char)' ';
|
|
return r;
|
|
}
|
|
|
|
char[] rjustify(char[] s, int width)
|
|
{
|
|
if (s.length >= width)
|
|
return s;
|
|
char[] r = new char[width];
|
|
r[0 .. width - s.length] = cast(char)' ';
|
|
r[width - s.length .. width] = s;
|
|
return r;
|
|
}
|
|
|
|
char[] center(char[] s, int width)
|
|
{
|
|
if (s.length >= width)
|
|
return s;
|
|
char[] r = new char[width];
|
|
int left = (width - s.length) / 2;
|
|
r[0 .. left] = cast(char)' ';
|
|
r[left .. left + s.length] = s;
|
|
r[left + s.length .. width] = cast(char)' ';
|
|
return r;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.justify.unittest\n");
|
|
|
|
char[] s = "hello";
|
|
char[] r;
|
|
int i;
|
|
|
|
r = ljustify(s, 8);
|
|
i = cmp(r, "hello ");
|
|
assert(i == 0);
|
|
|
|
r = rjustify(s, 8);
|
|
i = cmp(r, " hello");
|
|
assert(i == 0);
|
|
|
|
r = center(s, 8);
|
|
i = cmp(r, " hello ");
|
|
assert(i == 0);
|
|
|
|
r = zfill(s, 8);
|
|
i = cmp(r, "000hello");
|
|
assert(i == 0);
|
|
}
|
|
|
|
|
|
/*****************************************
|
|
* Same as rjustify(), but fill with '0's
|
|
*/
|
|
|
|
char[] zfill(char[] s, int width)
|
|
{
|
|
if (s.length >= width)
|
|
return s;
|
|
char[] r = new char[width];
|
|
r[0 .. width - s.length] = cast(char)'0';
|
|
r[width - s.length .. width] = s;
|
|
return r;
|
|
}
|
|
|
|
/********************************************
|
|
* Replace occurrences of from[] with to[] in s[].
|
|
*/
|
|
|
|
char[] replace(char[] s, char[] from, char[] to)
|
|
{
|
|
char[] p;
|
|
int i;
|
|
int istart;
|
|
|
|
//printf("replace('%.*s','%.*s','%.*s')\n", s, from, to);
|
|
istart = 0;
|
|
while (istart < s.length)
|
|
{
|
|
i = find(s[istart .. s.length], from);
|
|
if (i == -1)
|
|
{
|
|
p ~= s[istart .. s.length];
|
|
break;
|
|
}
|
|
p ~= s[istart .. istart + i];
|
|
p ~= to;
|
|
istart += i + from.length;
|
|
}
|
|
return p;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.replace.unittest\n");
|
|
|
|
char[] s = "This is a foo foo list";
|
|
char[] from = "foo";
|
|
char[] to = "silly";
|
|
char[] r;
|
|
int i;
|
|
|
|
r = replace(s, from, to);
|
|
i = cmp(r, "This is a silly silly list");
|
|
assert(i == 0);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////
|
|
// Return a string that is string[] with slice[] replaced by replacement[].
|
|
|
|
char[] replaceSlice(char[] string, char[] slice, char[] replacement)
|
|
in
|
|
{
|
|
// Verify that slice[] really is a slice of string[]
|
|
int so = cast(char*)slice - cast(char*)string;
|
|
assert(so >= 0);
|
|
//printf("string.length = %d, so = %d, slice.length = %d\n", string.length, so, slice.length);
|
|
assert(string.length >= so + slice.length);
|
|
}
|
|
body
|
|
{
|
|
char[] result;
|
|
int so = cast(char*)slice - cast(char*)string;
|
|
|
|
result.length = string.length - slice.length + replacement.length;
|
|
|
|
result[0 .. so] = string[0 .. so];
|
|
result[so .. so + replacement.length] = replacement;
|
|
result[so + replacement.length .. result.length] = string[so + slice.length .. string.length];
|
|
|
|
return result;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.replaceSlice.unittest\n");
|
|
|
|
char[] string = "hello";
|
|
char[] slice = string[2 .. 4];
|
|
|
|
char[] r = replaceSlice(string, slice, "bar");
|
|
int i;
|
|
i = cmp(r, "hebaro");
|
|
assert(i == 0);
|
|
}
|
|
|
|
/**********************************************
|
|
* Insert sub[] into s[] at location index.
|
|
*/
|
|
|
|
char[] insert(char[] s, int index, char[] sub)
|
|
in
|
|
{
|
|
assert(0 <= index && index <= s.length);
|
|
}
|
|
body
|
|
{
|
|
if (sub.length == 0)
|
|
return s;
|
|
|
|
if (s.length == 0)
|
|
return sub;
|
|
|
|
int newlength = s.length + sub.length;
|
|
char[] result = new char[newlength];
|
|
|
|
result[0 .. index] = s[0 .. index];
|
|
result[index .. index + sub.length] = sub;
|
|
result[index + sub.length .. newlength] = s[index .. s.length];
|
|
return result;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.insert.unittest\n");
|
|
|
|
char[] r;
|
|
int i;
|
|
|
|
r = insert("abcd", 0, "e");
|
|
i = cmp(r, "eabcd");
|
|
assert(i == 0);
|
|
|
|
r = insert("abcd", 4, "e");
|
|
i = cmp(r, "abcde");
|
|
assert(i == 0);
|
|
|
|
r = insert("abcd", 2, "ef");
|
|
i = cmp(r, "abefcd");
|
|
assert(i == 0);
|
|
|
|
r = insert(null, 0, "e");
|
|
i = cmp(r, "e");
|
|
assert(i == 0);
|
|
|
|
r = insert("abcd", 0, null);
|
|
i = cmp(r, "abcd");
|
|
assert(i == 0);
|
|
}
|
|
|
|
/***********************************************
|
|
* Count up all instances of sub[] in s[].
|
|
*/
|
|
|
|
size_t count(char[] s, char[] sub)
|
|
{
|
|
size_t i;
|
|
int j;
|
|
int count = 0;
|
|
|
|
for (i = 0; i < s.length; i += j + sub.length)
|
|
{
|
|
j = find(s[i .. s.length], sub);
|
|
if (j == -1)
|
|
break;
|
|
count++;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.count.unittest\n");
|
|
|
|
char[] s = "This is a fofofof list";
|
|
char[] sub = "fof";
|
|
int i;
|
|
|
|
i = count(s, sub);
|
|
assert(i == 2);
|
|
}
|
|
|
|
|
|
/************************************************
|
|
* Replace tabs with the appropriate number of spaces.
|
|
* tabsize is the distance between tab stops.
|
|
*/
|
|
|
|
char[] expandtabs(char[] s, int tabsize)
|
|
{
|
|
char[] r;
|
|
int i;
|
|
int istart;
|
|
int col;
|
|
static char[8] spaces = " ";
|
|
|
|
col = 0;
|
|
for (i = 0; i < s.length; i++)
|
|
{
|
|
char c;
|
|
|
|
c = s[i];
|
|
if (c == '\t')
|
|
{ int tabstop;
|
|
|
|
r ~= s[istart .. i];
|
|
istart = i + 1;
|
|
|
|
tabstop = col + tabsize;
|
|
tabstop -= tabstop % tabsize;
|
|
while (col < tabstop)
|
|
{
|
|
int n = tabstop - col;
|
|
if (n > spaces.length)
|
|
n = spaces.length;
|
|
r ~= spaces[0 .. n];
|
|
col += n;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
col++;
|
|
}
|
|
}
|
|
r ~= s[istart .. i];
|
|
return r;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.expandtabs.unittest\n");
|
|
|
|
char[] s = "This \tis\t a fofof\tof list";
|
|
char[] r;
|
|
int i;
|
|
|
|
r = expandtabs(s, 8);
|
|
i = cmp(r, "This is a fofof of list");
|
|
assert(i == 0);
|
|
}
|
|
|
|
|
|
/************************************
|
|
* Construct translation table for translate().
|
|
* BUG: only works with ASCII
|
|
*/
|
|
|
|
char[] maketrans(char[] from, char[] to)
|
|
in
|
|
{
|
|
assert(from.length == to.length);
|
|
assert(from.length <= 128);
|
|
foreach (char c; from)
|
|
{
|
|
assert(c <= 0x7F);
|
|
}
|
|
foreach (char c; to)
|
|
{
|
|
assert(c <= 0x7F);
|
|
}
|
|
}
|
|
body
|
|
{
|
|
char[] t = new char[256];
|
|
int i;
|
|
|
|
for (i = 0; i < t.length; i++)
|
|
t[i] = cast(char)i;
|
|
|
|
for (i = 0; i < from.length; i++)
|
|
t[from[i]] = to[i];
|
|
|
|
return t;
|
|
}
|
|
|
|
/******************************************
|
|
* Translate characters in s[] using table created by maketrans().
|
|
* Delete chars in delchars[].
|
|
* BUG: only works with ASCII
|
|
*/
|
|
|
|
char[] translate(char[] s, char[] transtab, char[] delchars)
|
|
in
|
|
{
|
|
assert(transtab.length == 256);
|
|
}
|
|
body
|
|
{
|
|
char[] r;
|
|
int count;
|
|
bit[256] deltab;
|
|
|
|
deltab[] = false;
|
|
foreach (char c; delchars)
|
|
{
|
|
deltab[c] = true;
|
|
}
|
|
|
|
count = 0;
|
|
foreach (char c; s)
|
|
{
|
|
if (!deltab[c])
|
|
count++;
|
|
//printf("s[%d] = '%c', count = %d\n", i, s[i], count);
|
|
}
|
|
|
|
r = new char[count];
|
|
count = 0;
|
|
foreach (char c; s)
|
|
{
|
|
if (!deltab[c])
|
|
{
|
|
r[count] = transtab[c];
|
|
count++;
|
|
}
|
|
}
|
|
|
|
return r;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.translate.unittest\n");
|
|
|
|
char[] from = "abcdef";
|
|
char[] to = "ABCDEF";
|
|
char[] s = "The quick dog fox";
|
|
char[] t;
|
|
char[] r;
|
|
int i;
|
|
|
|
t = maketrans(from, to);
|
|
r = translate(s, t, "kg");
|
|
//printf("r = '%.*s'\n", r);
|
|
i = cmp(r, "ThE quiC Do Fox");
|
|
assert(i == 0);
|
|
}
|
|
|
|
/***********************************************
|
|
* Convert to char[].
|
|
*/
|
|
|
|
char[] toString(bit b)
|
|
{
|
|
return b ? "true" : "false";
|
|
}
|
|
|
|
char[] toString(char c)
|
|
{
|
|
char[] result = new char[2];
|
|
result[0] = c;
|
|
result[1] = 0;
|
|
return result[0 .. 1];
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.toString(char).unittest\n");
|
|
|
|
char[] s = "foo";
|
|
char[] s2;
|
|
foreach (char c; s)
|
|
{
|
|
s2 ~= std.string.toString(c);
|
|
}
|
|
//printf("%.*s", s2);
|
|
assert(s2 == "foo");
|
|
}
|
|
|
|
char[] toString(ubyte ub) { return toString(cast(uint) ub); }
|
|
char[] toString(ushort us) { return toString(cast(uint) us); }
|
|
|
|
char[] toString(uint u)
|
|
{ char[uint.sizeof * 3] buffer;
|
|
int ndigits;
|
|
char c;
|
|
char[] result;
|
|
|
|
ndigits = 0;
|
|
if (u < 10)
|
|
// Avoid storage allocation for simple stuff
|
|
result = digits[u .. u + 1];
|
|
else
|
|
{
|
|
while (u)
|
|
{
|
|
c = (u % 10) + '0';
|
|
u /= 10;
|
|
ndigits++;
|
|
buffer[buffer.length - ndigits] = c;
|
|
}
|
|
result = new char[ndigits];
|
|
result[] = buffer[buffer.length - ndigits .. buffer.length];
|
|
}
|
|
return result;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.toString(uint).unittest\n");
|
|
|
|
char[] r;
|
|
int i;
|
|
|
|
r = toString(0u);
|
|
i = cmp(r, "0");
|
|
assert(i == 0);
|
|
|
|
r = toString(9u);
|
|
i = cmp(r, "9");
|
|
assert(i == 0);
|
|
|
|
r = toString(123u);
|
|
i = cmp(r, "123");
|
|
assert(i == 0);
|
|
}
|
|
|
|
char[] toString(ulong u)
|
|
{ char[ulong.sizeof * 3] buffer;
|
|
int ndigits;
|
|
char c;
|
|
char[] result;
|
|
|
|
if (u < 0x1_0000_0000)
|
|
return toString(cast(uint)u);
|
|
ndigits = 0;
|
|
while (u)
|
|
{
|
|
c = (u % 10) + '0';
|
|
u /= 10;
|
|
ndigits++;
|
|
buffer[buffer.length - ndigits] = c;
|
|
}
|
|
result = new char[ndigits];
|
|
result[] = buffer[buffer.length - ndigits .. buffer.length];
|
|
return result;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.toString(ulong).unittest\n");
|
|
|
|
char[] r;
|
|
int i;
|
|
|
|
r = toString(0ul);
|
|
i = cmp(r, "0");
|
|
assert(i == 0);
|
|
|
|
r = toString(9ul);
|
|
i = cmp(r, "9");
|
|
assert(i == 0);
|
|
|
|
r = toString(123ul);
|
|
i = cmp(r, "123");
|
|
assert(i == 0);
|
|
}
|
|
|
|
char[] toString(byte b) { return toString(cast(int) b); }
|
|
char[] toString(short s) { return toString(cast(int) s); }
|
|
|
|
char[] toString(int i)
|
|
{ char[1 + int.sizeof * 3] buffer;
|
|
char c;
|
|
char[] result;
|
|
|
|
if (i >= 0)
|
|
return toString(cast(uint)i);
|
|
|
|
uint u = -i;
|
|
int ndigits = 1;
|
|
while (u)
|
|
{
|
|
c = (u % 10) + '0';
|
|
u /= 10;
|
|
buffer[buffer.length - ndigits] = c;
|
|
ndigits++;
|
|
}
|
|
buffer[buffer.length - ndigits] = '-';
|
|
result = new char[ndigits];
|
|
result[] = buffer[buffer.length - ndigits .. buffer.length];
|
|
return result;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.toString(int).unittest\n");
|
|
|
|
char[] r;
|
|
int i;
|
|
|
|
r = toString(0);
|
|
i = cmp(r, "0");
|
|
assert(i == 0);
|
|
|
|
r = toString(9);
|
|
i = cmp(r, "9");
|
|
assert(i == 0);
|
|
|
|
r = toString(123);
|
|
i = cmp(r, "123");
|
|
assert(i == 0);
|
|
|
|
r = toString(-0);
|
|
i = cmp(r, "0");
|
|
assert(i == 0);
|
|
|
|
r = toString(-9);
|
|
i = cmp(r, "-9");
|
|
assert(i == 0);
|
|
|
|
r = toString(-123);
|
|
i = cmp(r, "-123");
|
|
assert(i == 0);
|
|
}
|
|
|
|
char[] toString(long i)
|
|
{ char[1 + long.sizeof * 3] buffer;
|
|
char c;
|
|
char[] result;
|
|
|
|
if (i >= 0)
|
|
return toString(cast(ulong)i);
|
|
if (cast(int)i == i)
|
|
return toString(cast(int)i);
|
|
|
|
ulong u = -i;
|
|
int ndigits = 1;
|
|
while (u)
|
|
{
|
|
c = (u % 10) + '0';
|
|
u /= 10;
|
|
buffer[buffer.length - ndigits] = c;
|
|
ndigits++;
|
|
}
|
|
buffer[buffer.length - ndigits] = '-';
|
|
result = new char[ndigits];
|
|
result[] = buffer[buffer.length - ndigits .. buffer.length];
|
|
return result;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.toString(long).unittest\n");
|
|
|
|
char[] r;
|
|
int i;
|
|
|
|
r = toString(0l);
|
|
i = cmp(r, "0");
|
|
assert(i == 0);
|
|
|
|
r = toString(9l);
|
|
i = cmp(r, "9");
|
|
assert(i == 0);
|
|
|
|
r = toString(123l);
|
|
i = cmp(r, "123");
|
|
assert(i == 0);
|
|
|
|
r = toString(-0l);
|
|
i = cmp(r, "0");
|
|
assert(i == 0);
|
|
|
|
r = toString(-9l);
|
|
i = cmp(r, "-9");
|
|
assert(i == 0);
|
|
|
|
r = toString(-123l);
|
|
i = cmp(r, "-123");
|
|
assert(i == 0);
|
|
}
|
|
|
|
char[] toString(float f) { return toString(cast(double) f); }
|
|
|
|
char[] toString(double d)
|
|
{
|
|
char[20] buffer;
|
|
|
|
sprintf(buffer, "%g", d);
|
|
return toString(buffer).dup;
|
|
}
|
|
|
|
char[] toString(real r)
|
|
{
|
|
char[20] buffer;
|
|
|
|
sprintf(buffer, "%Lg", r);
|
|
return toString(buffer).dup;
|
|
}
|
|
|
|
char[] toString(ifloat f) { return toString(cast(idouble) f); }
|
|
|
|
char[] toString(idouble d)
|
|
{
|
|
char[21] buffer;
|
|
|
|
sprintf(buffer, "%gi", d);
|
|
return toString(buffer).dup;
|
|
}
|
|
|
|
char[] toString(ireal r)
|
|
{
|
|
char[21] buffer;
|
|
|
|
sprintf(buffer, "%Lgi", r);
|
|
return toString(buffer).dup;
|
|
}
|
|
|
|
char[] toString(cfloat f) { return toString(cast(cdouble) f); }
|
|
|
|
char[] toString(cdouble d)
|
|
{
|
|
char[20 + 1 + 20 + 1] buffer;
|
|
|
|
sprintf(buffer, "%g+%gi", d.re, d.im);
|
|
return toString(buffer).dup;
|
|
}
|
|
|
|
char[] toString(creal r)
|
|
{
|
|
char[20 + 1 + 20 + 1] buffer;
|
|
|
|
sprintf(buffer, "%Lg+%Lgi", r.re, r.im);
|
|
return toString(buffer).dup;
|
|
}
|
|
|
|
char[] toString(long value, uint radix)
|
|
in
|
|
{
|
|
assert(radix >= 2 && radix <= 36);
|
|
}
|
|
body
|
|
{
|
|
if (radix == 10)
|
|
return toString(value); // handle signed cases only for radix 10
|
|
return toString(cast(ulong)value, radix);
|
|
}
|
|
|
|
char[] toString(ulong value, uint radix)
|
|
in
|
|
{
|
|
assert(radix >= 2 && radix <= 36);
|
|
}
|
|
body
|
|
{
|
|
char[value.sizeof * 8] buffer;
|
|
uint i = buffer.length;
|
|
|
|
if (value < radix && value < hexdigits.length)
|
|
return hexdigits[value .. value + 1];
|
|
|
|
do
|
|
{ ubyte c;
|
|
|
|
c = value % radix;
|
|
value = value / radix;
|
|
i--;
|
|
buffer[i] = (c < 10) ? c + '0' : c + 'A' - 10;
|
|
} while (value);
|
|
return buffer[i .. length].dup;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.toString(ulong, uint).unittest\n");
|
|
|
|
char[] r;
|
|
int i;
|
|
|
|
r = toString(-10L, 10u);
|
|
assert(r == "-10");
|
|
|
|
r = toString(15L, 2u);
|
|
//writefln("r = '%s'", r);
|
|
assert(r == "1111");
|
|
|
|
r = toString(1L, 2u);
|
|
//writefln("r = '%s'", r);
|
|
assert(r == "1");
|
|
|
|
r = toString(0x1234AFL, 16u);
|
|
//writefln("r = '%s'", r);
|
|
assert(r == "1234AF");
|
|
}
|
|
|
|
/*************************************************
|
|
* Convert to char[].
|
|
*/
|
|
|
|
char[] toString(char *s)
|
|
{
|
|
return s ? s[0 .. strlen(s)] : cast(char[])null;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("string.toString(char*).unittest\n");
|
|
|
|
char[] r;
|
|
int i;
|
|
|
|
r = toString(null);
|
|
i = cmp(r, "");
|
|
assert(i == 0);
|
|
|
|
r = toString("foo\0");
|
|
i = cmp(r, "foo");
|
|
assert(i == 0);
|
|
}
|
|
|
|
|
|
/*****************************************************
|
|
*/
|
|
|
|
|
|
char[] format(...)
|
|
{
|
|
char[] s;
|
|
|
|
void putc(dchar c)
|
|
{
|
|
std.utf.encode(s, c);
|
|
}
|
|
|
|
std.format.doFormat(&putc, _arguments, _argptr);
|
|
return s;
|
|
}
|
|
|
|
|
|
char[] sformat(char[] s, ...)
|
|
{ size_t i;
|
|
|
|
void putc(dchar c)
|
|
{
|
|
if (c <= 0x7F)
|
|
{
|
|
if (i >= s.length)
|
|
throw new ArrayBoundsError("std.string.sformat", 0);
|
|
s[i] = c;
|
|
++i;
|
|
}
|
|
else
|
|
{ char[4] buf;
|
|
char[] b;
|
|
|
|
b = std.utf.toUTF8(buf, c);
|
|
if (i + b.length > s.length)
|
|
throw new ArrayBoundsError("std.string.sformat", 0);
|
|
s[i..i+b.length] = b[];
|
|
i += b.length;
|
|
}
|
|
}
|
|
|
|
std.format.doFormat(&putc, _arguments, _argptr);
|
|
return s[0 .. i];
|
|
}
|
|
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("std.string.format.unittest\n");
|
|
|
|
char[] r;
|
|
int i;
|
|
/+
|
|
r = format(null);
|
|
i = cmp(r, "");
|
|
assert(i == 0);
|
|
+/
|
|
r = format("foo");
|
|
i = cmp(r, "foo");
|
|
assert(i == 0);
|
|
|
|
r = format("foo%%");
|
|
i = cmp(r, "foo%");
|
|
assert(i == 0);
|
|
|
|
r = format("foo%s", 'C');
|
|
i = cmp(r, "fooC");
|
|
assert(i == 0);
|
|
|
|
r = format("%s foo", "bar");
|
|
i = cmp(r, "bar foo");
|
|
assert(i == 0);
|
|
|
|
r = format("%s foo %s", "bar", "abc");
|
|
i = cmp(r, "bar foo abc");
|
|
assert(i == 0);
|
|
|
|
r = format("foo %d", -123);
|
|
i = cmp(r, "foo -123");
|
|
assert(i == 0);
|
|
|
|
r = format("foo %d", 123);
|
|
i = cmp(r, "foo 123");
|
|
assert(i == 0);
|
|
}
|
|
|
|
|
|
/***********************************************
|
|
* See if character c is in the pattern.
|
|
*/
|
|
|
|
int inPattern(dchar c, char[] pattern)
|
|
{
|
|
int result = 0;
|
|
int range = 0;
|
|
dchar lastc;
|
|
|
|
foreach (size_t i, dchar p; pattern)
|
|
{
|
|
if (p == '^' && i == 0)
|
|
{ result = 1;
|
|
if (i + 1 == pattern.length)
|
|
return (c == p); // or should this be an error?
|
|
}
|
|
else if (range)
|
|
{
|
|
range = 0;
|
|
if (lastc <= c && c <= p || c == p)
|
|
return result ^ 1;
|
|
}
|
|
else if (p == '-' && i > result && i + 1 < pattern.length)
|
|
{
|
|
range = 1;
|
|
continue;
|
|
}
|
|
else if (c == p)
|
|
return result ^ 1;
|
|
lastc = p;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("std.string.inPattern.unittest\n");
|
|
|
|
int i;
|
|
|
|
i = inPattern('x', "x");
|
|
assert(i == 1);
|
|
i = inPattern('x', "y");
|
|
assert(i == 0);
|
|
i = inPattern('x', cast(char[])null);
|
|
assert(i == 0);
|
|
i = inPattern('x', "^y");
|
|
assert(i == 1);
|
|
i = inPattern('x', "yxxy");
|
|
assert(i == 1);
|
|
i = inPattern('x', "^yxxy");
|
|
assert(i == 0);
|
|
i = inPattern('x', "^abcd");
|
|
assert(i == 1);
|
|
i = inPattern('^', "^^");
|
|
assert(i == 0);
|
|
i = inPattern('^', "^");
|
|
assert(i == 1);
|
|
i = inPattern('^', "a^");
|
|
assert(i == 1);
|
|
i = inPattern('x', "a-z");
|
|
assert(i == 1);
|
|
i = inPattern('x', "A-Z");
|
|
assert(i == 0);
|
|
i = inPattern('x', "^a-z");
|
|
assert(i == 0);
|
|
i = inPattern('x', "^A-Z");
|
|
assert(i == 1);
|
|
i = inPattern('-', "a-");
|
|
assert(i == 1);
|
|
i = inPattern('-', "^A-");
|
|
assert(i == 0);
|
|
i = inPattern('a', "z-a");
|
|
assert(i == 1);
|
|
i = inPattern('z', "z-a");
|
|
assert(i == 1);
|
|
i = inPattern('x', "z-a");
|
|
assert(i == 0);
|
|
}
|
|
|
|
|
|
/***********************************************
|
|
* See if character c is in the intersection of the patterns.
|
|
*/
|
|
|
|
int inPattern(dchar c, char[][] patterns)
|
|
{ int result;
|
|
|
|
foreach (char[] pattern; patterns)
|
|
{
|
|
if (!inPattern(c, pattern))
|
|
{ result = 0;
|
|
break;
|
|
}
|
|
result = 1;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/********************************************
|
|
* Count characters in s that match pattern.
|
|
*/
|
|
|
|
size_t countchars(char[] s, char[] pattern)
|
|
{
|
|
size_t count;
|
|
|
|
foreach (dchar c; s)
|
|
{
|
|
count += inPattern(c, pattern);
|
|
}
|
|
return count;
|
|
}
|
|
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("std.string.count.unittest\n");
|
|
|
|
size_t c;
|
|
|
|
c = countchars("abc", "a-c");
|
|
assert(c == 3);
|
|
c = countchars("hello world", "or");
|
|
assert(c == 3);
|
|
}
|
|
|
|
|
|
/********************************************
|
|
* Return string that is s with all characters removed that match pattern.
|
|
*/
|
|
|
|
char[] removechars(char[] s, char[] pattern)
|
|
{
|
|
char[] r = s;
|
|
int changed;
|
|
size_t j;
|
|
|
|
foreach (size_t i, dchar c; s)
|
|
{
|
|
if (!inPattern(c, pattern))
|
|
{
|
|
if (changed)
|
|
{
|
|
if (r is s)
|
|
r = s[0 .. j].dup;
|
|
std.utf.encode(r, c);
|
|
}
|
|
}
|
|
else if (!changed)
|
|
{ changed = 1;
|
|
j = i;
|
|
}
|
|
}
|
|
if (changed && r is s)
|
|
r = s[0 .. j].dup;
|
|
return r;
|
|
}
|
|
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("std.string.remove.unittest\n");
|
|
|
|
char[] r;
|
|
|
|
r = removechars("abc", "a-c");
|
|
assert(r is null);
|
|
r = removechars("hello world", "or");
|
|
assert(r == "hell wld");
|
|
r = removechars("hello world", "d");
|
|
assert(r == "hello worl");
|
|
}
|
|
|
|
|
|
/***************************************************
|
|
* Return string where sequences of a character from pattern
|
|
* are replaced with a single instance of that character.
|
|
* If pattern is null, it defaults to all characters.
|
|
*/
|
|
|
|
char[] squeeze(char[] s, char[] pattern = null)
|
|
{
|
|
char[] r = s;
|
|
dchar lastc;
|
|
size_t lasti;
|
|
int run;
|
|
int changed;
|
|
|
|
foreach (size_t i, dchar c; s)
|
|
{
|
|
if (run && lastc == c)
|
|
{
|
|
changed = 1;
|
|
}
|
|
else if (pattern is null || inPattern(c, pattern))
|
|
{
|
|
run = 1;
|
|
if (changed)
|
|
{ if (r is s)
|
|
r = s[0 .. lasti].dup;
|
|
std.utf.encode(r, c);
|
|
}
|
|
else
|
|
lasti = i + std.utf.stride(s, i);
|
|
lastc = c;
|
|
}
|
|
else
|
|
{
|
|
run = 0;
|
|
if (changed)
|
|
{ if (r is s)
|
|
r = s[0 .. lasti].dup;
|
|
std.utf.encode(r, c);
|
|
}
|
|
}
|
|
}
|
|
if (changed)
|
|
{
|
|
if (r is s)
|
|
r = s[0 .. lasti];
|
|
}
|
|
return r;
|
|
}
|
|
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("std.string.squeeze.unittest\n");
|
|
char[] s,r;
|
|
|
|
r = squeeze("hello");
|
|
//writefln("r = '%s'", r);
|
|
assert(r == "helo");
|
|
s = "abcd";
|
|
r = squeeze(s);
|
|
assert(r is s);
|
|
s = "xyzz";
|
|
r = squeeze(s);
|
|
assert(r.ptr == s.ptr); // should just be a slice
|
|
r = squeeze("hello goodbyee", "oe");
|
|
assert(r == "hello godbye");
|
|
}
|
|
|
|
|
|
/**********************************************
|
|
* Return string that is the 'successor' to s.
|
|
* If the rightmost character is a-zA-Z0-9, it is incremented within
|
|
* its case or digits. If it generates a carry, the process is
|
|
* repeated with the one to its immediate left.
|
|
*/
|
|
|
|
char[] succ(char[] s)
|
|
{
|
|
if (s.length && isalnum(s[length - 1]))
|
|
{
|
|
char[] r = s.dup;
|
|
size_t i = r.length - 1;
|
|
|
|
while (1)
|
|
{ dchar c = s[i];
|
|
dchar carry;
|
|
|
|
switch (c)
|
|
{
|
|
case '9':
|
|
c = '0';
|
|
carry = '1';
|
|
goto Lcarry;
|
|
case 'z':
|
|
case 'Z':
|
|
c -= 'Z' - 'A';
|
|
carry = c;
|
|
Lcarry:
|
|
r[i] = c;
|
|
if (i == 0)
|
|
{
|
|
char[] t = new char[r.length + 1];
|
|
t[0] = carry;
|
|
t[1 .. length] = r[];
|
|
return t;
|
|
}
|
|
i--;
|
|
break;
|
|
|
|
default:
|
|
if (std.ctype.isalnum(c))
|
|
r[i]++;
|
|
return r;
|
|
}
|
|
}
|
|
}
|
|
return s;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("std.string.succ.unittest\n");
|
|
|
|
char[] r;
|
|
|
|
r = succ(null);
|
|
assert(r is null);
|
|
r = succ("!@#$%");
|
|
assert(r == "!@#$%");
|
|
r = succ("1");
|
|
assert(r == "2");
|
|
r = succ("9");
|
|
assert(r == "10");
|
|
r = succ("999");
|
|
assert(r == "1000");
|
|
r = succ("zz99");
|
|
assert(r == "aaa00");
|
|
}
|
|
|
|
|
|
/***********************************************
|
|
* Translate characters in from[] to characters in to[].
|
|
*/
|
|
|
|
char[] tr(char[] str, char[] from, char[] to, char[] modifiers = null)
|
|
{
|
|
int mod_c;
|
|
int mod_d;
|
|
int mod_s;
|
|
|
|
foreach (char c; modifiers)
|
|
{
|
|
switch (c)
|
|
{
|
|
case 'c': mod_c = 1; break; // complement
|
|
case 'd': mod_d = 1; break; // delete unreplaced chars
|
|
case 's': mod_s = 1; break; // squeeze duplicated replaced chars
|
|
}
|
|
}
|
|
|
|
if (to is null && !mod_d)
|
|
to = from;
|
|
|
|
char[] result = new char[str.length];
|
|
result.length = 0;
|
|
int m;
|
|
dchar lastc;
|
|
|
|
foreach (dchar c; str)
|
|
{ dchar lastf;
|
|
dchar lastt;
|
|
dchar newc;
|
|
int n = 0;
|
|
|
|
for (size_t i = 0; i < from.length; )
|
|
{
|
|
dchar f = std.utf.decode(from, i);
|
|
//writefln("\tf = '%s', c = '%s', lastf = '%x', '%x', i = %d, %d", f, c, lastf, dchar.init, i, from.length);
|
|
if (f == '-' && lastf != dchar.init && i < from.length)
|
|
{
|
|
dchar nextf = std.utf.decode(from, i);
|
|
//writefln("\tlastf = '%s', c = '%s', nextf = '%s'", lastf, c, nextf);
|
|
if (lastf <= c && c <= nextf)
|
|
{
|
|
n += c - lastf - 1;
|
|
if (mod_c)
|
|
goto Lnotfound;
|
|
goto Lfound;
|
|
}
|
|
n += nextf - lastf;
|
|
lastf = lastf.init;
|
|
continue;
|
|
}
|
|
|
|
if (c == f)
|
|
{ if (mod_c)
|
|
goto Lnotfound;
|
|
goto Lfound;
|
|
}
|
|
lastf = f;
|
|
n++;
|
|
}
|
|
if (!mod_c)
|
|
goto Lnotfound;
|
|
n = 0; // consider it 'found' at position 0
|
|
|
|
Lfound:
|
|
|
|
// Find the nth character in to[]
|
|
//writefln("\tc = '%s', n = %d", c, n);
|
|
dchar nextt;
|
|
for (size_t i = 0; i < to.length; )
|
|
{ dchar t = std.utf.decode(to, i);
|
|
if (t == '-' && lastt != dchar.init && i < to.length)
|
|
{
|
|
nextt = std.utf.decode(to, i);
|
|
//writefln("\tlastt = '%s', c = '%s', nextt = '%s', n = %d", lastt, c, nextt, n);
|
|
n -= nextt - lastt;
|
|
if (n < 0)
|
|
{
|
|
newc = nextt + n + 1;
|
|
goto Lnewc;
|
|
}
|
|
lastt = dchar.init;
|
|
continue;
|
|
}
|
|
if (n == 0)
|
|
{ newc = t;
|
|
goto Lnewc;
|
|
}
|
|
lastt = t;
|
|
nextt = t;
|
|
n--;
|
|
}
|
|
if (mod_d)
|
|
continue;
|
|
newc = nextt;
|
|
|
|
Lnewc:
|
|
if (mod_s && m && newc == lastc)
|
|
continue;
|
|
std.utf.encode(result, newc);
|
|
m = 1;
|
|
lastc = newc;
|
|
continue;
|
|
|
|
Lnotfound:
|
|
std.utf.encode(result, c);
|
|
lastc = c;
|
|
m = 0;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug(string) printf("std.string.tr.unittest\n");
|
|
|
|
char[] r;
|
|
//writefln("r = '%s'", r);
|
|
|
|
r = tr("abcdef", "cd", "CD");
|
|
assert(r == "abCDef");
|
|
|
|
r = tr("abcdef", "b-d", "B-D");
|
|
assert(r == "aBCDef");
|
|
|
|
r = tr("abcdefgh", "b-dh", "B-Dx");
|
|
assert(r == "aBCDefgx");
|
|
|
|
r = tr("abcdefgh", "b-dh", "B-CDx");
|
|
assert(r == "aBCDefgx");
|
|
|
|
r = tr("abcdefgh", "b-dh", "B-BCDx");
|
|
assert(r == "aBCDefgx");
|
|
|
|
r = tr("abcdef", "ef", "*", "c");
|
|
assert(r == "****ef");
|
|
|
|
r = tr("abcdef", "ef", "", "d");
|
|
assert(r == "abcd");
|
|
|
|
r = tr("hello goodbye", "lo", null, "s");
|
|
assert(r == "helo godbye");
|
|
|
|
r = tr("hello goodbye", "lo", "x", "s");
|
|
assert(r == "hex gxdbye");
|
|
|
|
r = tr("14-Jul-87", "a-zA-Z", " ", "cs");
|
|
assert(r == " Jul ");
|
|
|
|
r = tr("Abc", "AAA", "XYZ");
|
|
assert(r == "Xbc");
|
|
}
|