mirror of
https://github.com/dlang/phobos.git
synced 2025-04-29 14:40:30 +03:00
1236 lines
28 KiB
D
1236 lines
28 KiB
D
Ddoc
|
|
|
|
$(SPEC_S Arrays,
|
|
|
|
$(P There are four kinds of arrays:)
|
|
|
|
$(TABLE1
|
|
$(TR $(TD int* p;) $(TD Pointers to data))
|
|
|
|
$(TR $(TD int[3] s;) $(TD Static arrays))
|
|
|
|
$(TR $(TD int[] a;) $(TD Dynamic arrays))
|
|
|
|
$(TR $(TD int[char[]] x;) $(TD <a href="#associative">Associative arrays</a>))
|
|
)
|
|
|
|
<h3>Pointers</h3>
|
|
|
|
---------
|
|
int* p;
|
|
---------
|
|
|
|
$(P These are simple pointers to data, analogous to C pointers.
|
|
Pointers are provided for interfacing with C and for
|
|
specialized systems work.
|
|
There
|
|
is no length associated with it, and so there is no way for the
|
|
compiler or runtime to do bounds checking, etc., on it.
|
|
Most conventional uses for pointers can be replaced with
|
|
dynamic arrays, $(TT out) and $(TT ref) parameters,
|
|
and reference types.
|
|
)
|
|
|
|
<h3>Static Arrays</h3>
|
|
|
|
---------
|
|
int[3] s;
|
|
---------
|
|
|
|
$(P These are analogous to C arrays. Static arrays are distinguished
|
|
by having a length fixed at compile time.
|
|
)
|
|
|
|
$(P The total size of a static array cannot exceed 16MB.
|
|
A dynamic array should be used instead for such large arrays.
|
|
)
|
|
|
|
$(P A static array with a dimension of 0 is allowed, but no
|
|
space is allocated for it. It's useful as the last member
|
|
of a variable length struct, or as the degenerate case of
|
|
a template expansion.
|
|
)
|
|
|
|
<h3>Dynamic Arrays</h3>
|
|
|
|
---------
|
|
int[] a;
|
|
---------
|
|
|
|
$(P Dynamic arrays consist of a length and a pointer to the array data.
|
|
Multiple dynamic arrays can share all or parts of the array data.
|
|
)
|
|
|
|
<h2>Array Declarations</h2>
|
|
|
|
$(P There are two ways to declare arrays, prefix and postfix.
|
|
The prefix form is the preferred method, especially for
|
|
non-trivial types.
|
|
)
|
|
|
|
<h4>Prefix Array Declarations</h4>
|
|
|
|
$(P Prefix declarations appear before the identifier being
|
|
declared and read right to left, so:
|
|
)
|
|
|
|
---------
|
|
int[] a; // dynamic array of ints
|
|
int[4][3] b; // array of 3 arrays of 4 ints each
|
|
int[][5] c; // array of 5 dynamic arrays of ints.
|
|
int*[]*[3] d; // array of 3 pointers to dynamic arrays of pointers to ints
|
|
int[]* e; // pointer to dynamic array of ints
|
|
---------
|
|
|
|
|
|
<h4>Postfix Array Declarations</h4>
|
|
|
|
$(P Postfix declarations appear after the identifier being
|
|
declared and read left to right.
|
|
Each group lists equivalent declarations:
|
|
)
|
|
|
|
---------
|
|
// dynamic array of ints
|
|
int[] a;
|
|
int a[];
|
|
|
|
// array of 3 arrays of 4 ints each
|
|
int[4][3] b;
|
|
int[4] b[3];
|
|
int b[3][4];
|
|
|
|
// array of 5 dynamic arrays of ints.
|
|
int[][5] c;
|
|
int[] c[5];
|
|
int c[5][];
|
|
|
|
// array of 3 pointers to dynamic arrays of pointers to ints
|
|
int*[]*[3] d;
|
|
int*[]* d[3];
|
|
int* (*d[3])[];
|
|
|
|
// pointer to dynamic array of ints
|
|
int[]* e;
|
|
int (*e)[];
|
|
---------
|
|
|
|
$(P $(B Rationale:) The postfix form matches the way arrays are
|
|
declared in C and C++, and supporting this form provides an
|
|
easy migration path for programmers used to it.
|
|
)
|
|
|
|
<h2>Usage</h2>
|
|
|
|
$(P There are two broad kinds of operations to do on an array -
|
|
affecting
|
|
the handle to the array,
|
|
and affecting the contents of the array.
|
|
C only has
|
|
operators to affect the handle. In D, both are accessible.
|
|
)
|
|
|
|
$(P The handle to an array is specified by naming the array, as
|
|
in p, s or a:
|
|
)
|
|
|
|
---------
|
|
int* p;
|
|
int[3] s;
|
|
int[] a;
|
|
|
|
int* q;
|
|
int[3] t;
|
|
int[] b;
|
|
|
|
p = q; // p points to the same thing q does.
|
|
p = s; // p points to the first element of the array s.
|
|
p = a; // p points to the first element of the array a.
|
|
|
|
s = ...; // error, since s is a compiled in static
|
|
// reference to an array.
|
|
|
|
a = p; // error, since the length of the array pointed
|
|
// to by p is unknown
|
|
a = s; // a is initialized to point to the s array
|
|
a = b; // a points to the same array as b does
|
|
---------
|
|
|
|
<h2><a name="slicing">Slicing</a></h2>
|
|
|
|
$(P $(I Slicing) an array means to specify a subarray of it.
|
|
An array slice does not copy the data, it is only another
|
|
reference to it.
|
|
For example:
|
|
)
|
|
|
|
---------
|
|
int[10] a; // declare array of 10 ints
|
|
int[] b;
|
|
|
|
b = a[1..3]; // a[1..3] is a 2 element array consisting of
|
|
// a[1] and a[2]
|
|
foo(b[1]); // equivalent to foo(0)
|
|
a[2] = 3;
|
|
foo(b[1]); // equivalent to foo(3)
|
|
---------
|
|
|
|
$(P The [] is shorthand for a slice of the entire array.
|
|
For example, the assignments to b:
|
|
)
|
|
|
|
---------
|
|
int[10] a;
|
|
int[] b;
|
|
|
|
b = a;
|
|
b = a[];
|
|
b = a[0 .. a.length];
|
|
---------
|
|
|
|
$(P are all semantically equivalent.
|
|
)
|
|
|
|
$(P Slicing
|
|
is not only handy for referring to parts of other arrays,
|
|
but for converting pointers into bounds-checked arrays:
|
|
)
|
|
|
|
---------
|
|
int* p;
|
|
int[] b = p[0..8];
|
|
---------
|
|
|
|
<h2>Array Copying</h2>
|
|
|
|
$(P When the slice operator appears as the lvalue of an assignment
|
|
expression, it means that the contents of the array are the
|
|
target of the assignment rather than a reference to the array.
|
|
Array copying happens when the lvalue is a slice, and the rvalue
|
|
is an array of or pointer to the same type.
|
|
)
|
|
|
|
---------
|
|
int[3] s;
|
|
int[3] t;
|
|
|
|
s[] = t; // the 3 elements of t[3] are copied into s[3]
|
|
s[] = t[]; // the 3 elements of t[3] are copied into s[3]
|
|
s[1..2] = t[0..1]; // same as s[1] = t[0]
|
|
s[0..2] = t[1..3]; // same as s[0] = t[1], s[1] = t[2]
|
|
s[0..4] = t[0..4]; // error, only 3 elements in s
|
|
s[0..2] = t; // error, different lengths for lvalue and rvalue
|
|
---------
|
|
|
|
$(P Overlapping copies are an error:)
|
|
|
|
---------
|
|
s[0..2] = s[1..3]; // error, overlapping copy
|
|
s[1..3] = s[0..2]; // error, overlapping copy
|
|
---------
|
|
|
|
$(P Disallowing overlapping makes it possible for more aggressive
|
|
parallel code optimizations than possible with the serial
|
|
semantics of C.
|
|
)
|
|
|
|
<h2>Array Setting</h2>
|
|
|
|
$(P If a slice operator appears as the lvalue of an assignment
|
|
expression, and the type of the rvalue is the same as the element
|
|
type of the lvalue, then the lvalue's array contents
|
|
are set to the rvalue.
|
|
)
|
|
|
|
---------
|
|
int[3] s;
|
|
int* p;
|
|
|
|
s[] = 3; // same as s[0] = 3, s[1] = 3, s[2] = 3
|
|
p[0..2] = 3; // same as p[0] = 3, p[1] = 3
|
|
---------
|
|
|
|
<h2>Array Concatenation</h2>
|
|
|
|
$(P The binary operator ~ is the $(I cat) operator. It is used
|
|
to concatenate arrays:
|
|
)
|
|
|
|
---------
|
|
int[] a;
|
|
int[] b;
|
|
int[] c;
|
|
|
|
a = b ~ c; // Create an array from the concatenation of the
|
|
// b and c arrays
|
|
---------
|
|
|
|
$(P Many languages overload the + operator to mean concatenation.
|
|
This leads to confusion; for example, does:
|
|
)
|
|
|
|
---------
|
|
"10" + 3
|
|
---------
|
|
|
|
$(P produce the number 13 or the string "103" as the result? It isn't
|
|
obvious, and the language designers wind up carefully writing rules
|
|
to disambiguate it - rules that get incorrectly implemented,
|
|
overlooked, forgotten, and ignored. It's much better to have + mean
|
|
addition, and a separate operator to be array concatenation.
|
|
)
|
|
|
|
$(P Similarly, the ~= operator means append, as in:
|
|
)
|
|
|
|
---------
|
|
a ~= b; // a becomes the concatenation of a and b
|
|
---------
|
|
|
|
$(P Concatenation always creates a copy of its operands, even
|
|
if one of the operands is a 0 length array, so:
|
|
)
|
|
|
|
---------
|
|
a = b; // a refers to b
|
|
a = b ~ c[0..0]; // a refers to a copy of b
|
|
---------
|
|
|
|
|
|
$(COMMENT
|
|
<h2>Array Operations</h2>
|
|
|
|
$(P $(B Note): Array operations are not implemented.
|
|
)
|
|
|
|
$(P In general, (a[n..m] $(I op) e) is defined as:
|
|
)
|
|
|
|
---------
|
|
for (i = n; i < m; i++)
|
|
a[i] $(I op) e;
|
|
---------
|
|
|
|
$(P So, for the expression:
|
|
)
|
|
|
|
---------
|
|
a[] = b[] + 3;
|
|
---------
|
|
|
|
$(P the result is equivalent to:)
|
|
|
|
---------
|
|
for (i = 0; i < a.length; i++)
|
|
a[i] = b[i] + 3;
|
|
---------
|
|
|
|
$(P When more than one [] operator appears in an expression, the range
|
|
represented by all must match.
|
|
)
|
|
|
|
---------
|
|
a[1..3] = b[] + 3; // error, 2 elements not same as 3 elements
|
|
---------
|
|
)
|
|
|
|
|
|
<h2>Pointer Arithmetic</h2>
|
|
|
|
---------
|
|
int[3] abc; // static array of 3 ints
|
|
int[] def = [ 1, 2, 3 ]; // dynamic array of 3 ints
|
|
|
|
void dibb(int* array)
|
|
{
|
|
array[2]; // means same thing as *(array + 2)
|
|
*(array + 2); // get 3rd element
|
|
}
|
|
|
|
void diss(int[] array)
|
|
{
|
|
array[2]; // ok
|
|
*(array + 2); // error, array is not a pointer
|
|
}
|
|
|
|
void ditt(int[3] array)
|
|
{
|
|
array[2]; // ok
|
|
*(array + 2); // error, array is not a pointer
|
|
}
|
|
---------
|
|
|
|
<h2>Rectangular Arrays</h2>
|
|
|
|
$(P Experienced FORTRAN numerics programmers know that multidimensional
|
|
"rectangular" arrays for things like matrix operations are much faster than trying to
|
|
access them via pointers to pointers resulting from "array of pointers to array" semantics.
|
|
For example, the D syntax:
|
|
)
|
|
|
|
---------
|
|
double[][] matrix;
|
|
---------
|
|
|
|
$(P declares matrix as an array of pointers to arrays. (Dynamic arrays are implemented as
|
|
pointers to the array data.) Since the arrays can have varying sizes (being dynamically
|
|
sized), this is sometimes called "jagged" arrays. Even worse for optimizing the code, the
|
|
array rows can sometimes point to each other! Fortunately, D static arrays, while using
|
|
the same syntax, are implemented as a fixed rectangular layout:
|
|
)
|
|
|
|
---------
|
|
double[3][3] matrix;
|
|
---------
|
|
|
|
$(P declares a rectangular matrix with 3 rows and 3 columns, all contiguously in memory. In
|
|
other languages, this would be called a multidimensional array and be declared as:
|
|
)
|
|
---------
|
|
double matrix[3,3];
|
|
---------
|
|
|
|
<h2>Array Length</h2>
|
|
|
|
$(P Within the [ ] of a static or a dynamic array,
|
|
the variable $(B length)
|
|
is implicitly declared and set to the length of the array.
|
|
The symbol $(B $) can also be so used.
|
|
)
|
|
|
|
---------
|
|
int[4] foo;
|
|
int[] bar = foo;
|
|
int* p = &foo[0];
|
|
|
|
// These expressions are equivalent:
|
|
bar[]
|
|
bar[0 .. 4]
|
|
bar[0 .. $(B length)]
|
|
bar[0 .. $(B $)]
|
|
bar[0 .. bar.length]
|
|
|
|
p[0 .. length] // 'length' is not defined, since p is not an array
|
|
bar[0]+length // 'length' is not defined, out of scope of [ ]
|
|
|
|
bar[$(B length)-1] // retrieves last element of the array
|
|
---------
|
|
|
|
<h2>Array Properties</h2>
|
|
|
|
$(P Static array properties are:)
|
|
|
|
$(TABLE1
|
|
$(TR
|
|
$(TD $(B .sizeof))
|
|
$(TD Returns the array length multiplied by the number of
|
|
bytes per array element.
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .length))
|
|
$(TD Returns the number of elements in the array.
|
|
This is a fixed quantity for static arrays.
|
|
It is of type $(B size_t).
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .ptr))
|
|
$(TD Returns a pointer to the first element of the array.
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .dup))
|
|
$(TD Create a dynamic array of the same size
|
|
and copy the contents of the array into it.
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .idup))
|
|
$(TD Create a dynamic array of the same size
|
|
and copy the contents of the array into it.
|
|
The copy is typed as being invariant.
|
|
$(I D 2.0 only)
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .reverse))
|
|
$(TD Reverses in place the order of the elements in the array.
|
|
Returns the array.
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .sort))
|
|
$(TD Sorts in place the order of the elements in the array.
|
|
Returns the array.
|
|
)
|
|
)
|
|
|
|
)
|
|
|
|
$(P Dynamic array properties are:)
|
|
|
|
$(TABLE1
|
|
$(TR
|
|
$(TD $(B .sizeof))
|
|
$(TD Returns the size of the dynamic array reference,
|
|
which is 8 on 32 bit machines.
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .length))
|
|
$(TD Get/set number of elements in the array.
|
|
It is of type $(B size_t).
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .ptr))
|
|
$(TD Returns a pointer to the first element of the array.
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .dup))
|
|
$(TD Create a dynamic array of the same size
|
|
and copy the contents of the array into it.
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .idup))
|
|
$(TD Create a dynamic array of the same size
|
|
and copy the contents of the array into it.
|
|
The copy is typed as being invariant.
|
|
$(I D 2.0 only)
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .reverse))
|
|
$(TD Reverses in place the order of the elements in the array.
|
|
Returns the array.
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .sort))
|
|
$(TD Sorts in place the order of the elements in the array.
|
|
Returns the array.
|
|
)
|
|
)
|
|
|
|
)
|
|
|
|
$(P For the $(B .sort) property to work on arrays of class
|
|
objects, the class definition must define the function:
|
|
$(TT int opCmp(Object)). This is used to determine the
|
|
ordering of the class objects. Note that the parameter
|
|
is of type $(TT Object), not the type of the class.)
|
|
|
|
$(P For the $(B .sort) property to work on arrays of
|
|
structs or unions, the struct or union definition must
|
|
define the function:
|
|
$(TT int opCmp(S)) or
|
|
$(TT int opCmp(S*)).
|
|
The type $(TT S) is the type of the struct or union.
|
|
This function will determine the sort ordering.
|
|
)
|
|
|
|
$(P Examples:)
|
|
|
|
---------
|
|
p.length // error, length not known for pointer
|
|
s.length // compile time constant 3
|
|
a.length // runtime value
|
|
|
|
p.dup // error, length not known
|
|
s.dup // creates an array of 3 elements, copies
|
|
// elements of s into it
|
|
a.dup // creates an array of a.length elements, copies
|
|
// elements of a into it
|
|
---------
|
|
|
|
<h3><a name="resize">Setting Dynamic Array Length</a></h3>
|
|
|
|
$(P The $(B $(TT .length)) property of a dynamic array can be set
|
|
as the lvalue of an = operator:
|
|
)
|
|
|
|
---------
|
|
array.length = 7;
|
|
---------
|
|
|
|
$(P This causes the array to be reallocated in place, and the existing
|
|
contents copied over to the new array. If the new array length is
|
|
shorter,
|
|
only enough are copied to fill the new array. If the new array length
|
|
is longer, the remainder is filled out with the default initializer.
|
|
)
|
|
|
|
$(P To maximize efficiency, the runtime always tries to resize the
|
|
array in place to avoid extra copying. It will always do a copy
|
|
if the new size is larger and the array was not allocated via the
|
|
new operator or a previous
|
|
resize operation.
|
|
)
|
|
|
|
$(P This means that if there is an array slice immediately following the
|
|
array being resized, the resized array could overlap the slice; i.e.:
|
|
)
|
|
|
|
---------
|
|
char[] a = new char[20];
|
|
char[] b = a[0..10];
|
|
char[] c = a[10..20];
|
|
|
|
b.length = 15; // always resized in place because it is sliced
|
|
// from a[] which has enough memory for 15 chars
|
|
b[11] = 'x'; // a[11] and c[1] are also affected
|
|
|
|
a.length = 1;
|
|
a.length = 20; // no net change to memory layout
|
|
|
|
c.length = 12; // always does a copy because c[] is not at the
|
|
// start of a gc allocation block
|
|
c[5] = 'y'; // does not affect contents of a[] or b[]
|
|
|
|
a.length = 25; // may or may not do a copy
|
|
a[3] = 'z'; // may or may not affect b[3] which still overlaps
|
|
// the old a[3]
|
|
---------
|
|
|
|
$(P To guarantee copying behavior, use the .dup property to ensure
|
|
a unique array that can be resized.
|
|
)
|
|
|
|
$(P These issues also apply to concatenating arrays with the ~ and ~=
|
|
operators.
|
|
)
|
|
|
|
$(P Resizing a dynamic array is a relatively expensive operation.
|
|
So, while the following method of filling an array:
|
|
)
|
|
|
|
---------
|
|
int[] array;
|
|
while (1)
|
|
{ c = getinput();
|
|
if (!c)
|
|
break;
|
|
array.length = array.length + 1;
|
|
array[array.length - 1] = c;
|
|
}
|
|
---------
|
|
|
|
$(P will work, it will be inefficient. A more practical
|
|
approach would be to minimize the number of resizes:
|
|
)
|
|
|
|
---------
|
|
int[] array;
|
|
array.length = 100; // guess
|
|
for (i = 0; 1; i++)
|
|
{ c = getinput();
|
|
if (!c)
|
|
break;
|
|
if (i == array.length)
|
|
array.length = array.length * 2;
|
|
array[i] = c;
|
|
}
|
|
array.length = i;
|
|
---------
|
|
|
|
$(P Picking a good initial guess is an art, but you usually can
|
|
pick a value covering 99% of the cases.
|
|
For example, when gathering user
|
|
input from the console - it's unlikely to be longer than 80.
|
|
)
|
|
|
|
<h3>Functions as Array Properties</h3>
|
|
|
|
$(P If the first parameter to a function is an array, the
|
|
function can be called as if it were a property of the array:
|
|
)
|
|
|
|
---
|
|
int[] array;
|
|
void foo(int[] a, int x);
|
|
|
|
foo(array, 3);
|
|
array.foo(3); // means the same thing
|
|
---
|
|
|
|
<h2><a name="bounds">Array Bounds Checking</a></h2>
|
|
|
|
$(P It is an error to index an array with an index that is less than
|
|
0 or greater than or equal to the array length. If an index is
|
|
out of bounds, an ArrayBoundsError exception is raised if detected
|
|
at runtime, and an error if detected at compile time.
|
|
A program may not rely on array bounds checking happening, for
|
|
example, the following program is incorrect:
|
|
)
|
|
|
|
---------
|
|
try
|
|
{
|
|
for (i = 0; ; i++)
|
|
{
|
|
array[i] = 5;
|
|
}
|
|
}
|
|
catch (ArrayBoundsError)
|
|
{
|
|
// terminate loop
|
|
}
|
|
---------
|
|
|
|
The loop is correctly written:
|
|
|
|
---------
|
|
for (i = 0; i < array.length; i++)
|
|
{
|
|
array[i] = 5;
|
|
}
|
|
---------
|
|
|
|
$(P $(B Implementation Note:) Compilers should attempt to detect
|
|
array bounds errors at compile time, for example:
|
|
)
|
|
|
|
---------
|
|
int[3] foo;
|
|
int x = foo[3]; // error, out of bounds
|
|
---------
|
|
|
|
$(P Insertion of array bounds checking code at runtime should be
|
|
turned on and off
|
|
with a compile time switch.
|
|
)
|
|
|
|
<h2>Array Initialization</h2>
|
|
|
|
<h3>Default Initialization</h3>
|
|
|
|
$(UL
|
|
$(LI Pointers are initialized to $(B null).)
|
|
$(LI Static array contents are initialized to the default
|
|
initializer for the array element type.)
|
|
$(LI Dynamic arrays are initialized to having 0 elements.)
|
|
$(LI Associative arrays are initialized to having 0 elements.)
|
|
)
|
|
|
|
<h3>Void Initialization</h3>
|
|
|
|
$(P Void initialization happens when the $(I Initializer) for
|
|
an array is $(B void). What it means is that no initialization
|
|
is done, i.e. the contents of the array will be undefined.
|
|
This is most useful as an efficiency optimization.
|
|
Void initializations are an advanced technique and should only be used
|
|
when profiling indicates that it matters.
|
|
)
|
|
|
|
<h3>Static Initialization of Static Arrays</h3>
|
|
|
|
$(P Static initializations are supplied by a list of array
|
|
element values enclosed in [ ]. The values can be optionally
|
|
preceded by an index and a :.
|
|
If an index is not supplied, it is set to the previous index
|
|
plus 1, or 0 if it is the first value.
|
|
)
|
|
|
|
---------
|
|
int[3] a = [ 1:2, 3 ]; // a[0] = 0, a[1] = 2, a[2] = 3
|
|
---------
|
|
|
|
$(P This is most handy when the array indices are given by enums:)
|
|
|
|
---------
|
|
enum Color { red, blue, green };
|
|
|
|
int value[Color.max + 1] = [ Color.blue:6, Color.green:2, Color.red:5 ];
|
|
---------
|
|
|
|
$(P These arrays are static when they appear in global scope.
|
|
Otherwise, they need to be marked with $(B const) or $(B static)
|
|
storage classes to make them static arrays.)
|
|
|
|
|
|
<h2>Special Array Types</h2>
|
|
|
|
<a name="strings"><h3>Strings</h3></a>
|
|
|
|
$(P A string is
|
|
an array of characters. String literals are just
|
|
an easy way to write character arrays.
|
|
String literals are immutable (read only).
|
|
)
|
|
|
|
$(V1
|
|
---------
|
|
char[] str;
|
|
char[] str1 = "abc";
|
|
str1[0] = 'b'; // error, "abc" is read only, may crash
|
|
---------
|
|
)
|
|
$(V2
|
|
---------
|
|
char[] str1 = "abc"; // error, "abc" is not mutable
|
|
char[] str2 = "abc".dup; // ok, make mutable copy
|
|
invariant(char)[] str3 = "abc"; // ok
|
|
---------
|
|
)
|
|
$(P char[] strings are in UTF-8 format.
|
|
wchar[] strings are in UTF-16 format.
|
|
dchar[] strings are in UTF-32 format.
|
|
)
|
|
|
|
$(P Strings can be copied, compared, concatenated, and appended:)
|
|
|
|
---------
|
|
str1 = str2;
|
|
if (str1 < str3) ...
|
|
func(str3 ~ str4);
|
|
str4 ~= str1;
|
|
---------
|
|
|
|
$(P with the obvious semantics. Any generated temporaries get cleaned up
|
|
by the garbage collector (or by using alloca()). Not only that,
|
|
this works with any
|
|
array, not just a special String array.
|
|
)
|
|
|
|
$(P A pointer to a char can be generated:
|
|
)
|
|
|
|
---------
|
|
char* p = &str[3]; // pointer to 4th element
|
|
char* p = str; // pointer to 1st element
|
|
---------
|
|
|
|
$(P Since strings, however, are not 0 terminated in D,
|
|
when transferring a pointer
|
|
to a string to C, add a terminating 0:
|
|
)
|
|
|
|
---------
|
|
str ~= "\0";
|
|
---------
|
|
|
|
$(P or use the function $(TT std.string.toStringz).)
|
|
|
|
$(P The type of a string is determined by the semantic phase of
|
|
compilation. The type is
|
|
one of: char[], wchar[], dchar[], and is determined by
|
|
implicit conversion rules.
|
|
If there are two equally applicable implicit conversions,
|
|
the result is an error. To
|
|
disambiguate these cases, a cast or a postfix of $(B c),
|
|
$(B w) or $(B d) can be used:
|
|
)
|
|
|
|
---------
|
|
cast(wchar [])"abc" // this is an array of wchar characters
|
|
"abc"w // so is this
|
|
---------
|
|
|
|
$(P String literals that do not have a postfix character and that
|
|
have not been cast can be implicitly converted between char[],
|
|
wchar[], and dchar[] as necessary.
|
|
)
|
|
|
|
---------
|
|
char c;
|
|
wchar w;
|
|
dchar d;
|
|
|
|
c = 'b'; // c is assigned the character 'b'
|
|
w = 'b'; // w is assigned the wchar character 'b'
|
|
w = 'bc'; // error - only one wchar character at a time
|
|
w = "b"[0]; // w is assigned the wchar character 'b'
|
|
w = \r[0]; // w is assigned the carriage return wchar character
|
|
d = 'd'; // d is assigned the character 'd'
|
|
---------
|
|
|
|
<h4>C's printf() and Strings</h4>
|
|
|
|
$(P $(B printf()) is a C function and is not part of D. $(B printf())
|
|
will print C strings, which are 0 terminated. There are two ways
|
|
to use $(B printf()) with D strings. The first is to add a
|
|
terminating 0, and cast the result to a char*:
|
|
)
|
|
|
|
---------
|
|
str ~= "\0";
|
|
printf("the string is '%s'\n", cast(char*)str);
|
|
---------
|
|
|
|
$(P or:)
|
|
|
|
---------
|
|
import std.string;
|
|
printf("the string is '%s'\n", std.string.toStringz(str));
|
|
---------
|
|
|
|
$(P String literals already have a 0 appended to them, so
|
|
can be used directly:)
|
|
|
|
-----------
|
|
printf("the string is '%s'\n", cast(char*)"string literal");
|
|
-----------
|
|
|
|
$(P So, why does the first string literal to printf not need
|
|
the cast? The first parameter is prototyped as a char*, and
|
|
a string literal can be implicitly cast to a char*.
|
|
The rest of the arguments to printf, however, are variadic
|
|
(specified by ...),
|
|
and a string literal is passed as a (length,pointer) combination
|
|
to variadic parameters.)
|
|
|
|
$(P The second way is to use the precision specifier. The way D arrays
|
|
are laid out, the length comes first, so the following works:)
|
|
|
|
---------
|
|
printf("the string is '%.*s'\n", str);
|
|
---------
|
|
|
|
$(P The best way is to use std.stdio.writefln, which can handle
|
|
D strings:)
|
|
|
|
---------
|
|
import std.stdio;
|
|
writefln("the string is '%s'", str);
|
|
---------
|
|
|
|
<h3>Implicit Conversions</h3>
|
|
|
|
$(P A pointer $(TT $(I T)*) can be implicitly converted to
|
|
one of the following:)
|
|
|
|
$(UL
|
|
$(LI $(TT void*))
|
|
)
|
|
|
|
$(P A static array $(TT $(I T)[$(I dim)]) can be implicitly
|
|
converted to
|
|
one of the following:
|
|
)
|
|
|
|
$(UL
|
|
$(LI $(TT $(I T)[]))
|
|
$(LI $(TT $(I U)[]))
|
|
$(LI $(TT void[]))
|
|
)
|
|
|
|
$(P A dynamic array $(TT $(I T)[]) can be implicitly converted to
|
|
one of the following:
|
|
)
|
|
|
|
$(UL
|
|
$(LI $(TT $(I U)[]))
|
|
$(LI $(TT void[]))
|
|
)
|
|
|
|
$(P Where $(I U) is a base class of $(I T).)
|
|
|
|
<hr>
|
|
<h1><a name="associative">Associative Arrays</a></h1>
|
|
|
|
$(P Associative arrays have an index that is not necessarily an integer,
|
|
and can be sparsely populated. The index for an associative array
|
|
is called the $(I key), and its type is called the $(I KeyType).
|
|
)
|
|
|
|
$(P Associative arrays are declared by placing the $(I KeyType)
|
|
within the [] of an array declaration:
|
|
)
|
|
|
|
---------
|
|
int[char[]] b; // associative array b of ints that are
|
|
// indexed by an array of characters.
|
|
// The $(I KeyType) is char[]
|
|
b["hello"] = 3; // set value associated with key "hello" to 3
|
|
func(b["hello"]); // pass 3 as parameter to func()
|
|
---------
|
|
|
|
$(P Particular keys in an associative array can be removed with the
|
|
remove function:
|
|
)
|
|
|
|
---------
|
|
b.$(B remove)("hello");
|
|
---------
|
|
|
|
$(P The $(I InExpression) yields a pointer to the value
|
|
if the key is in the associative array, or $(B null) if not:
|
|
)
|
|
|
|
---------
|
|
int* p;
|
|
p = ("hello" $(B in) b);
|
|
if (p != $(B null))
|
|
...
|
|
---------
|
|
|
|
$(P $(I KeyType)s cannot be functions or voids.
|
|
)
|
|
|
|
$(P If the $(I KeyType) is a struct type, a default mechanism is used
|
|
to compute the hash and comparisons of it based on the binary
|
|
data within the struct value. A custom mechanism can be used
|
|
by providing the following functions as struct members:
|
|
)
|
|
|
|
---------
|
|
uint $(B toHash)();
|
|
int $(B opCmp)($(I KeyType)* s);
|
|
---------
|
|
|
|
$(P For example:)
|
|
|
|
---------
|
|
import std.string;
|
|
|
|
struct MyString
|
|
{
|
|
char[] str;
|
|
|
|
uint $(B toHash)()
|
|
{ uint hash;
|
|
foreach (char c; str)
|
|
hash = (hash * 9) + c;
|
|
return hash;
|
|
}
|
|
|
|
int $(B opCmp)(MyString* s)
|
|
{
|
|
return std.string.cmp(this.str, s.str);
|
|
}
|
|
}
|
|
---------
|
|
|
|
<h3>Using Classes as the KeyType</h3>
|
|
|
|
$(P Classes can be used as the $(I KeyType). For this to work,
|
|
the class definition must override the following member functions
|
|
of class $(TT Object):)
|
|
|
|
$(UL
|
|
$(LI $(TT hash_t toHash()))
|
|
$(LI $(TT int opEquals(Object)))
|
|
$(LI $(TT int opCmp(Object)))
|
|
)
|
|
|
|
$(P Note that the parameter to $(TT opCmp) and $(TT opEquals) is
|
|
of type
|
|
$(TT Object), not the type of the class in which it is defined.)
|
|
|
|
$(P For example:)
|
|
|
|
---
|
|
class Foo
|
|
{
|
|
int a, b;
|
|
|
|
hash_t toHash() { return a + b; }
|
|
|
|
int opEquals(Object o)
|
|
{ Foo f = cast(Foo) o;
|
|
return f && a == f.a && b == f.b;
|
|
}
|
|
|
|
int opCmp(Object o)
|
|
{ Foo f = cast(Foo) o;
|
|
if (!f)
|
|
return -1;
|
|
if (a == f.a)
|
|
return b - f.b;
|
|
return a - f.a;
|
|
}
|
|
}
|
|
---
|
|
|
|
$(P The implementation may use either $(TT opEquals) or $(TT opCmp) or
|
|
both. Care should be taken so that the results of
|
|
$(TT opEquals) and $(TT opCmp) are consistent with each other when
|
|
the class objects are the same or not.)
|
|
|
|
<h3>Using Structs or Unions as the KeyType</h3>
|
|
|
|
$(P Structs or unions can be used as the $(I KeyType). For this to work,
|
|
the struct or union definition must define the following
|
|
member functions:)
|
|
|
|
$(UL
|
|
$(LI $(TT hash_t toHash()))
|
|
$(LI $(TT int opEquals(S)) or $(TT int opEquals(S*)))
|
|
$(LI $(TT int opCmp(S)) or $(TT int opCmp(S*)))
|
|
)
|
|
|
|
$(P Note that the parameter to $(TT opCmp) and $(TT opEquals)
|
|
can be either the struct or union type, or a pointer to the struct
|
|
or union type.)
|
|
|
|
$(P For example:)
|
|
|
|
---
|
|
struct S
|
|
{
|
|
int a, b;
|
|
|
|
hash_t toHash() { return a + b; }
|
|
|
|
int opEquals(S s)
|
|
{
|
|
return a == s.a && b == s.b;
|
|
}
|
|
|
|
int opCmp(S* s)
|
|
{
|
|
if (a == s.a)
|
|
return b - s.b;
|
|
return a - s.a;
|
|
}
|
|
}
|
|
---
|
|
|
|
$(P The implementation may use either $(TT opEquals) or $(TT opCmp) or
|
|
both. Care should be taken so that the results of
|
|
$(TT opEquals) and $(TT opCmp) are consistent with each other when
|
|
the struct/union objects are the same or not.)
|
|
|
|
<h3>Properties</h3>
|
|
|
|
Properties for associative arrays are:
|
|
|
|
$(TABLE1
|
|
|
|
$(TR
|
|
$(TD $(B .sizeof))
|
|
$(TD Returns the size of the reference to the associative
|
|
array; it is typically 8.
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .length))
|
|
$(TD Returns number of values in the associative array.
|
|
Unlike for dynamic arrays, it is read-only.
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .keys))
|
|
$(TD Returns dynamic array, the elements of which are the keys in
|
|
the associative array.
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .values))
|
|
$(TD Returns dynamic array, the elements of which are the values in
|
|
the associative array.
|
|
)
|
|
)
|
|
|
|
$(TR
|
|
$(TD $(B .rehash))
|
|
$(TD Reorganizes the associative array in place so that lookups
|
|
are more efficient. rehash is effective when, for example,
|
|
the program is done loading up a symbol table and now needs
|
|
fast lookups in it.
|
|
Returns a reference to the reorganized array.
|
|
)
|
|
)
|
|
|
|
)
|
|
|
|
<hr>
|
|
<h3>Associative Array Example: word count</h3>
|
|
|
|
---------
|
|
import std.file; // D file I/O
|
|
import std.stdio;
|
|
|
|
int main (char[][] args)
|
|
{
|
|
int word_total;
|
|
int line_total;
|
|
int char_total;
|
|
int[char[]] dictionary;
|
|
|
|
writefln(" lines words bytes file");
|
|
for (int i = 1; i < args.length; ++i) // program arguments
|
|
{
|
|
char[] input; // input buffer
|
|
int w_cnt, l_cnt, c_cnt; // word, line, char counts
|
|
int inword;
|
|
int wstart;
|
|
|
|
// read file into input[]
|
|
input = cast(char[])std.file.read(args[i]);
|
|
|
|
foreach (j, char c; input)
|
|
{
|
|
if (c == '\n')
|
|
++l_cnt;
|
|
if (c >= '0' && c <= '9')
|
|
{
|
|
}
|
|
else if (c >= 'a' && c <= 'z' ||
|
|
c >= 'A' && c <= 'Z')
|
|
{
|
|
if (!inword)
|
|
{
|
|
wstart = j;
|
|
inword = 1;
|
|
++w_cnt;
|
|
}
|
|
}
|
|
else if (inword)
|
|
{
|
|
char[] word = input[wstart .. j];
|
|
dictionary[word]++; // increment count for word
|
|
inword = 0;
|
|
}
|
|
++c_cnt;
|
|
}
|
|
if (inword)
|
|
{
|
|
char[] word = input[wstart .. input.length];
|
|
dictionary[word]++;
|
|
}
|
|
writefln("%8d%8d%8d %s", l_cnt, w_cnt, c_cnt, args[i]);
|
|
line_total += l_cnt;
|
|
word_total += w_cnt;
|
|
char_total += c_cnt;
|
|
}
|
|
|
|
if (args.length > 2)
|
|
{
|
|
writef("-------------------------------------\n%8ld%8ld%8ld total",
|
|
line_total, word_total, char_total);
|
|
}
|
|
|
|
writefln("-------------------------------------");
|
|
foreach (word; dictionary.keys.sort)
|
|
{
|
|
writefln("%3d %s", dictionary[word], word);
|
|
}
|
|
return 0;
|
|
}
|
|
---------
|
|
|
|
)
|
|
|
|
Macros:
|
|
TITLE=Arrays
|
|
WIKI=Arrays
|
|
|