phobos 0.66

This commit is contained in:
Brad Roberts 2007-09-10 02:20:11 +00:00
parent c7ea30e9a0
commit b8dd7daa64
11 changed files with 1216 additions and 22 deletions

262
deh2.d Normal file
View file

@ -0,0 +1,262 @@
//
// Copyright (c) 1999-2003 by Digital Mars, www.digitalmars.com
// All Rights Reserved
// Written by Walter Bright
// Exception handling support
//debug=1;
import linuxextern;
extern (C) int _d_isbaseof(ClassInfo oc, ClassInfo c);
alias int (*fp_t)(); // function pointer in ambient memory model
// Describes one guarded region of a function. Instances are the elements of
// DHandlerTable.handler_info[]. _d_throw() follows prev_index from entry to
// entry and stops when it reaches -1.
struct DHandlerInfo
{
uint offset; // offset from function address to start of guarded section
int prev_index; // previous table index
uint cioffset; // offset to DCatchInfo data from start of table (!=0 if try-catch)
void *finally_code; // pointer to finally code to execute
// (!=0 if try-finally)
}
// Per-function exception handling table. Its address is what __eh_finddata()
// returns (taken from FuncTable.handlertable).
// handler_info[] is declared with one element but is indexed up to
// nhandlers - 1 by _d_throw().
struct DHandlerTable
{
void *fptr; // pointer to start of function
uint espoffset; // offset of ESP from EBP
uint retoffset; // offset from start of function to return code
uint nhandlers; // dimension of handler_info[]
DHandlerInfo handler_info[1];
}
// One catch clause of a try-catch. _d_throw() selects the first block for
// which _d_isbaseof(thrown class, type) is non-zero, stores the thrown object
// at EBP + bpoffset and transfers control to code.
struct DCatchBlock
{
ClassInfo type; // catch type
uint bpoffset; // EBP offset of catch var
void *code; // catch handler code
}
// Create one of these for each try-catch
// It is located at (start of DHandlerTable) + DHandlerInfo.cioffset.
// catch_block[] is declared with one element but is indexed up to
// ncatches - 1 by _d_throw().
struct DCatchInfo
{
uint ncatches; // number of catch blocks
DCatchBlock catch_block[1]; // data for each catch block
}
// One of these is generated for each function with try-catch or try-finally
// __eh_finddata() treats the memory between _deh_beg and _deh_end as an array
// of these records and matches an address against [fptr, fptr + fsize).
struct FuncTable
{
void *fptr; // pointer to start of function
DHandlerTable *handlertable; // eh data for this function
uint fsize; // size of function in bytes
}
// Stop execution by issuing the x86 HLT instruction. Called by
// __eh_find_caller() when the frame chain looks corrupt.
// NOTE(review): what HLT does in an unprivileged process is platform
// dependent - confirm this is the intended way to abort.
void terminate()
{
asm
{
hlt ;
}
}
/*******************************************
 * Map a code address to the exception handling data of the function
 * that contains it.
 *
 * The records between _deh_beg and _deh_end are scanned in order as an
 * array of FuncTable; the first record whose [fptr, fptr + fsize) range
 * contains the address wins.
 *
 * Input:
 *	address		any address inside the function of interest
 * Returns:
 *	that function's DHandlerTable, or null if no record matches
 */
DHandlerTable *__eh_finddata(void *address)
{
    debug printf("__eh_finddata(address = x%x)\n", address);
    debug printf("_deh_beg = x%x, _deh_end = x%x\n", &_deh_beg, &_deh_end);

    FuncTable *entry = (FuncTable *)&_deh_beg;
    FuncTable *limit = (FuncTable *)&_deh_end;

    while (entry < limit)
    {
	debug printf("\tfptr = x%x, fsize = x%x, handlertable = x%x\n",
		entry.fptr, entry.fsize, entry.handlertable);

	void *first = entry.fptr;
	void *past  = (void *)((char *)first + entry.fsize);

	if (address >= first && address < past)
	    return entry.handlertable;

	entry++;
    }
    return null;
}
/******************************
 * Step one frame up the EBP chain.
 * Input:
 *	regbp		value of EBP for the current frame; the word it
 *			points at is the caller's EBP and the next word is
 *			the return address into the caller
 * Output:
 *	*pretaddr	return address into the caller (left untouched when
 *			the end of the call chain is reached)
 * Returns:
 *	caller's EBP, or 0 at the end of the call chain
 */
uint __eh_find_caller(uint regbp, uint *pretaddr)
{
    uint *frame = (uint *)regbp;
    uint callerbp = frame[0];

    if (callerbp == 0)
	return callerbp;		// end of call chain

    // Sanity check: the stack grows towards smaller addresses, so the
    // caller's frame must lie above this one. Bail out otherwise,
    // hopefully before more damage is done.
    if (callerbp <= regbp)
	terminate();

    *pretaddr = frame[1];		// word at regbp + int.size
    return callerbp;
}
/***********************************
* Throw a D object.
*
* Starting from this function's own EBP, walks the chain of stack frames
* with __eh_find_caller(). For every frame whose return address belongs to a
* function known to __eh_finddata(), the handlers guarding that address are
* visited innermost first (following prev_index):
*   - a try-catch entry whose catch type matches the thrown object's class
*     receives the object in its catch variable and control jumps to the
*     catch code; that path never returns here;
*   - a try-finally entry has its finally code called with EBP set to the
*     frame being unwound, after which the walk continues.
*
* NOTE(review): when the end of the frame chain is reached without a match,
* the loop is left and this function simply returns to its caller - confirm
* that this is the intended behaviour for an uncaught exception.
*/
extern (Windows) void _d_throw(Object *h)
{
uint regebp;
debug
{
printf("_d_throw(h = %p, &h = %p)\n", h, &h);
printf("\tvptr = %p\n", *(void **)h);
}
// Capture our own frame pointer as the starting point of the walk.
asm
{
mov regebp,EBP ;
}
while (1) // for each function on the stack
{
DHandlerTable *handler_table;
FuncTable *pfunc;
DHandlerInfo *phi;
uint retaddr;
uint funcoffset;
uint spoff;
uint retoffset;
int index;
int dim;
int ndx;
int prev_ndx;
// Move to the caller's frame; retaddr becomes the address in the caller
// at which execution would have resumed.
regebp = __eh_find_caller(regebp,&retaddr);
if (!regebp)
{ // if end of call chain
debug printf("end of call chain\n");
break;
}
debug printf("found caller, EBP = x%x, retaddr = x%x\n", regebp, retaddr);
handler_table = __eh_finddata((void *)retaddr); // find static data associated with function
if (!handler_table) // if no static data
{
debug printf("no handler table\n");
continue;
}
funcoffset = (uint)handler_table.fptr;
spoff = handler_table.espoffset;
retoffset = handler_table.retoffset;
debug
{
printf("retaddr = x%x\n",(uint)retaddr);
printf("regebp=x%04x, funcoffset=x%04x, spoff=x%x, retoffset=x%x\n",
regebp,funcoffset,spoff,retoffset);
}
// Find start index for retaddr in static data
// (the last entry whose guarded section starts at or before retaddr;
// -1 if there is none)
dim = handler_table.nhandlers;
index = -1;
for (int i = 0; i < dim; i++)
{
phi = &handler_table.handler_info[i];
if ((uint)retaddr >= funcoffset + phi.offset)
index = i;
}
// walk through handler table, checking each handler
// with an index smaller than the current table_index
for (ndx = index; ndx != -1; ndx = prev_ndx)
{
phi = &handler_table.handler_info[ndx];
prev_ndx = phi.prev_index;
if (phi.cioffset)
{
// this is a catch handler (no finally)
DCatchInfo *pci;
int ncatches;
int i;
// cioffset is relative to the start of the handler table
pci = (DCatchInfo *)((char *)handler_table + phi.cioffset);
ncatches = pci.ncatches;
for (i = 0; i < ncatches; i++)
{
DCatchBlock *pcb;
// ClassInfo of the thrown object, read through two levels of
// indirection starting at h
ClassInfo ci = **(ClassInfo **)h;
pcb = &pci.catch_block[i];
if (_d_isbaseof(ci, pcb.type))
{ // Matched the catch type, so we've found the handler.
// Initialize catch variable
*(void **)(regebp + (pcb.bpoffset)) = h;
// Jump to catch block. Does not return.
{
uint catch_esp;
fp_t catch_addr;
catch_addr = (fp_t)(pcb.code);
// Stack slot just below the handler frame's ESP position; the
// catch address is stored there so that RET pops it and jumps.
catch_esp = regebp - handler_table.espoffset - fp_t.size;
asm
{
mov EAX,catch_esp ;
mov ECX,catch_addr ;
mov [EAX],ECX ;
mov EBP,regebp ;
mov ESP,EAX ; // reset stack
ret ; // jump to catch block
}
}
}
}
}
else if (phi.finally_code)
{ // Call finally block
// Note that it is unnecessary to adjust the ESP, as the finally block
// accesses all items on the stack as relative to EBP.
void *blockaddr = phi.finally_code;
// EBX and EBP are saved around the call; EBP is switched to the frame
// being unwound for the duration of the finally code.
asm
{
push EBX ;
mov EBX,blockaddr ;
push EBP ;
mov EBP,regebp ;
call EBX ;
pop EBP ;
pop EBX ;
}
}
}
}
}

View file

@ -36,7 +36,7 @@ extern (C)
void gc_init()
{
_gc = (GC *) c.stdlib.calloc(1, GC.size);
_gc.init();
_gc.initialize();
//_gc.setStackBottom(_atopsp);
_gc.scanStaticData();
}

View file

@ -163,11 +163,11 @@ struct GC
Gcx *gcx; // implementation
static ClassInfo gcLock; // global lock
void init()
void initialize()
{
gcLock = GCLock.classinfo;
gcx = (Gcx *)c.stdlib.calloc(1, Gcx.size);
gcx.init();
gcx.initialize();
version (Win32)
{
setStackBottom(win32.os_query_stackBottom());
@ -760,7 +760,7 @@ struct Gcx
GC_FINALIZER finalizer; // finalizer function (one per GC)
void init()
void initialize()
{ int dummy;
((byte *)this)[0 .. Gcx.size] = 0;
@ -1165,7 +1165,7 @@ struct Gcx
pool = (Pool *)c.stdlib.calloc(1, Pool.size);
if (pool)
{
pool.init(npages);
pool.initialize(npages);
if (!pool.baseAddr)
goto Lerr;
@ -1825,7 +1825,7 @@ struct Pool
uint ncommitted; // ncommitted <= npages
ubyte* pagetable;
void init(uint npages)
void initialize(uint npages)
{
uint poolsize;

View file

@ -71,12 +71,12 @@ void smoke()
printf("smoke.1\n");
gc = newGC();
gc.init();
gc.initialize();
deleteGC(gc);
printf("smoke.2\n");
gc = newGC();
gc.init();
gc.initialize();
char *p = (char *)gc.malloc(10);
assert(p);
strcpy(p, "Hello!");
@ -112,7 +112,7 @@ void smoke2()
printf("--------------------------smoke2()\n");
gc = newGC();
gc.init();
gc.initialize();
for (i = 0; i < SMOKE2_SIZE; i++)
{
@ -154,7 +154,7 @@ void smoke3()
printf("--------------------------smoke3()\n");
gc = newGC();
gc.init();
gc.initialize();
// for (i = 0; i < 1000000; i++)
for (i = 0; i < 1000; i++)
@ -187,7 +187,7 @@ void smoke4()
printf("--------------------------smoke4()\n");
gc = newGC();
gc.init();
gc.initialize();
for (i = 0; i < 80000; i++)
{
@ -271,7 +271,7 @@ int main(int argc, char *argv[])
gc = newGC();
printf("gc = %p\n", gc);
gc.init();
gc.initialize();
smoke();
smoke2();

View file

@ -53,14 +53,13 @@ OBJS= assert.o deh2.o switch.o complex.o gcstats.o \
stream.o switcherr.o array.o gc.o adi.o \
qsort.o thread.o obj.o \
crc32.o conv.o arraycast.o errno.o alloca.o cmath.o \
ti_Aa.o ti_Ag.o ti_C.o ti_int.o ti_char.o \
ti_wchar.o ti_uint.o ti_short.o ti_ushort.o \
ti_byte.o ti_ubyte.o ti_long.o ti_ulong.o ti_ptr.o \
ti_float.o ti_double.o ti_real.o ti_delegate.o \
date.o dateparse.o llmath.o math2.o \
ti_creal.o ti_ireal.o \
ti_cfloat.o ti_ifloat.o \
ti_cdouble.o ti_idouble.o
ti_creal.o ti_ireal.o ti_cfloat.o ti_ifloat.o \
ti_cdouble.o ti_idouble.o \
ti_Aa.o ti_Ag.o ti_C.o ti_int.o ti_char.o \
date.o dateparse.o llmath.o math2.o
HDR=mars.h

View file

@ -34,6 +34,7 @@
//debug = regexp; // uncomment to turn on debugging printf's
import c.stdio;
import string;
import ctype;
import outbuffer;
@ -117,6 +118,7 @@ enum : ubyte
REanychar, // any character
REanystar, // ".*"
REstring, // string of characters
REistring, // string of characters, case insensitive
REtestbit, // any in bitmap, non-consuming
REbit, // any in the bit map
REnotbit, // any not in the bit map
@ -641,6 +643,13 @@ void printProgram(ubyte[] prog)
pc += 1 + uint.size + len * tchar.size;
break;
case REistring:
len = *(uint *)&prog[pc + 1];
printf("\tREistring x%x, '%.*s'\n", len,
(&prog[pc + 1 + uint.size])[0 .. len]);
pc += 1 + uint.size + len * tchar.size;
break;
case REtestbit:
pu = (ushort *)&prog[pc + 1];
printf("\tREtestbit %d, %d\n", pu[0], pu[1]);
@ -901,6 +910,27 @@ int trymatch(int pc, int pcend)
pc += 1 + uint.size + len * tchar.size;
break;
case REistring:
len = *(uint *)&program[pc + 1];
debug(regexp) printf("\tREistring x%x, '%.*s'\n", len,
(&program[pc + 1 + uint.size])[0 .. len]);
if (src + len > input.length)
goto Lnomatch;
version (Win32)
{
if (memicmp(cast(char*)&program[pc + 1 + uint.size], &input[src], len * tchar.size))
goto Lnomatch;
}
else
{
if (icmp((cast(char*)&program[pc + 1 + uint.size])[0..len],
input[src .. src + len]))
goto Lnomatch;
}
src += len;
pc += 1 + uint.size + len * tchar.size;
break;
case REtestbit:
pu = ((ushort *)&program[pc + 1]);
debug(regexp) printf("\tREtestbit %d, %d, '%c', x%02x\n",
@ -1617,7 +1647,7 @@ int parseAtom()
{
debug(regexp) printf("writing string len %d, c = '%c', pattern[p] = '%c'\n", len+1, c, pattern[p]);
buf.reserve(5 + (1 + len) * tchar.size);
buf.write(REstring);
buf.write((attributes & REA.ignoreCase) ? REistring : REstring);
buf.write(len + 1);
buf.write(c);
buf.write(pattern[p .. p + len]);
@ -2026,6 +2056,7 @@ void optimize()
case REwchar:
case REiwchar:
case REstring:
case REistring:
case REtestbit:
case REbit:
case REnotbit:
@ -2127,6 +2158,17 @@ int startchars(Range r, ubyte[] prog)
r.setbit2(c);
return 1;
case REistring:
len = *(uint *)&prog[i + 1];
assert(len);
c = *(tchar *)&prog[i + 1 + uint.size];
debug(regexp) printf("\tREistring %d, '%c'\n", len, c);
if (c <= 0x7F)
{ r.setbit2(ctype.toupper((tchar)c));
r.setbit2(ctype.tolower((tchar)c));
}
return 1;
case REtestbit:
case REbit:
maxc = ((ushort *)&prog[i + 1])[0];

View file

@ -21,6 +21,8 @@ import random;
import date;
import dateparse;
import stream;
import utf;
import uri;
int main(char[][] args)
{
@ -40,6 +42,8 @@ int main(char[][] args)
a.sort; // qsort
date.getUTCtime(); // date
StreamError se = new StreamError(""); // stream
isValidDchar((dchar)0); // utf
uri.ascii2hex(0); // uri
printf("hello world\n");
printf("args.length = %d\n", args.length);

342
uri.d Normal file
View file

@ -0,0 +1,342 @@
debug=uri; // uncomment to turn on debugging printf's
/* ====================== URI Functions ================ */
import ctype;
import c.stdlib;
import utf;
// Error thrown by URI_Encode() and URI_Decode() when their input cannot be
// processed. The message is always "URI error".
class URIerror : Error
{
this()
{
super("URI error");
}
}
// Character class bits stored in uri_flags[]. They are OR-ed together to
// form the unescapedSet / reservedSet masks passed to URI_Encode() and
// URI_Decode().
enum
{
URI_Alpha = 1, // 'A'..'Z' and 'a'..'z'
URI_Reserved = 2, // one of ;/?:@&=+$,
URI_Mark = 4, // one of -_.!~*'()
URI_Digit = 8, // '0'..'9'
URI_Hash = 0x10, // '#'
}
char[16] hex2ascii = "0123456789ABCDEF";
ubyte[128] uri_flags; // indexed by character
// Module constructor: fills in uri_flags[] so that every ASCII character
// carries the URI_* bits of the classes it belongs to.
static this()
{
    // OR 'bits' into the table entry of every character in 'members'
    static void tag(char[] members, uint bits)
    {
	int n = 0;
	while (n < members.length)
	{
	    uri_flags[members[n]] |= bits;
	    n++;
	}
    }

    tag("-_.!~*'()", URI_Mark);
    tag(";/?:@&=+$,", URI_Reserved);
    tag("0123456789", URI_Digit);

    // Letters: each upper case letter and the lower case letter 0x20 above it
    for (int upper = 'A'; upper <= 'Z'; upper++)
    {
	int lower = upper + 0x20;
	uri_flags[upper] |= URI_Alpha;
	uri_flags[lower] |= URI_Alpha;
    }

    uri_flags['#'] |= URI_Hash;
}
/*
 * Percent-encode a string of UTF-32 code points.
 *
 * A code point C is copied through unchanged (as one char) when
 * C < uri_flags.length and uri_flags[C] has a bit in common with
 * unescapedSet. Every other code point is converted to its UTF-8 octets and
 * each octet is written as '%' followed by two upper case hex digits.
 *
 * Params:
 *	string		code points to encode
 *	unescapedSet	mask of URI_* flags naming the characters that are
 *			NOT to be escaped
 * Returns:
 *	newly allocated char[] holding the encoded text
 * Throws:
 *	URIerror if a code point is above 0x1FFFFF or if alloca() fails
 */
private char[] URI_Encode(dchar[] string, uint unescapedSet)
{
    uint len;
    uint j;
    uint k;
    dchar V;
    dchar C;

    // Result buffer. Starts out in buffer[] and moves to ever larger
    // alloca() blocks when it fills up.
    char *R;
    uint Rlen;			// chars used so far
    uint Rsize;			// alloc'd size
    char buffer[50];

    len = string.length;
    R = buffer;
    Rsize = buffer.length;
    Rlen = 0;

    for (k = 0; k != len; k++)
    {
	C = string[k];

	// if (C in unescapedSet)
	if (C < uri_flags.length && uri_flags[C] & unescapedSet)
	{
	    if (Rlen == Rsize)
	    {	char* R2;

		Rsize *= 2;
		R2 = (char *)alloca(Rsize * char.size);
		if (!R2)
		    goto LthrowURIerror;
		R2[0..Rlen] = R[0..Rlen];
		R = R2;
	    }
	    R[Rlen] = cast(char)C;
	    Rlen++;
	}
	else
	{   char[4] Octet;
	    uint L;			// number of octets in Octet[]

	    V = C;

	    // Transform V into octets (UTF-8, 1 to 4 bytes)
	    if (V <= 0x7F)
	    {
		Octet[0] = cast(char) V;
		L = 1;
	    }
	    else if (V <= 0x7FF)
	    {
		Octet[0] = cast(char)(0xC0 | (V >> 6));
		Octet[1] = cast(char)(0x80 | (V & 0x3F));
		L = 2;
	    }
	    else if (V <= 0xFFFF)
	    {
		Octet[0] = cast(char)(0xE0 | (V >> 12));
		Octet[1] = cast(char)(0x80 | ((V >> 6) & 0x3F));
		Octet[2] = cast(char)(0x80 | (V & 0x3F));
		L = 3;
	    }
	    else if (V <= 0x1FFFFF)
	    {
		Octet[0] = cast(char)(0xF0 | (V >> 18));
		Octet[1] = cast(char)(0x80 | ((V >> 12) & 0x3F));
		Octet[2] = cast(char)(0x80 | ((V >> 6) & 0x3F));
		Octet[3] = cast(char)(0x80 | (V & 0x3F));
		L = 4;
	    }
	    else
	    {	goto LthrowURIerror;		// undefined UCS code
	    }

	    // Each octet expands to 3 chars ("%XX")
	    if (Rlen + L * 3 > Rsize)
	    {	char *R2;

		Rsize = 2 * (Rlen + L * 3);
		R2 = (char *)alloca(Rsize * char.size);
		if (!R2)
		    goto LthrowURIerror;
		R2[0..Rlen] = R[0..Rlen];
		R = R2;
	    }

	    // Emit the octets in order. The index must advance with every
	    // octet; otherwise a multi-byte character would be written as
	    // L copies of its first octet.
	    for (j = 0; j < L; j++)
	    {
		R[Rlen] = '%';
		R[Rlen + 1] = hex2ascii[Octet[j] >> 4];
		R[Rlen + 2] = hex2ascii[Octet[j] & 15];
		Rlen += 3;
	    }
	}
    }

    // Copy from the stack buffer into an array that outlives this call
    char[] result = new char[Rlen];
    result[] = R[0..Rlen];
    return result;

LthrowURIerror:
    throw new URIerror();
    return null;
}
// Convert one hexadecimal digit character to its numeric value.
// The result is only meaningful for '0'..'9', 'A'..'F' and 'a'..'f'; no
// validation is done here.
uint ascii2hex(dchar c)
{
    if (c <= '9')
	return c - '0';
    if (c <= 'F')
	return c - 'A' + 10;
    return c - 'a' + 10;
}
// Decode the %XX escapes of 'string' into UTF-32 code points.
//
// Characters other than '%' are copied through one char at a time. A '%'
// must be followed by two hex digits; when the resulting byte has its top
// bit set it starts a 2 to 4 byte UTF-8 sequence whose continuation bytes
// must each be given as a further %XX escape. If the decoded code point C
// satisfies C < uri_flags.length and uri_flags[C] has a bit in common with
// reservedSet, the original escape text is copied to the output instead of
// the decoded character.
//
// Returns a newly allocated dchar[].
// Throws URIerror on a truncated or malformed escape, an invalid UTF-8 lead
// or continuation byte, a value above 0x10FFFF, or if alloca() returns null.
private dchar[] URI_Decode(char[] string, uint reservedSet)
{ uint len;
uint j;
uint k;
uint V;
dchar C;
char* s;
// Result array, allocated on stack
dchar* R;
uint Rlen;
uint Rsize; // alloc'd size
len = string.length;
s = string;
// Preallocate result buffer R guaranteed to be large enough for result
// (every output element corresponds to at least one input char)
Rsize = len;
R = cast(dchar *)alloca(Rsize * dchar.size);
if (!R)
goto LthrowURIerror;
Rlen = 0;
for (k = 0; k != len; k++)
{ char B;
uint start;
C = s[k];
if (C != '%')
{ R[Rlen] = C;
Rlen++;
continue;
}
// Remember where the escape begins so that its text can be copied
// verbatim if it turns out to encode a reserved character.
start = k;
// Need two more chars after the '%'
if (k + 2 >= len)
goto LthrowURIerror;
if (!isxdigit(s[k + 1]) || !isxdigit(s[k + 2]))
goto LthrowURIerror;
B = cast(char)((ascii2hex(s[k + 1]) << 4) + ascii2hex(s[k + 2]));
k += 2;
if ((B & 0x80) == 0)
{
C = B;
}
else
{ uint n;
// n = number of leading 1 bits in B = length of the UTF-8 sequence.
// n == 1 (a bare continuation byte) and n > 4 are rejected.
for (n = 1; ; n++)
{
if (n > 4)
goto LthrowURIerror;
if (((B << n) & 0x80) == 0)
{
if (n == 1)
goto LthrowURIerror;
break;
}
}
// Pick off (7 - n) significant bits of B from first byte of octet
V = B & ((1 << (7 - n)) - 1); // (!!!)
// The remaining n - 1 bytes need 3 chars ("%XX") each
if (k + (3 * (n - 1)) >= len)
goto LthrowURIerror;
for (j = 1; j != n; j++)
{
k++;
if (s[k] != '%')
goto LthrowURIerror;
if (!isxdigit(s[k + 1]) || !isxdigit(s[k + 2]))
goto LthrowURIerror;
B = cast(char)((ascii2hex(s[k + 1]) << 4) + ascii2hex(s[k + 2]));
// continuation bytes have the form 10xxxxxx
if ((B & 0xC0) != 0x80)
goto LthrowURIerror;
k += 2;
V = (V << 6) | (B & 0x3F);
}
if (V > 0x10FFFF)
goto LthrowURIerror;
C = V;
}
if (C < uri_flags.length && uri_flags[C] & reservedSet)
{
// Reserved character: keep the escape sequence as written
// R ~= s[start .. k + 1];
int width = (k + 1) - start;
for (int ii = 0; ii < width; ii++)
R[Rlen + ii] = s[start + ii];
Rlen += width;
}
else
{
R[Rlen] = C;
Rlen++;
}
}
assert(Rlen <= Rsize); // enforce our preallocation size guarantee
// Copy array on stack to array in memory
dchar[] d = new dchar[Rlen];
d[] = R[0..Rlen];
return d;
LthrowURIerror:
throw new URIerror();
return null;
}
// Decode a complete URI. Escapes that stand for reserved characters or '#'
// are left in their %XX form; all others are replaced by the character they
// encode. The result is returned as UTF-8.
char[] decode(char[] encodedURI)
{
    uint keepEscaped = URI_Reserved | URI_Hash;
    return utf.toUTF8(URI_Decode(encodedURI, keepEscaped));
}
// Decode a single URI component: every %XX escape is replaced by the
// character it encodes (the reserved set is empty). The result is returned
// as UTF-8.
char[] decodeComponent(char[] encodedURIComponent)
{
    return utf.toUTF8(URI_Decode(encodedURIComponent, 0));
}
// Encode a complete URI. Letters, digits, mark characters, reserved
// characters and '#' pass through unchanged; everything else is written as
// %XX escapes of its UTF-8 octets.
char[] encode(char[] uri)
{
    uint passThrough = URI_Reserved | URI_Hash | URI_Alpha | URI_Digit | URI_Mark;
    return URI_Encode(utf.toUTF32(uri), passThrough);
}
// Encode a single URI component. Only letters, digits and mark characters
// pass through unchanged; everything else (including reserved characters
// and '#') is written as %XX escapes of its UTF-8 octets.
char[] encodeComponent(char[] uriComponent)
{
    uint passThrough = URI_Alpha | URI_Digit | URI_Mark;
    return URI_Encode(utf.toUTF32(uriComponent), passThrough);
}
// Round trip test for encode()/decode(): the only character of s that has to
// be escaped is the space, and decoding t must give s back.
// Note that the two "r = ..." printf calls are not guarded by debug(uri).
unittest
{
debug(uri) printf("uri.encodeURI.unittest\n");
char[] s = "http://www.digitalmars.com/~fred/fred's RX.html#foo";
char[] t = "http://www.digitalmars.com/~fred/fred's%20RX.html#foo";
char[] r;
r = encode(s);
printf("r = '%.*s'\n", r);
assert(r == t);
r = decode(t);
printf("r = '%.*s'\n", r);
assert(r == s);
}

502
utf.d Normal file
View file

@ -0,0 +1,502 @@
// utf.d
// Written by Walter Bright
// Copyright (c) 2003 Digital Mars
// All Rights Reserved
// www.digitalmars.com
// Description of UTF-8 at:
// http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
//debug=utf; // uncomment to turn on debugging printf's
// Error thrown by the decode() functions when they meet an invalid
// UTF-8, UTF-16 or UTF-32 sequence.
class UtfError : Error
{
    uint idx;	// index in string of where error occurred

    // s is the description of the problem, i the index it was found at
    this(char[] s, uint i)
    {
	idx = i;
	super(s);	// pass on the caller's message, not the literal "s"
    }
}
alias uint dchar;
// Tell whether c is acceptable as a character value: it must not be above
// 0x10FFFF, must not lie in the surrogate range 0xD800..0xDFFF and must not
// be 0xFFFE or 0xFFFF.
bit isValidDchar(dchar c)
{
    if (c < 0xD800)
	return true;			// everything below the surrogates

    return c > 0xDFFF &&
	   c <= 0x10FFFF &&
	   c != 0xFFFE &&
	   c != 0xFFFF;
}
// isValidDchar(): a plain ASCII letter is accepted, a value above 0x10FFFF
// is rejected.
unittest
{
debug(utf) printf("utf.isValidDchar.unittest\n");
assert(isValidDchar((dchar)'a') == true);
assert(isValidDchar((dchar)0x1FFFFF) == false);
}
/* =================== Decode ======================= */
// Decode the UTF-8 sequence that starts at s[idx].
// On success idx is advanced past the sequence and the code point is
// returned. On failure a UtfError carrying the starting index is thrown and
// idx is left unchanged.
// Rejected: sequences longer than 4 bytes, a continuation byte in lead
// position, sequences running off the end of s, overlong forms, bad
// continuation bytes and results for which isValidDchar() is false.
dchar decode(char[] s, inout uint idx)
in
{
assert(idx >= 0 && idx < s.length);
}
out (result)
{
assert(isValidDchar(result));
}
body
{
uint len = s.length;
dchar V;
uint i = idx;
char u = s[i];
if (u & 0x80)
{ uint n;
char u2;
/* The following encodings are valid, except for the 5 and 6 byte
* combinations:
* 0xxxxxxx
* 110xxxxx 10xxxxxx
* 1110xxxx 10xxxxxx 10xxxxxx
* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
// n = number of leading 1 bits in u = total length of the sequence
for (n = 1; ; n++)
{
if (n > 4)
goto Lerr; // only do the first 4 of 6 encodings
if (((u << n) & 0x80) == 0)
{
if (n == 1)
goto Lerr;
break;
}
}
// Pick off (7 - n) significant bits of B from first byte of octet
V = cast(dchar)(u & ((1 << (7 - n)) - 1));
if (i + (n - 1) >= len)
goto Lerr; // off end of string
/* The following combinations are overlong, and illegal:
* 1100000x (10xxxxxx)
* 11100000 100xxxxx (10xxxxxx)
* 11110000 1000xxxx (10xxxxxx 10xxxxxx)
* 11111000 10000xxx (10xxxxxx 10xxxxxx 10xxxxxx)
* 11111100 100000xx (10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx)
*/
// s[i + 1] exists here because n >= 2 and the length check above passed
u2 = s[i + 1];
if ((u & 0xFE) == 0xC0 ||
(u == 0xE0 && (u2 & 0xE0) == 0x80) ||
(u == 0xF0 && (u2 & 0xF0) == 0x80) ||
(u == 0xF8 && (u2 & 0xF8) == 0x80) ||
(u == 0xFC && (u2 & 0xFC) == 0x80))
goto Lerr; // overlong combination
// Fold in 6 payload bits from each continuation byte
for (uint j = 1; j != n; j++)
{
u = s[i + j];
if ((u & 0xC0) != 0x80)
goto Lerr; // trailing bytes are 10xxxxxx
V = (V << 6) | (u & 0x3F);
}
if (!isValidDchar(V))
goto Lerr;
i += n;
}
else
{
// single byte (ASCII)
V = cast(dchar) u;
i++;
}
idx = i;
return V;
Lerr:
throw new UtfError("invalid UTF-8 sequence", i);
}
// decode(char[]): checks 1, 2 and 3 byte sequences and the index they leave
// behind, then checks that a truncated sequence and the overlong forms in
// s4 each raise UtfError.
unittest
{ uint i;
dchar c;
debug(utf) printf("utf.decode.unittest\n");
static char[] s1 = "abcd";
i = 0;
c = decode(s1, i);
assert(c == (dchar)'a');
assert(i == 1);
c = decode(s1, i);
assert(c == (dchar)'b');
assert(i == 2);
static char[] s2 = "\xC2\xA9";
i = 0;
c = decode(s2, i);
assert(c == (dchar)"\u00A9");
assert(i == 2);
static char[] s3 = "\xE2\x89\xA0";
i = 0;
c = decode(s3, i);
assert(c == (dchar)"\u2260");
assert(i == 3);
static char[][] s4 =
[ "\xE2\x89", // too short
"\xC0\x8A",
"\xE0\x80\x8A",
"\xF0\x80\x80\x8A",
"\xF8\x80\x80\x80\x8A",
"\xFC\x80\x80\x80\x80\x8A",
];
for (int j = 0; j < s4.length; j++)
{
try
{
i = 0;
c = decode(s4[j], i);
assert(0);
}
catch (UtfError u)
{
// marker value proving that the catch block ran
i = 23;
}
assert(i == 23);
}
}
/********************************************************/
// Decode the UTF-16 sequence that starts at s[idx].
// On success idx is advanced by 1 (single code unit) or 2 (surrogate pair)
// and the code point is returned. On failure a UtfError carrying the
// starting index is thrown and idx is left unchanged.
// Rejected: a high surrogate at the end of s or not followed by a low
// surrogate, a low surrogate in lead position, and the values 0xFFFE and
// 0xFFFF.
dchar decode(wchar[] s, inout uint idx)
    in
    {
	assert(idx >= 0 && idx < s.length);
    }
    out (result)
    {
	assert(isValidDchar(result));
    }
    body
    {
	char[] msg;
	uint i = idx;
	uint u = s[i];

	if (u & ~0x7F)
	{   if (u >= 0xD800 && u <= 0xDBFF)
	    {   uint u2;

		if (i + 1 == s.length)
		{   msg = "surrogate UTF-16 high value past end of string";
		    goto Lerr;
		}
		u2 = s[i + 1];
		if (u2 < 0xDC00 || u2 > 0xDFFF)
		{   msg = "surrogate UTF-16 low value out of range";
		    goto Lerr;
		}
		// 0xD7C0 == 0xD800 - (0x10000 >> 10): subtracting it both
		// removes the surrogate bias and adds the 0x10000 offset
		u = ((u - 0xD7C0) << 10) + (u2 - 0xDC00);
		i += 2;
	    }
	    else if (u >= 0xDC00 && u <= 0xDFFF)
	    {   msg = "unpaired surrogate UTF-16 value";
		goto Lerr;
	    }
	    else if (u == 0xFFFE || u == 0xFFFF)
	    {   msg = "illegal UTF-16 value";
		goto Lerr;
	    }
	    else
	    {
		// Ordinary non-ASCII code unit. It must be consumed like
		// any other, or callers looping on idx never make progress.
		i++;
	    }
	}
	else
	{
	    i++;
	}

	idx = i;
	return cast(dchar)u;

      Lerr:
	throw new UtfError(msg, i);
    }
/********************************************************/
// "Decode" one UTF-32 element: returns s[idx] and advances idx by one if the
// value passes isValidDchar(); otherwise throws UtfError with the offending
// index and leaves idx unchanged.
dchar decode(dchar[] s, inout uint idx)
    in
    {
	assert(idx >= 0 && idx < s.length);
    }
    body
    {
	uint pos = idx;
	dchar value = s[pos];

	if (isValidDchar(value))
	{
	    idx = pos + 1;
	    return value;
	}
	throw new UtfError("invalid UTF-32 value", pos);
    }
/* =================== Encode ======================= */
// Append the UTF-8 encoding (1 to 4 bytes) of c to s.
// c must satisfy isValidDchar().
void encode(inout char[] s, dchar c)
    in
    {
	assert(isValidDchar(c));
    }
    body
    {
	char[4] octets;
	uint count;			// number of bytes used in octets[]

	if (c <= 0x7F)
	{
	    octets[0] = cast(char) c;
	    count = 1;
	}
	else if (c <= 0x7FF)
	{
	    octets[0] = cast(char)(0xC0 | (c >> 6));
	    octets[1] = cast(char)(0x80 | (c & 0x3F));
	    count = 2;
	}
	else if (c <= 0xFFFF)
	{
	    octets[0] = cast(char)(0xE0 | (c >> 12));
	    octets[1] = cast(char)(0x80 | ((c >> 6) & 0x3F));
	    octets[2] = cast(char)(0x80 | (c & 0x3F));
	    count = 3;
	}
	else if (c <= 0x10FFFF)
	{
	    octets[0] = cast(char)(0xF0 | (c >> 18));
	    octets[1] = cast(char)(0x80 | ((c >> 12) & 0x3F));
	    octets[2] = cast(char)(0x80 | ((c >> 6) & 0x3F));
	    octets[3] = cast(char)(0x80 | (c & 0x3F));
	    count = 4;
	}
	else
	{
	    assert(0);
	}

	// Append through a local copy of the array reference, then store it
	// back into the caller's variable
	char[] grown = s;
	grown ~= octets[0 .. count];
	s = grown;
    }
// encode(char[]): appending a 1, 2 and 3 byte character grows the string by
// exactly that many bytes and yields the expected UTF-8 octets.
unittest
{
debug(utf) printf("utf.encode.unittest\n");
char[] s = "abcd";
encode(s, (dchar)'a');
assert(s.length == 5);
assert(s == "abcda");
encode(s, (dchar)"\u00A9");
assert(s.length == 7);
assert(s == "abcda\xC2\xA9");
//assert(s == "abcda\u00A9"); // BUG: fix compiler
encode(s, (dchar)"\u2260");
assert(s.length == 10);
assert(s == "abcda\xC2\xA9\xE2\x89\xA0");
}
/********************************************************/
// Append the UTF-16 encoding of c to s: one code unit for c <= 0xFFFF,
// otherwise a high/low surrogate pair.
// c must satisfy isValidDchar().
void encode(inout wchar[] s, dchar c)
    in
    {
	assert(isValidDchar(c));
    }
    body
    {
	wchar[] grown = s;

	if (c > 0xFFFF)
	{
	    wchar[2] pair;

	    pair[0] = (((c - 0x10000) >> 10) & 0x3FF) + 0xD800;	// high surrogate
	    pair[1] = ((c - 0x10000) & 0x3FF) + 0xDC00;		// low surrogate
	    grown ~= pair;
	}
	else
	{
	    grown ~= cast(wchar) c;
	}
	s = grown;
    }
// Append c to the UTF-32 string s. No transformation is needed; the in
// contract only checks that c satisfies isValidDchar().
void encode(inout dchar[] s, dchar c)
in
{
assert(isValidDchar(c));
}
body
{
s ~= c;
}
/* =================== Validation ======================= */
// Check that s is well formed UTF-8 by decoding it from start to end.
// Returns normally if it is; otherwise the UtfError thrown by decode()
// propagates to the caller.
void validate(char[] s)
{
    uint pos = 0;
    uint end = s.length;

    while (pos < end)
	decode(s, pos);		// advances pos past one character
}
// Check that s is well formed UTF-16 by decoding it from start to end.
// Returns normally if it is; otherwise the UtfError thrown by decode()
// propagates to the caller.
void validate(wchar[] s)
{
    uint pos = 0;
    uint end = s.length;

    while (pos < end)
	decode(s, pos);		// decode() is expected to advance pos
}
// Check that every element of s is a valid UTF-32 value by decoding it from
// start to end. Returns normally if so; otherwise the UtfError thrown by
// decode() propagates to the caller.
void validate(dchar[] s)
{
    uint pos = 0;
    uint end = s.length;

    while (pos < end)
	decode(s, pos);		// advances pos by one element
}
/* =================== Conversion to UTF8 ======================= */
// UTF-8 to UTF-8: returns s itself, without copying. The in contract runs
// validate(s) on the input.
char[] toUTF8(char[] s)
in
{
validate(s);
}
body
{
return s;
}
// Convert UTF-16 to UTF-8.
// Surrogate pairs are combined into a single code point before encoding, so
// characters above 0xFFFF come out as one 4 byte sequence. An unpaired
// or truncated surrogate makes decode() throw UtfError.
char[] toUTF8(wchar[] s)
{
    char[] r;

    for (uint i = 0; i < s.length; )
    {
	dchar c = cast(dchar)s[i];

	if (c >= 0xD800 && c <= 0xDFFF)
	    c = decode(s, i);	// combines the pair and advances i by 2
	else
	    i++;		// a single code unit is the code point itself

	encode(r, c);
    }
    return r;
}
// Convert UTF-32 to UTF-8 by encoding each element of s in turn.
// Returns a new array (null/empty if s is empty).
char[] toUTF8(dchar[] s)
{
    char[] result;
    int pos = 0;

    while (pos < s.length)
    {
	encode(result, s[pos]);
	pos++;
    }
    return result;
}
/* =================== Conversion to UTF16 ======================= */
// Convert UTF-8 to UTF-16: decode one character at a time and re-encode it.
// A malformed input sequence makes decode() throw UtfError.
wchar[] toUTF16(char[] s)
{
    wchar[] result;
    uint pos = 0;

    while (pos < s.length)
    {
	dchar ch = decode(s, pos);	// advances pos
	encode(result, ch);
    }
    return result;
}
// UTF-16 to UTF-16: returns s itself, without copying. The in contract runs
// validate(s) on the input.
wchar[] toUTF16(wchar[] s)
in
{
validate(s);
}
body
{
return s;
}
// Convert UTF-32 to UTF-16 by encoding each element of s in turn; values
// above 0xFFFF become surrogate pairs.
wchar[] toUTF16(dchar[] s)
{
    wchar[] result;
    uint pos = 0;

    while (pos < s.length)
    {
	encode(result, s[pos]);
	pos++;
    }
    return result;
}
/* =================== Conversion to UTF32 ======================= */
// Convert UTF-8 to UTF-32: decode one character at a time and append it.
// A malformed input sequence makes decode() throw UtfError.
dchar[] toUTF32(char[] s)
{
    dchar[] result;
    uint pos = 0;

    while (pos < s.length)
    {
	dchar ch = decode(s, pos);	// advances pos
	result ~= ch;
    }
    return result;
}
// Convert UTF-16 to UTF-32: decode one character at a time and append it.
// A malformed input sequence makes decode() throw UtfError.
dchar[] toUTF32(wchar[] s)
{
    dchar[] result;
    uint pos = 0;

    while (pos < s.length)
    {
	dchar ch = decode(s, pos);	// decode() is expected to advance pos
	result ~= ch;
    }
    return result;
}
// UTF-32 to UTF-32: returns s itself, without copying. The in contract runs
// validate(s) on the input.
dchar[] toUTF32(dchar[] s)
in
{
validate(s);
}
body
{
return s;
}

View file

@ -13,8 +13,8 @@
# This relies on LIB.EXE 8.00 or later, and MAKE.EXE 5.01 or later.
CFLAGS=-g -mn -6 -r
DFLAGS=-O -release
#DFLAGS=-unittest -g
#DFLAGS=-O -release
DFLAGS=-unittest -g
CC=sc
#DMD=\dmd\bin\dmd
@ -57,7 +57,7 @@ OBJS= assert.obj deh.obj switch.obj complex.obj gcstats.obj \
outbuffer.obj ctype.obj regexp.obj random.obj windows.obj \
stream.obj switcherr.obj com.obj array.obj gc.obj adi.obj \
qsort.obj math2.obj date.obj dateparse.obj thread.obj obj.obj \
iunknown.obj crc32.obj conv.obj arraycast.obj \
iunknown.obj crc32.obj conv.obj arraycast.obj utf.obj uri.obj \
ti_Aa.obj ti_Ag.obj ti_C.obj ti_int.obj ti_char.obj \
ti_wchar.obj ti_uint.obj ti_short.obj ti_ushort.obj \
ti_byte.obj ti_ubyte.obj ti_long.obj ti_ulong.obj ti_ptr.obj \
@ -92,7 +92,7 @@ SRC7=ti_wchar.d ti_uint.d ti_short.d ti_ushort.d \
ti_creal.d ti_ireal.d ti_cfloat.d ti_ifloat.d \
ti_cdouble.d ti_idouble.d
SRC8=crc32.d stdint.d conv.d gcstats.d
SRC8=crc32.d stdint.d conv.d gcstats.d utf.d uri.d
phobos.lib : $(OBJS) minit.obj gc2\dmgc.lib win32.mak
lib -c phobos.lib $(OBJS) minit.obj gc2\dmgc.lib

View file

@ -1178,8 +1178,34 @@ struct PAINTSTRUCT {
}
alias PAINTSTRUCT* PPAINTSTRUCT, NPPAINTSTRUCT, LPPAINTSTRUCT;
// flags for GetDCEx()
enum
{
DCX_WINDOW = 0x00000001,
DCX_CACHE = 0x00000002,
DCX_NORESETATTRS = 0x00000004,
DCX_CLIPCHILDREN = 0x00000008,
DCX_CLIPSIBLINGS = 0x00000010,
DCX_PARENTCLIP = 0x00000020,
DCX_EXCLUDERGN = 0x00000040,
DCX_INTERSECTRGN = 0x00000080,
DCX_EXCLUDEUPDATE = 0x00000100,
DCX_INTERSECTUPDATE = 0x00000200,
DCX_LOCKWINDOWUPDATE = 0x00000400,
DCX_VALIDATE = 0x00200000,
}
export
{
BOOL UpdateWindow(HWND hWnd);
HWND SetActiveWindow(HWND hWnd);
HWND GetForegroundWindow();
BOOL PaintDesktop(HDC hdc);
BOOL SetForegroundWindow(HWND hWnd);
HWND WindowFromDC(HDC hDC);
HDC GetDC(HWND hWnd);
HDC GetDCEx(HWND hWnd, HRGN hrgnClip, DWORD flags);
HDC GetWindowDC(HWND hWnd);
int ReleaseDC(HWND hWnd, HDC hDC);
HDC BeginPaint(HWND hWnd, LPPAINTSTRUCT lpPaint);
@ -1196,6 +1222,23 @@ export
BOOL RedrawWindow(HWND hWnd, RECT *lprcUpdate, HRGN hrgnUpdate, UINT flags);
}
// flags for RedrawWindow()
enum
{
RDW_INVALIDATE = 0x0001,
RDW_INTERNALPAINT = 0x0002,
RDW_ERASE = 0x0004,
RDW_VALIDATE = 0x0008,
RDW_NOINTERNALPAINT = 0x0010,
RDW_NOERASE = 0x0020,
RDW_NOCHILDREN = 0x0040,
RDW_ALLCHILDREN = 0x0080,
RDW_UPDATENOW = 0x0100,
RDW_ERASENOW = 0x0200,
RDW_FRAME = 0x0400,
RDW_NOFRAME = 0x0800,
}
export
{
BOOL GetClientRect(HWND hWnd, LPRECT lpRect);