diff --git a/cgi.d b/cgi.d deleted file mode 120000 index 283c969..0000000 --- a/cgi.d +++ /dev/null @@ -1 +0,0 @@ -../../djs/proxy/cgi.d \ No newline at end of file diff --git a/cgi.d b/cgi.d new file mode 100644 index 0000000..b1289f2 --- /dev/null +++ b/cgi.d @@ -0,0 +1,1106 @@ +module arsd.cgi; + +// FIXME: would be cool to flush part of a dom document before complete +// somehow in here and dom.d. + + +// FIXME: 100 Continue in the nph section? Probably belongs on the +// httpd class though. + +public import std.string; +import std.uri; +import std.exception; +import std.base64; +//import std.algorithm; +public import std.stdio; +static import std.date; +public import std.conv; +import std.range; + +import std.process; + +T[] consume(T)(T[] range, int count) { + if(count > range.length) + count = range.length; + return range[count..$]; +} + +int locationOf(T)(T[] data, string item) { + const(ubyte[]) d = cast(const(ubyte[])) data; + const(ubyte[]) i = cast(const(ubyte[])) item; + + for(int a = 0; a < d.length; a++) { + if(a + i.length > d.length) + return -1; + if(d[a..a+i.length] == i) + return a; + } + + return -1; +} + +/+ +/// If you pass -1 to Cgi.this() as maxContentLength, it +/// lets you use one of these instead of buffering the data +/// itself. + +/// The benefit is you can handle data of any size without needing +/// a buffering solution. The downside is this is one-way and the order +/// of elements might not be what you want. If you need buffering, you've +/// gotta do it yourself. +struct CgiVariableStream { + bool empty() { + return true; + } + + void popFront() { + + } + + /// If you want to do an upload progress bar, these functions + /// might help. + int bytesReceived() { + + } + + /// ditto + /// But, note this won't necessarily be known, so it may return zero! + int bytesExpected() { + + } + + + /// The stream returns these Elements. 
+ struct Element { + enum Type { String, File } + + /// Since the last popFront, is this a new element or a + /// continuation of the last? + bool isNew; + + /// Is this the last piece of this element? + /// Note that sometimes isComplete will only be true with an empty + /// payload, since it can't be sure until it actually receives the terminator. + /// This, unless you are buffering parts, you can't depend on it. + bool isComplete; + + /// Metainfo from the part header is preserved + string name; + string fileName; + string contentType; + + ubyte[] content; + } +} ++/ +/// The main interface with the web request +class Cgi { + public: + enum RequestMethod { GET, HEAD, POST, PUT, DELETE, // GET and POST are the ones that really work + // these are defined in the standard, but idk if they are useful for anything + OPTIONS, TRACE, CONNECT, + // this is an extension for when the method is not specified and you want to assume + CommandLine } + + /** Initializes it using the CGI interface */ + this(int maxContentLength = 5_000_000, + // use this to override the environment variable functions + string delegate(string env) getenv = null, + // and this should return a chunk of data. return empty when done + const(ubyte)[] delegate() readdata = null, + // finally, use this to do custom output if needed + void delegate(const(ubyte)[]) _rawDataOutput = null + ) + { + rawDataOutput = _rawDataOutput; + if(getenv is null) + getenv = delegate string(string env) { return .getenv(env); }; + + // FIXME: this is wrong on IIS! 
+ requestUri = getenv("REQUEST_URI"); + cookie = getenv("HTTP_COOKIE"); + referrer = getenv("HTTP_REFERER"); + userAgent = getenv("HTTP_USER_AGENT"); + queryString = getenv("QUERY_STRING"); + remoteAddress = getenv("REMOTE_ADDR"); + host = getenv("HTTP_HOST"); + pathInfo = getenv("PATH_INFO"); + scriptName = getenv("SCRIPT_NAME"); + authorization = getenv("HTTP_AUTHORIZATION"); + // this is a hack because Apache is a shitload of fuck and + // refuses to send the real header to us. Compatible + // programs should send both the standard and X- versions + + // NOTE: if you have access to .htaccess or httpd.conf, you can make this + // unnecessary with mod_rewrite, so it is commented + + //if(authorization.length == 0) // if the std is there, use it + // authorization = getenv("HTTP_X_AUTHORIZATION"); + + if(getenv("SERVER_PORT").length) + port = to!int(getenv("SERVER_PORT")); + else + port = 0; // this was probably called from the command line + + auto rm = getenv("REQUEST_METHOD"); + if(rm.length) + requestMethod = to!RequestMethod(getenv("REQUEST_METHOD")); + else + requestMethod = RequestMethod.CommandLine; + + https = getenv("HTTPS") == "on"; + + // FIXME: DOCUMENT_ROOT? + + immutable(ubyte)[] data; + string contentType; + + // FIXME: what about PUT? + if(requestMethod == RequestMethod.POST) { + contentType = getenv("CONTENT_TYPE"); + + // FIXME: is this ever not going to be set? I guess it depends + // on if the server de-chunks and buffers... seems like it has potential + // to be slow if they did that. The spec says it is always there though. + // And it has worked reliably for me all year in the live environment, + // but some servers might be different. 
+ int contentLength = to!int(getenv("CONTENT_LENGTH")); + if(contentLength) { + if(maxContentLength > 0 && contentLength > maxContentLength) { + setResponseStatus("413 Request entity too large"); + write("You tried to upload a file that is too large."); + close(); + throw new Exception("POST too large"); + } + + if(readdata is null) + foreach(ubyte[] chunk; stdin.byChunk(4096)) { // FIXME: maybe it should pass the range directly to the parser + if(chunk.length > contentLength) { + data ~= chunk[0..contentLength]; + contentLength = 0; + break; + } else { + data ~= chunk; + contentLength -= chunk.length; + } + if(contentLength == 0) + break; + } + else { + // we have a custom data source.. + auto chunk = readdata(); + while(chunk.length) { + // FIXME: DRY + if(chunk.length > contentLength) { + data ~= chunk[0..contentLength]; + contentLength = 0; + break; + } else { + data ~= chunk; + contentLength -= chunk.length; + } + if(contentLength == 0) + break; + + chunk = readdata(); + } + } + + } + + version(preserveData) + originalPostData = data; + } + + mixin(createVariableHashes()); + // fixme: remote_user script name + } + + /** Initializes it from some almost* raw HTTP request data + headers[0] should be the "GET / HTTP/1.1" line + + * Note the data should /not/ be chunked at this point. + + headers: each http header, excluding the \r\n at the end, but including the request line at headers[0] + data: the request data (usually POST variables) + address: the remote IP + _rawDataOutput: delegate to accept response data. If not null, this is called for all data output, which + will include HTTP headers and the status line. The data may also be chunked; it is already suitable for + being sent directly down the wire. + + If null, the data is sent to stdout. 
+ + + FIXME: data should be able to be streaming, for large files + */ + this(string[] headers, immutable(ubyte)[] data, string address, void delegate(const(ubyte)[]) _rawDataOutput = null, int pathInfoStarts = 0) { + auto parts = headers[0].split(" "); + + https = false; + port = 80; // FIXME + + rawDataOutput = _rawDataOutput; + nph = true; + + requestMethod = to!RequestMethod(parts[0]); + + requestUri = parts[1]; + + scriptName = requestUri[0 .. pathInfoStarts]; + + int question = requestUri.indexOf("?"); + if(question == -1) { + queryString = ""; + pathInfo = requestUri[pathInfoStarts..$]; + } else { + queryString = requestUri[question+1..$]; + pathInfo = requestUri[pathInfoStarts..question]; + } + + remoteAddress = address; + + if(headers[0].indexOf("HTTP/1.0") != -1) { // indexOf returns -1 (which is truthy) when absent, so compare explicitly + http10 = true; + autoBuffer = true; + } + + string contentType = ""; + + foreach(header; headers[1..$]) { + int colon = header.indexOf(":"); + if(colon == -1) + throw new Exception("HTTP headers should have a colon!"); + string name = header[0..colon].toLower; + string value = header[colon+1..$].strip; // everything after the colon, minus optional surrounding whitespace; tolerates "Name:value" and empty values + + switch(name) { + case "authorization": + authorization = value; + break; + case "content-type": + contentType = value; + break; + case "host": + host = value; + break; + case "user-agent": + userAgent = value; + break; + case "referer": + referrer = value; + break; + case "cookie": + cookie ~= value; + break; + default: + // ignore it + } + } + + // Need to set up get, post, and cookies + mixin(createVariableHashes()); + } + + // This gets mixed in because it is shared but set inside invariant constructors + pure private static string createVariableHashes() { + return q{ + if(queryString.length == 0) + get = null;//get.init; + else { + auto _get = decodeVariables(queryString); + getArray = assumeUnique(_get); + + string[string] ga; + + // Some sites are shit and don't let you handle multiple parameters. 
+ // If so, compile this in and encode it as a single parameter + version(with_cgi_packed) { + auto idx = pathInfo.indexOf("PACKED"); + if(idx != -1) { + auto pi = pathInfo[idx + "PACKED".length .. $]; + + auto _unpacked = decodeVariables( + cast(string) base64UrlDecode(pi)); + + foreach(k, v; _unpacked) + ga[k] = v[$-1]; + + pathInfo = pathInfo[0 .. idx]; + } + + if("arsd_packed_data" in getArray) { + auto _unpacked = decodeVariables( + cast(string) base64UrlDecode(getArray["arsd_packed_data"][0])); + + foreach(k, v; _unpacked) + ga[k] = v[$-1]; + } + } + + foreach(k, v; getArray) + ga[k] = v[$-1]; + + get = assumeUnique(ga); + } + + if(cookie.length == 0) + cookies = null;//cookies.init; + else { + auto _cookies = decodeVariables(cookie, "; "); + cookiesArray = assumeUnique(_cookies); + + string[string] ca; + foreach(k, v; cookiesArray) + ca[k] = v[$-1]; + + cookies = assumeUnique(ca); + } + + if(data.length == 0) + post = null;//post.init; + else { + int terminator = contentType.indexOf(";"); + if(terminator == -1) + terminator = contentType.length; + switch(contentType[0..terminator]) { + default: assert(0); + case "multipart/form-data": + string[][string] _post; + + UploadedFile[string] _files; + + int b = contentType[terminator..$].indexOf("boundary=") + terminator; + assert(b >= 0, "no boundary"); + immutable boundary = contentType[b+9..$]; + + int pos = 0; + + // all boundaries except the first should have a \r\n before them + while(pos < data.length) { + assert(data[pos] == '-', "no leading dash"); + pos++; + assert(data[pos] == '-', "no second leading dash"); + pos++; + //writefln("**expected** %s\n** got** %s", boundary, cast(string) data[pos..pos+boundary.length]); + assert(data[pos..pos+boundary.length] == cast(const(ubyte[])) boundary, "not lined up on boundary"); + pos += boundary.length; + if(data[pos] == '\r' && data[pos+1] == '\n') { + pos += 2; + } else { + assert(data[pos] == '-', "improper ending #1"); + assert(data[pos+1] == '-', "improper 
ending #2"); + if(pos+2 != data.length) { + pos += 2; + assert(data[pos] == '\r', "not new line part 1"); + assert(data[pos + 1] == '\n', "not new line part 2"); + assert(pos + 2 == data.length, "wtf, wrong length"); + } + break; + } + + auto nextloc = locationOf(data[pos..$], boundary) + pos - 2; // the -2 is a HACK + assert(nextloc > 0, "empty piece"); + assert(nextloc != -1, "no next boundary"); + immutable thisOne = data[pos..nextloc-2]; // the 2 skips the leading \r\n of the next boundary + + // thisOne has the headers and the data + int headerEndLocation = locationOf(thisOne, "\r\n\r\n"); + assert(headerEndLocation >= 0, "no header"); + auto thisOnesHeaders = thisOne[0..headerEndLocation]; + auto thisOnesData = thisOne[headerEndLocation+4..$]; + + string[] pieceHeaders = split(cast(string) thisOnesHeaders, "\r\n"); + + UploadedFile piece; + bool isFile = false; + + foreach(h; pieceHeaders) { + int p = h.indexOf(":"); + assert(p != -1, "no colon in header"); + string hn = h[0..p]; + string hv = h[p+2..$]; + + switch(hn.toLower) { + default: assert(0); + case "content-disposition": + auto info = hv.split("; "); + foreach(i; info[1..$]) { // skipping the form-data + auto o = i.split("="); // FIXME + string pn = o[0]; + string pv = o[1][1..$-1]; + + if(pn == "name") { + piece.name = pv; + } else if (pn == "filename") { + piece.filename = pv; + isFile = true; + } + } + break; + case "content-type": + piece.contentType = hv; + break; + } + } + + piece.content = thisOnesData; + + //writefln("Piece: [%s] (%s) %d\n***%s****", piece.name, piece.filename, piece.content.length, cast(string) piece.content); + + if(isFile) + _files[piece.name] = piece; + else + _post[piece.name] ~= cast(string) piece.content; + + pos = nextloc; + } + + postArray = assumeUnique(_post); + files = assumeUnique(_files); + break; + case "application/x-www-form-urlencoded": + auto _post = decodeVariables(cast(string) data); + postArray = assumeUnique(_post); + break; + } + string[string] pa; + 
foreach(k, v; postArray) + pa[k] = v[$-1]; + + post = assumeUnique(pa); + } + }; + } + + struct UploadedFile { + string name; + string filename; + string contentType; + immutable(ubyte)[] content; + } + + void requireBasicAuth(string user, string pass, string message = null) { + if(authorization != "Basic " ~ Base64.encode(cast(immutable(ubyte)[]) (user ~ ":" ~ pass))) { + setResponseStatus("401 Authorization Required"); + header ("WWW-Authenticate: Basic realm=\""~message~"\""); + close(); + throw new Exception("Not authorized"); + } + } + + /// Very simple caching controls - setCache(false) means it will never be cached. + /// setCache(true) means it will always be cached for as long as possible. + /// Use setResponseExpires and updateResponseExpires for more control + void setCache(bool allowCaching) { + noCache = !allowCaching; + } + + /// This gets a full url for the current request, including port, protocol, host, path, and query + string getCurrentCompleteUri() const { + return format("http%s://%s%s%s", + https ? "s" : "", + host, + port == 80 ? "" : ":" ~ to!string(port), + requestUri); + } + + /// Sets the HTTP status of the response. For example, "404 File Not Found" or "500 Internal Server Error". + /// It assumes "200 OK", and automatically changes to "302 Found" if you call setResponseLocation(). + /// Note setResponseStatus() must be called *before* you write() any data to the output. + void setResponseStatus(string status) { + assert(!outputtedResponseData); + responseStatus = status; + } + private string responseStatus = null; + + /// Sets the location header, which the browser will redirect the user to automatically. + /// Note setResponseLocation() must be called *before* you write() any data to the output. + /// The optional important argument is used if it's a default suggestion rather than something to insist upon. 
+ void setResponseLocation(string uri, bool important = true) { + if(!important && isCurrentResponseLocationImportant) + return; // important redirects always override unimportant ones + + assert(!outputtedResponseData); + responseStatus = "302 Found"; + responseLocation = uri.strip; + isCurrentResponseLocationImportant = important; + } + private string responseLocation = null; + private bool isCurrentResponseLocationImportant = false; + + /// Sets the Expires: http header. See also: updateResponseExpires, setPublicCaching + /// The parameter is in unix_timestamp * 1000. Try setResponseExpires(getUTCtime() + SOME AMOUNT) for normal use. + /// Note: the when parameter is different than setCookie's expire parameter. + void setResponseExpires(long when, bool isPublic = false) { + responseExpires = when; + setCache(true); // need to enable caching so the date has meaning + + responseIsPublic = isPublic; + } + private long responseExpires = long.min; + private bool responseIsPublic = false; + + /// This is like setResponseExpires, but it can be called multiple times. The setting most in the past is the one kept. + /// If you have multiple functions, they all might call updateResponseExpires about their own return value. The program + /// output as a whole is as cacheable as the least cachable part in the chain. + + /// setCache(false) always overrides this - it is, by definition, the strictest anti-cache statement available. + /// Conversely, setting here overrides setCache(true), since any expiration date is in the past of infinity. + void updateResponseExpires(long when, bool isPublic) { + if(responseExpires == long.min) + setResponseExpires(when, isPublic); + else if(when < responseExpires) + setResponseExpires(when, responseIsPublic && isPublic); // if any part of it is private, it all is + } + + /* + /// Set to true if you want the result to be cached publically - that is, is the content shared? + /// Should generally be false if the user is logged in. 
It assumes private cache only. + /// setCache(true) also turns on public caching, and setCache(false) sets to private. + void setPublicCaching(bool allowPublicCaches) { + publicCaching = allowPublicCaches; + } + private bool publicCaching = false; + */ + + /// Sets an HTTP cookie, automatically encoding the data to the correct string. + /// expiresIn is how many milliseconds in the future the cookie will expire. + /// TIP: to make a cookie accessible from subdomains, set the domain to .yourdomain.com. + /// Note setCookie() must be called *before* you write() any data to the output. + void setCookie(string name, string data, long expiresIn = 0, string path = null, string domain = null, bool httpOnly = false) { + assert(!outputtedResponseData); + string cookie = name ~ "="; + cookie ~= data; + if(path !is null) + cookie ~= "; path=" ~ path; + if(expiresIn != 0) + cookie ~= "; expires=" ~ printDate(expiresIn + std.date.getUTCtime()); + if(domain !is null) + cookie ~= "; domain=" ~ domain; + if(httpOnly == true ) + cookie ~= "; HttpOnly"; + + responseCookies ~= cookie; + } + private string[] responseCookies; + + /// Clears a previously set cookie with the given name, path, and domain. + void clearCookie(string name, string path = null, string domain = null) { + assert(!outputtedResponseData); + setCookie(name, "", 1, path, domain); + } + + /// Sets the content type of the response, for example "text/html" (the default) for HTML, or "image/png" for a PNG image + void setResponseContentType(string ct) { + assert(!outputtedResponseData); + responseContentType = ct; + } + private string responseContentType = null; + + /// Adds a custom header. It should be the name: value, but without any line terminator. + /// For example: header("X-My-Header: Some value"); + /// Note you should use the specialized functions in this object if possible to avoid + /// duplicates in the output. 
+ void header(string h) { + customHeaders ~= h; + } + + private string[] customHeaders; + + /// Writes the data to the output, flushing headers if they have not yet been sent. + void write(const(void)[] t, bool isAll = false) { + assert(!closed, "Output has already been closed"); + if(!outputtedResponseData && (!autoBuffer || isAll)) { + string[] hd; + // Flush the headers + if(responseStatus !is null) { + if(nph) { + if(http10) + hd ~= "HTTP/1.0 " ~ responseStatus; + else + hd ~= "HTTP/1.1 " ~ responseStatus; + } else + hd ~= "Status: " ~ responseStatus; + } else if (nph) { + if(http10) + hd ~= "HTTP/1.0 200 OK"; + else + hd ~= "HTTP/1.1 200 OK"; + } + if(nph) { // we're responsible for setting the date too according to http 1.1 + hd ~= "Date: " ~ printDate(std.date.getUTCtime()); + if(!isAll) { + if(!http10) { + hd ~= "Transfer-Encoding: chunked"; + responseChunked = true; + } + } else + hd ~= "Content-Length: " ~ to!string(t.length); + + } + + // FIXME: what if the user wants to set his own content-length? + // The custom header function can do it, so maybe that's best. + // Or we could reuse the isAll param. + if(responseLocation !is null) { + hd ~= "Location: " ~ responseLocation; + } + if(!noCache && responseExpires != long.min) { // an explicit expiration date is set + hd ~= "Expires: " ~ printDate(responseExpires); + // FIXME: assuming everything is private unless you use nocache - generally right for dynamic pages, but not necessarily + hd ~= "Cache-Control: "~(responseIsPublic ? 
"public" : "private")~", no-cache=\"set-cookie\""; + } + if(responseCookies !is null && responseCookies.length > 0) { + foreach(c; responseCookies) + hd ~= "Set-Cookie: " ~ c; + } + if(noCache) { // we specifically do not want caching (this is actually the default) + hd ~= "Cache-Control: private, no-cache=\"set-cookie\""; + hd ~= "Expires: 0"; + hd ~= "Pragma: no-cache"; + } else { + if(responseExpires == long.min) { // caching was enabled, but without a date set - that means assume cache forever + hd ~= "Cache-Control: public"; + hd ~= "Expires: Tue, 31 Dec 2030 14:00:00 GMT"; // FIXME: should not be more than one year in the future + } + } + if(responseContentType !is null) { + hd ~= "Content-Type: " ~ responseContentType; + } else + hd ~= "Content-Type: text/html; charset=utf-8"; + + if(customHeaders !is null) + hd ~= customHeaders; + + // FIXME: what about duplicated headers? + + foreach(h; hd) { + if(rawDataOutput !is null) + rawDataOutput(cast(const(ubyte)[]) (h ~ "\r\n")); + else + writeln(h); + } + if(rawDataOutput !is null) + rawDataOutput(cast(const(ubyte)[]) ("\r\n")); + else + writeln(""); + + outputtedResponseData = true; + } + + if(requestMethod != RequestMethod.HEAD && t.length > 0) { + if (autoBuffer) { + outputBuffer ~= cast(ubyte[]) t; + } + if(!autoBuffer || isAll) { + if(rawDataOutput !is null) + if(nph && responseChunked) + rawDataOutput(makeChunk(cast(const(ubyte)[]) t)); + else + rawDataOutput(cast(const(ubyte)[]) t); + else + stdout.rawWrite(t); + } + } + } + + void flush() { + if(rawDataOutput is null) + stdout.flush(); + } + + version(autoBuffer) + bool autoBuffer = true; + else + bool autoBuffer = false; + ubyte[] outputBuffer; + + /// Flushes the buffers to the network, signifying that you are done. + /// You should always call this explicitly when you are done outputting data. 
+ void close() { + if(closed) + return; // don't double close + + if(!outputtedResponseData) + write(""); + + // writing auto buffered data + if(requestMethod != RequestMethod.HEAD && autoBuffer) { + if(!nph) + stdout.rawWrite(outputBuffer); + else + write(outputBuffer, true); // tell it this is everything + } + + // closing the last chunk... + if(nph && rawDataOutput !is null && responseChunked) + rawDataOutput(cast(const(ubyte)[]) "0\r\n\r\n"); + + closed = true; + } + + // Closes without doing anything, shouldn't be used often + void rawClose() { + closed = true; + } + + /// Gets a request variable as a specific type, or the default value of it isn't there + /// or isn't convertable to the request type. Checks both GET and POST variables. + T request(T = string)(in string name, in T def = T.init) const nothrow { + try { + return + (name in post) ? to!T(post[name]) : + (name in get) ? to!T(get[name]) : + def; + } catch(Exception e) { return def; } + } + + private void delegate(const(ubyte)[]) rawDataOutput = null; + + private bool outputtedResponseData; + private bool nph; + private bool http10; + private bool closed; + private bool responseChunked = false; + + private bool noCache = true; + + version(preserveData) + immutable(ubyte)[] originalPostData; + + immutable(char[]) host; + immutable(char[]) userAgent; + immutable(char[]) pathInfo; + immutable(char[]) scriptName; + immutable(char[]) authorization; + + immutable(char[]) queryString; + immutable(char[]) referrer; + immutable(char[]) cookie; + immutable(char[]) requestUri; + + immutable(RequestMethod) requestMethod; + + immutable(string[string]) get; + immutable(string[string]) post; + immutable(string[string]) cookies; + immutable(UploadedFile)[string] files; + + // Use these if you expect multiple items submitted with the same name. btw, assert(get[name] is getArray[name][$-1); should pass. Same for post and cookies. 
+ // the order of the arrays is the order the data arrives + immutable(string[][string]) getArray; + immutable(string[][string]) postArray; + immutable(string[][string]) cookiesArray; + + immutable(char[]) remoteAddress; + + immutable bool https; + immutable int port; + private: + //RequestMethod _requestMethod; +} +/* +import std.file; +struct Session { + this(Cgi cgi) { + sid = "test.sid"; + + cgi.setCookie("arsd_sid", sid); + } + + void loadFromFile() { + if(exists("/tmp/arsd-cgi-session-" ~ sid)) { + + } + } + + void saveToFile() { + std.file.write("/tmp/arsd-cgi-session-" ~ sid, + + ); + } + + ~this() { + saveToFile(); + } + + @disable this(this) { } + + string sid; + string[string] session; +} +*/ +string[][string] decodeVariables(string data, string separator = "&") { + auto vars = data.split(separator); + string[][string] _get; + foreach(var; vars) { + int equal = var.indexOf("="); + if(equal == -1) { + _get[decodeComponent(var)] ~= ""; + } else { + //_get[decodeComponent(var[0..equal])] ~= decodeComponent(var[equal + 1 .. $].replace("+", " ")); + // stupid + -> space conversion. + _get[decodeComponent(var[0..equal]).replace("+", " ")] ~= decodeComponent(var[equal + 1 .. 
$].replace("+", " ")); + } + } + return _get; +} + +string[string] decodeVariablesSingle(string data) { + string[string] va; + auto varArray = decodeVariables(data); + foreach(k, v; varArray) + va[k] = v[$-1]; + + return va; +} + +string encodeVariables(string[string] data) { + string ret; + + bool outputted = false; + foreach(k, v; data) { + if(outputted) + ret ~= "&"; + else + outputted = true; + + ret ~= std.uri.encodeComponent(k) ~ "=" ~ std.uri.encodeComponent(v); + } + + return ret; +} + +string encodeVariables(string[][string] data) { + string ret; + + bool outputted = false; + foreach(k, arr; data) { + foreach(v; arr) { + if(outputted) + ret ~= "&"; + else + outputted = true; + ret ~= std.uri.encodeComponent(k) ~ "=" ~ std.uri.encodeComponent(v); + } + } + + return ret; +} + +const(ubyte)[] makeChunk(const(ubyte)[] data) { + const(ubyte)[] ret; + + ret = cast(const(ubyte)[]) toHex(data.length); + ret ~= cast(const(ubyte)[]) "\r\n"; + ret ~= data; + ret ~= cast(const(ubyte)[]) "\r\n"; + + return ret; +} + +string toHex(int num) { + string ret; + do { // do-while so that zero yields "0" rather than an empty string; digits are collected least-significant first + int v = num % 16; + num /= 16; + char d = cast(char) ((v < 10) ? v + '0' : (v-10) + 'a'); + ret ~= d; + } while(num); + + return to!string(array(ret.retro)); +} + +mixin template GenericMain(alias fun, T...) { // kinda hacky - the T... 
is passed to Cgi's constructor in standard cgi mode, and ignored elsewhere +version(embedded_httpd) + import arsd.httpd; + + void main() { + version(embedded_httpd) { + serveHttp(&fun, 8080);//5005); + return; + } + + version(fastcgi) { + FCGX_Stream* input, output, error; + FCGX_ParamArray env; + + const(ubyte)[] getFcgiChunk() { + const(ubyte)[] ret; + while(FCGX_HasSeenEOF(input) != -1) + ret ~= cast(ubyte) FCGX_GetChar(input); + return ret; + } + + void writeFcgi(const(ubyte)[] data) { + FCGX_PutStr(data.ptr, data.length, output); + } + + while(FCGX_Accept(&input, &output, &error, &env) >= 0) { + string[string] fcgienv; + + for(auto e = env; e !is null && *e !is null; e++) { + string cur = to!string(*e); + auto idx = cur.indexOf("="); + string name, value; + if(idx == -1) + name = cur; + else { + name = cur[0 .. idx]; + value = cur[idx + 1 .. $]; + } + + fcgienv[name] = value; + } + + string getFcgiEnvVar(string what) { + if(what in fcgienv) + return fcgienv[what]; + return ""; + } + + auto cgi = new Cgi(5_000_000, &getFcgiEnvVar, &getFcgiChunk, &writeFcgi); + try { + fun(cgi); + cgi.close(); + } catch(Throwable t) { + auto msg = t.toString; + FCGX_PutStr(cast(ubyte*) msg.ptr, msg.length, error); + msg = "Status: 500 Internal Server Error\n"; + msg ~= "Content-Type: text/plain\n\n"; + debug msg ~= t.toString; + else msg ~= "An unexpected error has occurred."; + + FCGX_PutStr(cast(ubyte*) msg.ptr, msg.length, output); + } + } + + return; + } + + auto cgi = new Cgi(T); + + try { + fun(cgi); + cgi.close(); + } catch (Throwable c) { + // FIXME: this sucks + string message = "An unexpected error has occurred."; + + debug message = c.toString(); + + writefln("Status: 500 Internal Server Error\nContent-Type: text/html\n\n%s", "Internal Server Error



"~(std.array.replace(std.array.replace(message, "<", "&lt;"), ">", "&gt;"))~"
"); // the message was entity-escaped above so it cannot inject markup into the error page + + string str = c.toString(); + int idx = str.indexOf("\n"); + if(idx != -1) + str = str[0..idx]; + stderr.writeln(str); + } + } +} + +string printDate(long date) { + return std.date.toUTCString(date).replace("UTC", "GMT"); // the standard is stupid +} + + +version(with_cgi_packed) { +// This is temporary until Phobos supports base64 +import std.base64; +immutable(ubyte)[] base64UrlDecode(string e) { + string encoded = e.idup; + while (encoded.length % 4) { + encoded ~= "="; // add padding + } + + // convert base64 URL to standard base 64 + encoded = encoded.replace("-", "+"); + encoded = encoded.replace("_", "/"); + + return cast(immutable(ubyte)[]) Base64.decode(encoded); +} + // should be set as arsd_packed_data + string packedDataEncode(in string[string] variables) { + string result; + + bool outputted = false; + foreach(k, v; variables) { + if(outputted) + result ~= "&"; + else + outputted = true; + + result ~= std.uri.encodeComponent(k) ~ "=" ~ std.uri.encodeComponent(v); + } + + result = cast(string) Base64.encode(cast(ubyte[]) result); + + // url variant; replace returns a new string, so the result must be assigned back + result = result.replace("=", ""); + result = result.replace("+", "-"); + result = result.replace("/", "_"); + + return result; + } +} + + +// Referencing this gigantic typeid seems to remind the compiler +// to actually put the symbol in the object file. 
I guess the immutable +// assoc array array isn't actually included in druntime +void hackAroundLinkerError() { + writeln(typeid(const(immutable(char)[][])[immutable(char)[]])); + writeln(typeid(immutable(char)[][][immutable(char)[]])); + writeln(typeid(Cgi.UploadedFile[immutable(char)[]])); + writeln(typeid(immutable(Cgi.UploadedFile)[immutable(char)[]])); + writeln(typeid(immutable(char[])[immutable(char)[]])); +} + + + + + +version(fastcgi) { + pragma(lib, "fcgi"); + extern(C) { + + struct FCGX_Stream { + ubyte* rdNext; + ubyte* wrNext; + ubyte* stop; + ubyte* stopUnget; + int isReader; + int isClosed; + int wasFCloseCalled; + int FCGI_errno; + void* function(FCGX_Stream* stream) fillBuffProc; + void* function(FCGX_Stream* stream, int doClose) emptyBuffProc; + void* data; + } + + alias char** FCGX_ParamArray; + + int FCGX_Accept(FCGX_Stream** stdin, FCGX_Stream** stdout, FCGX_Stream** stderr, FCGX_ParamArray* envp); + int FCGX_GetChar(FCGX_Stream* stream); + int FCGX_PutStr(const ubyte* str, int n, FCGX_Stream* stream); + int FCGX_HasSeenEOF(FCGX_Stream* stream); + + } +} + + + + + +/* +Copyright: Adam D. Ruppe, 2008 - 2011 +License: Boost License 1.0. +Authors: Adam D. Ruppe + + Copyright Adam D. Ruppe 2008 - 2011. +Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or copy at + http://www.boost.org/LICENSE_1_0.txt) +*/ diff --git a/dom.d b/dom.d deleted file mode 120000 index 505b2fa..0000000 --- a/dom.d +++ /dev/null @@ -1 +0,0 @@ -/home/me/program/djs/dom.d \ No newline at end of file diff --git a/dom.d b/dom.d new file mode 100644 index 0000000..74536b6 --- /dev/null +++ b/dom.d @@ -0,0 +1,3455 @@ +module arsd.dom; + +import std.string; +// import std.ascii; +import std.exception; + +import std.uri; +import std.array; + +import std.stdio; + +// Biggest (known) fixme left for "tag soup":

....

in loose mode should close it on the second opening.
+// Biggest FIXME for real documents: character set encoding detection
+
+// Should I support Element.dataset? it does dash to camelcase for attribute "data-xxx-xxx"
+
+/*
+	To pwn haml, it might be interesting to add a
+
+	getElementBySelectorAndMakeIfNotThere
+
+	It first does querySelector. If null, find the path that was closest to matching using
+	the weight rules or the left to right reading, whatever gets close.
+
+	Then make the elements so it works and return the first matching element.
+
+
+	virtual Element setMainPart() {} // usually does innertext but can be overridden by certain elements
+
+
+	The haml converter produces a mixin string that does getElementBySelectorAndMakeIfNotThere and calls
+	setMainPart on it. boom.
+*/
+
+/// Returns a freshly allocated array holding arr[0 .. position + 1], then
+/// every element of what, then the rest of arr. arr itself is not modified.
+T[] insertAfter(T)(T[] arr, int position, T[] what) {
+	assert(position < arr.length);
+	T[] ret;
+	ret.length = arr.length + what.length;
+	int a = 0;
+	foreach(i; arr[0..position+1])
+		ret[a++] = i;
+
+	foreach(i; what)
+		ret[a++] = i;
+
+	foreach(i; arr[position+1..$])
+		ret[a++] = i;
+
+	return ret;
+}
+
+/// Linear search: true if some element of arr compares equal (==) to item.
+bool isInArray(T)(T item, T[] arr) {
+	foreach(i; arr)
+		if(item == i)
+			return true;
+	return false;
+}
+
+/// Minimal array-backed LIFO stack.
+class Stack(T) {
+	/// Pushes t on top of the stack.
+	void push(T t) {
+		arr ~= t;
+	}
+
+	/// Removes and returns the top element; asserts the stack is not empty.
+	T pop() {
+		assert(arr.length);
+		T tmp = arr[$-1];
+		arr.length = arr.length - 1;
+		return tmp;
+	}
+
+	/// Returns the top element without removing it.
+	T peek() {
+		return arr[$-1];
+	}
+
+	/// True when nothing is stored.
+	bool empty() {
+		return arr.length ? false : true;
+	}
+
+	T[] arr;
+}
+
+/// Depth-first, pre-order walk over an element and all of its descendants,
+/// exposed through front/popFront/empty.
+class ElementStream {
+	/// The element the walk is currently positioned on.
+	Element front() {
+		return current.element;
+	}
+
+	/// Starts the walk at start, which is itself the first element yielded.
+	this(Element start) {
+		current.element = start;
+		current.childPosition = -1;
+		isEmpty = false;
+		stack = new Stack!(Current);
+	}
+
+	/*
+		Handle it
+		handle its children
+
+	*/
+	void popFront() {
+	    more:
+	    	if(isEmpty) return;
+
+		current.childPosition++;
+		if(current.childPosition >= current.element.children.length) {
+			// this element has no more children: go back up, or finish if we are at the start
+			if(stack.empty())
+				isEmpty = true;
+			else {
+				current = stack.pop();
+				goto more;
+			}
+		} else {
+			// descend into the next child, remembering where we were
+			stack.push(current);
+			current.element = current.element.children[current.childPosition];
+			current.childPosition = -1;
+		}
+	}
+
+	/// Call after the current element has been removed from its parent, so
+	/// the next popFront does not skip the sibling that took its slot.
+	void currentKilled() {
+		if(stack.empty) // should never happen
+			isEmpty = true;
+		else {
+			current = stack.pop();
+			current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
+		}
+	}
+
+	/// True once every element has been visited.
+	bool empty() {
+		return isEmpty;
+	}
+
+	/// One frame of the walk: an element and the index of the child last entered.
+	struct Current {
+		Element element;
+		int childPosition;
+	}
+
+	Current current;
+
+	Stack!(Current) stack;
+	bool isEmpty;
+}
+
+/// Returns a shallow copy of a string-to-string associative array.
+string[string] dup(in string[string] arr) {
+	string[string] ret;
+	foreach(k, v; arr)
+		ret[k] = v;
+	return ret;
+}
+
+/*
+	swapNode
+	cloneNode
+*/
+class Element {
+	Element[] children;
+	string tagName;
+	string[string] attributes;
+	bool selfClosed;
+
+	Document parentDocument;
+
+	/// Creates an element owned by _parentDocument. A null _attributes leaves
+	/// the attribute table empty.
+	this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) {
+		parentDocument = _parentDocument;
+		tagName = _tagName;
+		if(_attributes !is null)
+			attributes = _attributes;
+		selfClosed = _selfClosed;
+	}
+
+	/// The closest preceding sibling, optionally restricted to siblings with
+	/// the given tag name. Null if there is no parent or no such sibling.
+	@property Element previousSibling(string tagName = null) {
+		if(this.parentNode is null)
+			return null;
+		Element ps = null;
+		foreach(e; this.parentNode.childNodes) {
+			if(e is this)
+				break;
+			if(tagName is null || e.tagName == tagName)
+				ps = e;
+		}
+
+		return ps;
+	}
+
+	/// The closest following sibling, optionally restricted to siblings with
+	/// the given tag name. Null if there is no parent or no such sibling.
+	@property Element nextSibling(string tagName = null) {
+		if(this.parentNode is null)
+			return null;
+		Element ns = null;
+		bool mightBe = false;
+		foreach(e; this.parentNode.childNodes) {
+			if(e is this) {
+				mightBe = true;
+				continue;
+			}
+			if(mightBe)
+				if(tagName is null || e.tagName == tagName) {
+					ns = e;
+					break;
+				}
+		}
+
+		return ns;
+	}
+
+
+	// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
+	/// Builds (once, then caches) a CssStyle from the style attribute plus the
+	/// legacy presentational attributes width, height, bgcolor, text and color.
+	@property CssStyle computedStyle() {
+		if(_computedStyle is null) {
+			auto style = this.getAttribute("style");
+		/* we'll treat shitty old html attributes as css here */
+			if(this.hasAttribute("width"))
+				style ~= "; width: " ~ this.width;
+			if(this.hasAttribute("height"))
+				style ~= "; height: " ~ this.height; // was "width", which overrode the real width and never set the height
+			if(this.hasAttribute("bgcolor"))
+				style ~= "; background-color: " ~ this.bgcolor;
+			if(this.tagName == "body" && this.hasAttribute("text"))
+				style ~= "; color: " ~ this.text;
+			if(this.hasAttribute("color"))
+				style ~= "; color: " ~ this.color;
+		/* done */
+
+
+			_computedStyle = new CssStyle(null, style); // gives at least something to work with
+		}
+		return _computedStyle;
+	}
+
+	private CssStyle _computedStyle;
+
+	// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
+	version(browser) {
+		void* expansionHook;
+		int offsetWidth;
+		int offsetHeight;
+		int offsetLeft;
+		int offsetTop;
+		Element offsetParent;
+		bool hasLayout;
+		int zIndex;
+
+		/// offsetLeft summed over this element and its whole offsetParent chain.
+		int absoluteLeft() {
+			int a = offsetLeft;
+			auto p = offsetParent;
+			while(p) {
+				a += p.offsetLeft;
+				p = p.offsetParent;
+			}
+
+			return a;
+		}
+
+		/// offsetTop summed over this element and its whole offsetParent chain.
+		int absoluteTop() {
+			int a = offsetTop;
+			auto p = offsetParent;
+			while(p) {
+				a += p.offsetTop;
+				p = p.offsetParent;
+			}
+
+			return a;
+		}
+	}
+
+	// Back to the regular dom functions
+
+	/// Deep copy: a new parentless element with copied attributes and
+	/// recursively cloned children.
+	@property Element cloned() {
+		auto e = new Element(parentDocument, tagName, attributes.dup, selfClosed);
+		foreach(child; children) {
+			e.appendChild(child.cloned);
+		}
+
+		return e;
+	}
+
+	/// Returns the first child of this element. If it has no children, returns null.
+	@property Element firstChild() {
+		return children.length ? children[0] : null;
+	}
+
+	/// Returns the last child of this element. If it has no children, returns null.
+	@property Element lastChild() {
+		return children.length ? children[$ - 1] : null;
+	}
+
+	/// Convenience constructor when you don't care about the parentDocument. Note this might break things on the document.
+	/// Note also that without a parent document, elements are always in strict, case-sensitive mode.
+	this(string _tagName, string[string] _attributes = null) {
+		tagName = _tagName;
+		if(_attributes !is null)
+			attributes = _attributes;
+		selfClosed = tagName.isInArray(selfClosedElements);
+	}
+
+	/*
+	private this() {
+
+	}
+	*/
+
+	private this(Document _parentDocument) {
+		parentDocument = _parentDocument;
+	}
+
+	// Currently a no-op: the whole body is commented out.
+	private void parseAttributes(string[] whichOnes = null) {
+/+
+		if(whichOnes is null)
+			whichOnes = attributes.keys;
+		foreach(attr; whichOnes) {
+			switch(attr) {
+				case "id":
+
+				break;
+				case "class":
+
+				break;
+				case "style":
+
+				break;
+				default:
+					// we don't care about it
+			}
+		}
++/
+	}
+
+    public:
+	/// Appends the given element to this one. The given element must not have a parent already.
+	Element appendChild(Element e)
+		in {
+			assert(e !is null);
+			assert(e.parentNode is null);
+		}
+		out (ret) {
+			assert(e.parentNode is this);
+			assert(e is ret);
+		}
+	body {
+		selfClosed = false; // an element with children can no longer be self-closed
+		e.parentNode = this;
+		e.parentDocument = this.parentDocument;
+		children ~= e;
+		return e;
+	}
+
+	/// Inserts the second element to this node, right before the first param
+	Element insertBefore(Element where, Element what)
+		in {
+			assert(where !is null);
+			assert(where.parentNode is this);
+			assert(what !is null);
+			assert(what.parentNode is null);
+		}
+		out (ret) {
+			assert(where.parentNode is this);
+			assert(what.parentNode is this);
+			assert(ret is what);
+		}
+	body {
+		foreach(i, e; children) {
+			if(e is where) {
+				children = children[0..i] ~ what ~ children[i..$];
+				what.parentNode = this;
+				return what;
+			}
+		}
+
+		// Only reached if `where` is not in children, which the in contract
+		// rules out; the out contract then fails in non-release builds.
+		return what;
+	}
+
+	/// Inserts the second element to this node, right after the first param
+	Element insertAfter(Element where, Element what)
+		in {
+			assert(where !is null);
+			assert(where.parentNode is this);
+			assert(what !is null);
+			assert(what.parentNode is null);
+		}
+		out (ret) {
+			assert(where.parentNode is this);
+			assert(what.parentNode is this);
+			assert(ret is what);
+		}
+	body {
+		foreach(i, e; children) {
+			if(e is where) {
+				children = children[0 .. i + 1] ~ what ~ children[i + 1 .. $];
+				what.parentNode = this;
+				return what;
+			}
+		}
+
+		// See insertBefore: only reached if `where` is not actually a child.
+		return what;
+	}
+
+
+	/// convenience function to quickly add a tag with some text or
+	/// other relevant info (for example, it's a src for an <img> element
+	/// instead of inner text)
+	///
+	/// What childInfo / childInfo2 mean depends on tagName:
+	/// img: src / alt; option: text / value; input: name / value (type is
+	/// forced to "hidden"); a: text / href; script, style: raw source;
+	/// meta: name / content; anything else: inner text.
+	/// Uses parentDocument to create the element.
+	Element addChild(string tagName, string childInfo = null, string childInfo2 = null) {
+		auto e = parentDocument.createElement(tagName);
+		if(childInfo !is null)
+			switch(tagName) {
+				case "img":
+					e.src = childInfo;
+					if(childInfo2 !is null)
+						e.alt = childInfo2;
+				break;
+				case "option":
+					e.innerText = childInfo;
+					if(childInfo2 !is null)
+						e.value = childInfo2;
+				break;
+				case "input":
+					e.type = "hidden";
+					e.name = childInfo;
+					if(childInfo2 !is null)
+						e.value = childInfo2;
+				break;
+				case "a":
+					e.innerText = childInfo;
+					if(childInfo2 !is null)
+						e.href = childInfo2;
+				break;
+				case "script":
+				case "style":
+					e.innerRawSource = childInfo;
+				break;
+				case "meta":
+					e.name = childInfo;
+					if(childInfo2 !is null)
+						e.content = childInfo2;
+				break;
+				default:
+					e.innerText = childInfo;
+			}
+		return appendChild(e);
+	}
+
+	/// Creates a tagName element, puts firstChild inside it, appends the new
+	/// element to this one and returns the new element.
+	Element addChild(string tagName, Element firstChild)
+		in {
+			assert(parentDocument !is null);
+			assert(firstChild !is null);
+		}
+		out(ret) {
+			assert(ret !is null);
+			assert(ret.parentNode is this);
+			assert(firstChild.parentNode is ret);
+		}
+	body {
+		auto e = parentDocument.createElement(tagName);
+		e.appendChild(firstChild);
+		this.appendChild(e);
+		return e;
+	}
+
+	/// Walks up the parent chain and returns the nearest ancestor with the
+	/// given tag name, cast to T. When tagName is null it is inferred from T
+	/// for Form ("form"), Table ("table") and Link ("a"); for any other T the
+	/// immediate parent is used.
+	/// Throws: ElementNotFoundException if there is no such ancestor or the
+	/// cast to T fails.
+	T getParent(T)(string tagName = null) if(is(T : Element)) {
+		if(tagName is null) {
+			static if(is(T == Form))
+				tagName = "form";
+			else static if(is(T == Table))
+				tagName = "table";
+			else static if(is(T == Link))
+				tagName = "a"; // was a second is(T == Table) test with `==`, i.e. dead code that did nothing
+		}
+
+		auto par = this.parentNode;
+		while(par !is null) {
+			if(tagName is null || par.tagName == tagName)
+				break;
+			par = par.parentNode;
+		}
+
+		auto t = cast(T) par;
+		if(t is null)
+			throw new ElementNotFoundException("", tagName ~ " parent not found");
+
+		return t;
+	}
+
+	/// swaps
one child for a new thing. Returns the old child which is now parentless. + Element swapNode(Element child, Element replacement) { + foreach(ref c; this.children) + if(c is child) { + c.parentNode = null; + c = replacement; + c.parentNode = this; + return child; + } + assert(0); + } + + + Element getElementById(string id) { + foreach(e; tree) + if(e.id == id) + return e; + return null; + } + + final SomeElementType requireElementById(SomeElementType = Element)(string id) + if( + is(SomeElementType : Element) + ) + out(ret) { + assert(ret !is null); + } + body { + auto e = cast(SomeElementType) getElementById(id); + if(e is null) + throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id); + return e; + } + + final SomeElementType requireSelector(SomeElementType = Element)(string selector) + if( + is(SomeElementType : Element) + ) + out(ret) { + assert(ret !is null); + } + body { + auto e = cast(SomeElementType) querySelector(selector); + if(e is null) + throw new ElementNotFoundException(SomeElementType.stringof, selector); + return e; + } + + Element querySelector(string selector) { + // FIXME: inefficient + auto list = getElementsBySelector(selector); + if(list.length == 0) + return null; + return list[0]; + } + + /// a more standards-compliant alias for getElementsBySelector + Element[] querySelectorAll(string selector) { + return getElementsBySelector(selector); + } + + Element[] getElementsBySelector(string selector) { + if(parentDocument && parentDocument.loose) + selector = selector.toLower; + + Element[] ret; + foreach(sel; parseSelectorString(selector)) + ret ~= sel.getElements(this); + return ret; + } + + Element[] getElementsByTagName(string tag) { + if(parentDocument && parentDocument.loose) + tag = tag.toLower(); + Element[] ret; + foreach(e; tree) + if(e.tagName == tag) + ret ~= e; + return ret; + } + + Element appendText(string text) { + Element e = new TextNode(parentDocument, text); + return appendChild(e); + } + + @property 
Element[] childElements() { + Element[] ret; + foreach(c; children) + if(c.nodeType == 1) + ret ~= c; + return ret; + } + + /* + Does a CSS selector + + * -- all, default if nothing else is there + + tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector + + It is all additive + + OP + + space = descendant + > = direct descendant + + = sibling (E+F Matches any F element immediately preceded by a sibling element E) + + [foo] Foo is present as an attribute + [foo="warning"] Matches any E element whose "foo" attribute value is exactly equal to "warning". + E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning" + E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en". + + [item$=sdas] ends with + [item^-sdsad] begins with + + Quotes are optional here. + + Pseudos: + :first-child + :last-child + :link (same as a[href] for our purposes here) + + + There can be commas separating the selector. A comma separated list result is OR'd onto the main. + + + + This ONLY cares about elements. text, etc, are ignored + + + There should be two functions: given element, does it match the selector? 
and given a selector, give me all the elements
+	*/
+
+	/// Appends the given html to the element, returning the elements appended
+	Element[] appendHtml(string html) {
+		// NOTE(review): the two empty literals look like a wrapper tag lost in extraction - confirm against upstream
+		Document d = new Document("" ~ html ~ "");
+		return stealChildren(d.root);
+	}
+
+	/// Appends c to the class attribute (creating the attribute if needed).
+	/// Returns this for chaining.
+	Element addClass(string c) {
+		string cn = getAttribute("class");
+		if(cn is null) {
+			setAttribute("class", c);
+			return this;
+		} else {
+			setAttribute("class", cn ~ " " ~ c);
+		}
+
+		return this;
+	}
+
+	/// Removes every occurrence of the class c from the class attribute and
+	/// returns this for chaining. Only whole class names are removed:
+	/// removing "foo" leaves "foobar" untouched.
+	Element removeClass(string c) {
+		// Rebuild the list token by token; a plain substring replace would
+		// also chew pieces out of longer class names.
+		string remaining = "";
+		foreach(cla; className.split(" ")) {
+			if(cla.length == 0 || cla == c)
+				continue;
+			if(remaining.length)
+				remaining ~= " ";
+			remaining ~= cla;
+		}
+
+		className = remaining;
+
+		return this;
+	}
+
+	/// True if c is one of the space-separated names in the class attribute.
+	bool hasClass(string c) {
+		auto cn = className;
+
+		// cheap rejection before splitting
+		int idx = cn.indexOf(c);
+		if(idx == -1)
+			return false;
+
+		foreach(cla; cn.split(" "))
+			if(cla == c)
+				return true;
+		return false;
+
+		/*
+		int rightSide = idx + c.length;
+
+		bool checkRight() {
+			if(rightSide == cn.length)
+				return true; // it's the only class
+			else if(iswhite(cn[rightSide]))
+				return true;
+			return false; // this is a substring of something else..
+		}
+
+		if(idx == 0) {
+			return checkRight();
+		} else {
+			if(!iswhite(cn[idx - 1]))
+				return false; // substring
+			return checkRight();
+		}
+
+		assert(0);
+		*/
+	}
+
+	/// Detaches this element from its current parent and appends it to newParent.
+	void reparent(Element newParent)
+		in {
+			assert(newParent !is null);
+			assert(parentNode !is null);
+		}
+		out {
+			assert(this.parentNode == newParent);
+			assert(isInArray(this, newParent.children));
+		}
+	body {
+		parentNode.removeChild(this);
+		newParent.appendChild(this);
+	}
+
+	/// Inserts child into this element's children directly after where,
+	/// which must already be a child of this element.
+	void insertChildAfter(Element child, Element where)
+		in {
+			assert(child !is null);
+			assert(where !is null);
+			assert(where.parentNode is this);
+			assert(!selfClosed);
+			assert(isInArray(where, children));
+		}
+		out {
+			assert(child.parentNode is this);
+			assert(where.parentNode is this);
+			assert(isInArray(where, children));
+			assert(isInArray(child, children));
+		}
+	body {
+		foreach(i, c; children) {
+			if(c is where) {
+				i++;
+				children = children[0..i] ~ child ~ children[i..$];
+				child.parentNode = this;
+				break;
+			}
+		}
+	}
+
+	/// Moves all children of e into this element and returns them. With a
+	/// null position they are appended; otherwise they are inserted
+	/// immediately before position, which must be one of our children.
+	/// e is left with no children.
+	Element[] stealChildren(Element e, Element position = null)
+		in {
+			assert(!selfClosed);
+			assert(e !is null);
+			if(position !is null)
+				assert(isInArray(position, children));
+		}
+		out {
+			assert(e.children.length == 0);
+		}
+	body {
+		foreach(c; e.children)
+			c.parentNode = this;
+		if(position is null)
+			children ~= e.children;
+		else {
+			foreach(i, child; children) {
+				if(child is position) {
+					children = children[0..i] ~
+						e.children ~
+						children[i..$];
+					break;
+				}
+			}
+		}
+
+		auto ret = e.children.dup;
+		e.children.length = 0;
+
+		return ret;
+	}
+
+	/// Puts the current element first in our children list. The given element must not have a parent already.
+ Element prependChild(Element e) + in { + assert(e.parentNode is null); + assert(!selfClosed); + } + out { + assert(e.parentNode is this); + assert(children[0] is e); + } + body { + e.parentNode = this; + children = e ~ children; + return e; + } + + + /** + Provides easy access to attributes, like in javascript + */ + // name != "popFront" is so duck typing doesn't think it's a range + string opDispatch(string name)(string v = null) if(name != "popFront") { + if(v !is null) + setAttribute(name, v); + return getAttribute(name); + } + + /** + Returns the element's children. + */ + @property const(Element[]) childNodes() const { + return children; + } + + /// Mutable version of the same + @property Element[] childNodes() { // FIXME: the above should be inout + return children; + } + + + // should return int + @property int nodeType() const { + return 1; + } + + /** + Returns a string containing all child elements, formatted such that it could be pasted into + an XML file. + */ + @property string innerHTML() const { + string s = ""; + if(children is null) { + assert(s !is null); + return s; + } + foreach(child; children) { + assert(child !is null); + auto ts = child.toString(); + assert(ts !is null); + s ~= ts; + } + + assert(s !is null); + + return s; + } + + /** + Takes some html and replaces the element's children with the tree made from the string. + */ + @property void innerHTML(string html) { + if(html.length) + selfClosed = false; + + auto doc = new Document(); + doc.parse("" ~ html ~ ""); // FIXME: this should preserve the strictness of the parent document + + children = doc.root.children; + foreach(c; children) { + c.parentNode = this; + } + + auto newpd = this.parentDocument; + foreach(c; this.tree) { + c.parentDocument = newpd; + } + + doc.root.children = null; + } + + /** + Replaces this node with the given html string, which is parsed + + Note: this invalidates the this reference, since it is removed + from the tree. 
+
+		Returns the new children that replace this.
+	*/
+	@property Element[] outerHTML(string html) {
+		auto doc = new Document();
+		doc.parse("" ~ html ~ ""); // FIXME: needs to preserve the strictness
+
+		// adopt the parsed nodes as our own children, then let stripOut
+		// hoist them into our parent in our place
+		children = doc.root.children;
+		foreach(c; children) {
+			c.parentNode = this;
+		}
+
+
+		stripOut();
+
+		return doc.root.children;
+	}
+
+	/// The element serialized with its own tag; same as toString().
+	@property string outerHTML() {
+		return this.toString();
+	}
+
+	/// Replaces all children with a single RawSource node holding rawSource.
+	@property void innerRawSource(string rawSource) {
+		children.length = 0;
+		auto rs = new RawSource(parentDocument, rawSource);
+		rs.parentNode = this;
+
+		children ~= rs;
+	}
+
+	/**
+		Gets the given attribute value, or null if the
+		attribute is not set.
+
+		Note that the returned string is decoded, so it no longer contains any xml entities.
+	*/
+	string getAttribute(string name) const {
+		if(parentDocument && parentDocument.loose)
+			name = name.toLower();
+		auto e = name in attributes;
+		if(e)
+			return *e;
+		else
+			return null;
+	}
+
+	/**
+		Sets an attribute. Returns this for easy chaining
+
+		For href and src, any leading "vbscript:" / "javascript:" scheme
+		(case-insensitive, surrounding whitespace ignored) is removed from
+		the value before it is stored. Other values are stored unchanged.
+	*/
+	Element setAttribute(string name, string value) {
+		if(parentDocument && parentDocument.loose)
+			name = name.toLower();
+
+		// I never use this shit legitimately and neither should you
+		if(name.toLower == "href" || name.toLower == "src") {
+			// Detect and cut the prefix on the same stripped text. The old
+			// code tested value.strip but sliced value, so leading whitespace
+			// shifted the cut; it also missed a vbscript: hidden behind a
+			// javascript: prefix.
+			auto cleaned = value.strip;
+			bool changed = false;
+			for(;;) {
+				auto lowered = cleaned.toLower;
+				if(lowered.startsWith("vbscript:"))
+					cleaned = cleaned[9..$].strip;
+				else if(lowered.startsWith("javascript:"))
+					cleaned = cleaned[11..$].strip;
+				else
+					break;
+				changed = true;
+			}
+			// leave ordinary values byte-for-byte as given
+			if(changed)
+				value = cleaned;
+		}
+
+		attributes[name] = value;
+
+		return this;
+	}
+
+	/**
+		Extension
+
+		Returns true if the attribute is present, even with an empty value.
+	*/
+	bool hasAttribute(string name) {
+		if(parentDocument && parentDocument.loose)
+			name = name.toLower();
+
+		if(name in attributes)
+			return true;
+		else
+			return false;
+	}
+
+	/**
+		Extension
+
+		Removes the attribute if it is present; otherwise does nothing.
+	*/
+	void removeAttribute(string name) {
+		if(parentDocument && parentDocument.loose)
+			name = name.toLower();
+		if(name in attributes)
+			attributes.remove(name);
+	}
+
+	/**
+		Gets the class attribute's contents. Returns
+		an empty string if it has no class.
+ */ + string className() const { + auto c = getAttribute("class"); + if(c is null) + return ""; + return c; + } + + Element className(string c) { + setAttribute("class", c); + return this; + } + + string nodeValue() const { + return ""; + } + + Element replaceChild(Element find, Element replace) + in { + assert(find !is null); + assert(replace !is null); + assert(replace.parentNode is null); + } + out { + assert(replace.parentNode is this); + assert(find.parentNode is null); + } + body { + for(int i = 0; i < children.length; i++) { + if(children[i] is find) { + replace.parentNode = this; + children[i].parentNode = null; + children[i] = replace; + return replace; + } + } + + throw new Exception("no such child"); + } + + /** + Removes the given child from this list. + + Returns the removed element. + */ + Element removeChild(Element c) + in { + assert(c !is null); + assert(c.parentNode is this); + } + out { + foreach(child; children) + assert(child !is c); + assert(c.parentNode is null); + } + body { + foreach(i, e; children) { + if(e is c) { + children = children[0..i] ~ children [i+1..$]; + c.parentNode = null; + return c; + } + } + + throw new Exception("no such child"); + } + + Element[] removeChildren() + out (ret) { + assert(children.length == 0); + foreach(r; ret) + assert(r.parentNode is null); + } + body { + Element[] oldChildren = children.dup; + foreach(c; oldChildren) + c.parentNode = null; + + children.length = 0; + + return oldChildren; + } + + /** + EXTENSION + + Replaces the given element with a whole group. 
+	*/
+	void replaceChild(Element find, Element[] replace)
+		in {
+			assert(find !is null);
+			assert(replace !is null);
+			assert(find.parentNode is this);
+			foreach(r; replace)
+				assert(r.parentNode is null);
+		}
+		out {
+			assert(find.parentNode is null);
+			assert(children.length >= replace.length);
+			foreach(child; children)
+				assert(child !is find);
+			foreach(r; replace)
+				assert(r.parentNode is this);
+		}
+	body {
+		if(replace.length == 0) {
+			removeChild(find);
+			return;
+		}
+		assert(replace.length);
+		for(int i = 0; i < children.length; i++) {
+			if(children[i] is find) {
+				children[i].parentNode = null; // this element should now be dead
+				// the first replacement takes find's slot, the rest are spliced in after it
+				children[i] = replace[0];
+				children = .insertAfter(children, i, replace[1..$]);
+				foreach(e; replace)
+					e.parentNode = this;
+				return;
+			}
+		}
+
+		throw new Exception("no such child");
+	}
+
+	Element parentNode;
+
+	/**
+		Strips this tag out of the document, putting its inner html
+		as children of the parent.
+	*/
+	void stripOut()
+		in {
+			assert(parentNode !is null);
+		}
+		out {
+			assert(parentNode is null);
+			assert(children.length == 0);
+		}
+	body {
+		foreach(c; children)
+			c.parentNode = null; // remove the parent
+		if(children.length)
+			parentNode.replaceChild(this, this.children);
+		else
+			parentNode.removeChild(this);
+		this.children.length = 0; // we reparented them all above
+	}
+
+	/// Puts e in this element's place in the tree (detaching e from its old
+	/// parent first if it has one) and returns e.
+	Element replaceWith(Element e) {
+		if(e.parentNode !is null)
+			e.parentNode.removeChild(e);
+		this.parentNode.replaceChild(this, e);
+		return e;
+	}
+
+	/**
+		INCOMPATIBLE -- extension
+
+		Splits the className into an array of each class given
+	*/
+	string[] classNames() const {
+		return className().split(" ");
+	}
+
+	/**
+		Fetches the first consecutive text nodes, concatenated together
+	*/
+	string firstInnerText() const {
+		string s;
+		foreach(child; children) {
+			if(child.nodeType != NodeType.Text)
+				break;
+
+			s ~= child.nodeValue();
+		}
+		return s;
+	}
+
+	/**
+		Fetch the inside text, with all tags stripped out
+	*/
+	@property string innerText() const {
+		string s;
+		foreach(child; children) {
+			if(child.nodeType != NodeType.Text)
+				s ~= child.innerText;
+			else
+				s ~= child.nodeValue();
+		}
+		return s;
+	}
+
+	/**
+		Sets the inside text, replacing all children
+	*/
+	@property void innerText(string text) {
+		assert(!selfClosed);
+		Element e = new TextNode(parentDocument, text);
+		e.parentNode = this;
+		children = [e];
+	}
+
+	/**
+		Strips this node out of the document, replacing it with the given text
+	*/
+	@property void outerText(string text) {
+		parentNode.replaceChild(this, new TextNode(parentDocument, text));
+	}
+
+	/**
+		Same result as innerText; the tag with all tags stripped out
+	*/
+	@property string outerText() const {
+		return innerText();
+	}
+
+
+	invariant () {
+		if(children !is null)
+		foreach(child; children) {
+		//	assert(parentNode !is null);
+			assert(child !is null);
+			assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName));
+			assert(child !is this);
+			assert(child !is parentNode);
+		}
+
+		//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
+		// reason is so you can create these without needing a reference to the document
+	}
+
+	/**
+		Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
+		an XML file.
+
+		Self-closed elements are written as a single tag ending in " />";
+		all others get an opening tag, their innerHTML and a matching closing tag.
+	*/
+	override string toString() const {
+		assert(tagName !is null);
+		string s = "<" ~ tagName;
+
+		foreach(n, v ; attributes) {
+			assert(n !is null);
+			//assert(v !is null);
+			s ~= " " ~ n ~ "=\"" ~ htmlEntitiesEncode(v) ~ "\"";
+		}
+
+		if(selfClosed){
+			s ~= " />";
+			return s;
+		}
+
+		s ~= ">";
+
+		s ~= innerHTML();
+
+		// closing tag; the previous empty-string append left every element unterminated
+		s ~= "</" ~ tagName ~ ">";
+
+		assert(s !is null);
+
+		return s;
+	}
+
+	/**
+		Returns a lazy range of all its children, recursively.
+ */ + ElementStream tree() { + return new ElementStream(this); + } +} + +class DocumentFragment : Element { + this(Document _parentDocument) { + tagName = "#fragment"; + super(_parentDocument); + } + + override string toString() const { + return this.innerHTML; + } +} + +string htmlEntitiesEncode(string data) { + char[] output = "".dup; + foreach(dchar d; data) { + if(d == '&') + output ~= "&"; + else if (d == '<') + output ~= "<"; + else if (d == '>') + output ~= ">"; + else if (d == '\"') + output ~= """; + else if (d < 128 && d > 0) + output ~= d; + else + output ~= "&#" ~ std.conv.to!string(cast(int) d) ~ ";"; + } + + //assert(output !is null); // this fails on empty attributes..... + return assumeUnique(output); + +// data = data.replace("\u00a0", " "); +} + +string xmlEntitiesEncode(string data) { + return htmlEntitiesEncode(data); +} + +dchar parseEntity(in dchar[] entity) { + switch(entity[1..$-1]) { + case "quot": + return '"'; + case "apos": + return '\''; + case "lt": + return '<'; + case "gt": + return '>'; + // the next are html rather than xml + /* + case "cent": + case "pound": + case "sect": + case "deg": + case "micro" + */ + case "lsquo": + return '\u2018'; + case "rsquo": + return '\u2019'; + case "ldquo": + return '\u201c'; + case "rdquo": + return '\u201d'; + case "reg": + return '\u00ae'; + case "trade": + return '\u2122'; + case "nbsp": + return '\u00a0'; + case "amp": + return '&'; + case "copy": + return '\u00a9'; + case "eacute": + return '\u00e9'; + case "mdash": + return '\u2014'; + // and handling numeric entities + default: + if(entity[1] == '#') { + if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) { + auto hex = entity[3..$-1]; + + auto p = intFromHex(to!string(hex).toLower()); + return cast(dchar) p; + } else { + auto decimal = entity[2..$-1]; + + auto p = std.conv.to!int(decimal); + return cast(dchar) p; + } + } else + return '?'; + } + + assert(0); +} + +import std.utf; + +string htmlEntitiesDecode(string data, bool 
strict = false) { + dchar[] a; + + bool tryingEntity = false; + dchar[] entityBeingTried; + int entityAttemptIndex = 0; + + foreach(dchar ch; data) { + if(tryingEntity) { + entityAttemptIndex++; + entityBeingTried ~= ch; + + if(ch == ';') { + tryingEntity = false; + a ~= parseEntity(entityBeingTried); + } else { + if(entityAttemptIndex >= 7) { + if(strict) + throw new Exception("unterminated entity at " ~ to!string(entityBeingTried)); + else { + tryingEntity = false; + a ~= entityBeingTried; + } + } + } + } else { + if(ch == '&') { + tryingEntity = true; + entityBeingTried = null; + entityBeingTried ~= ch; + entityAttemptIndex = 0; + } else { + a ~= ch; + } + } + } + + return std.conv.to!string(a); +} + +class RawSource : Element { + this(Document _parentDocument, string s) { + super(_parentDocument); + source = s; + tagName = "#raw"; + } + + override string nodeValue() const { + return this.toString(); + } + + override int nodeType() const { + return 100; + } + + override string toString() const { + return source; + } + + override Element appendChild(Element e) { + assert(0, "Cannot append to a text node"); + } + + + string source; +} + +enum NodeType { Text = 3} + +class TextNode : Element { + public: + this(Document _parentDocument, string e) { + super(_parentDocument); + contents = e; + tagName = "#text"; + } + + static TextNode fromUndecodedString(Document _parentDocument, string html) { + auto e = new TextNode(_parentDocument, ""); + e.contents = htmlEntitiesDecode(html, _parentDocument is null ? 
false : !_parentDocument.loose); + return e; + } + + override @property Element cloned() { + return new TextNode(parentDocument, contents); + } + + override string nodeValue() const { + return this.contents; //toString(); + } + + override int nodeType() const { + return NodeType.Text; + } + + override string toString() const { + string s; + if(contents.length) + s = htmlEntitiesEncode(contents); + else + s = ""; + + assert(s !is null); + return s; + } + + override Element appendChild(Element e) { + assert(0, "Cannot append to a text node"); + } + + string contents; +} + +/** + There are subclasses of Element offering improved helper + functions for the element in HTML. +*/ + +class Link : Element { + this(Document _parentDocument) { + super(_parentDocument); + } + + this(string href, string text) { + super("a"); + setAttribute("href", href); + innerText = text; + } +/+ + /// Returns everything in the href EXCEPT the query string + @property string targetSansQuery() { + + } + + @property string domainName() { + + } + + @property string path ++/ + /// This gets a variable from the URL's query string. + string getValue(string name) { + auto vars = variablesHash(); + if(name in vars) + return vars[name]; + return null; + } + + private string[string] variablesHash() { + string href = getAttribute("href"); + if(href is null) + return null; + + int ques = href.indexOf("?"); + string str = ""; + if(ques != -1) { + str = href[ques+1..$]; + + int fragment = str.indexOf("#"); + if(fragment != -1) + str = str[0..fragment]; + } + + string[] variables = str.split("&"); + + string[string] hash; + + foreach(var; variables) { + int index = var.indexOf("="); + if(index == -1) + hash[var] = ""; + else { + hash[decodeComponent(var[0..index])] = decodeComponent(var[index + 1 .. 
$]); + } + } + + return hash; + } + + /*private*/ void updateQueryString(string[string] vars) { + string href = getAttribute("href"); + + int question = href.indexOf("?"); + if(question != -1) + href = href[0..question]; + + string frag = ""; + int fragment = href.indexOf("#"); + if(fragment != -1) { + frag = href[fragment..$]; + href = href[0..fragment]; + } + + string query = "?"; + bool first = true; + foreach(name, value; vars) { + if(!first) + query ~= "&"; + else + first = false; + + query ~= encodeComponent(name); + if(value.length) + query ~= "=" ~ encodeComponent(value); + } + + if(query != "?") + href ~= query; + + href ~= frag; + + setAttribute("href", href); + } + + /// Sets or adds the variable with the given name to the given value + /// It automatically URI encodes the values and takes care of the ? and &. + void setValue(string name, string variable) { + auto vars = variablesHash(); + vars[name] = variable; + + updateQueryString(vars); + } + + /// Removes the given variable from the query string + void removeValue(string name) { + auto vars = variablesHash(); + vars.remove(name); + + updateQueryString(vars); + } + + /* + override string toString() { + + } + + override string getAttribute(string name) { + if(name == "href") { + + } else + return super.getAttribute(name); + } + */ +} + +class Form : Element { + this(Document _parentDocument) { + super(_parentDocument); + tagName = "form"; + } + + // FIXME: doesn't handle arrays; multiple fields can have the same name + + /// Set's the form field's value. For input boxes, this sets the value attribute. For + /// textareas, it sets the innerText. For radio boxes and select boxes, it removes + /// the checked/selected attribute from all, and adds it to the one matching the value. + /// For checkboxes, if the value is non-null and not empty, it checks the box. + + /// If you set a value that doesn't exist, it throws an exception if makeNew is false. 
+	/// Otherwise, it makes a new input with type=hidden to keep the value.
+	///
+	/// A single match is handled according to its tag: textarea, input or
+	/// select (any other tag hits assert(0)). For a select with no option
+	/// matching the value, a new selected option is added to that select.
+	/// Several matches are assumed to be a radio group.
+	void setValue(string field, string value, bool makeNew = true) {
+		auto eles = getField(field);
+		if(eles.length == 0) {
+			if(makeNew) {
+				addField(field, value);
+				return;
+			} else
+				throw new Exception("form field does not exist");
+		}
+
+		if(eles.length == 1) {
+			auto e = eles[0];
+			switch(e.tagName) {
+				default: assert(0);
+				case "textarea":
+					e.innerText = value;
+				break;
+				case "input":
+					string type = e.getAttribute("type");
+					if(type is null) {
+						e.value = value;
+						return;
+					}
+					switch(type) {
+						case "checkbox":
+						case "radio":
+							// any non-empty value checks the box
+							if(value.length)
+								e.setAttribute("checked", "checked");
+							else
+								e.removeAttribute("checked");
+						break;
+						default:
+							e.value = value;
+							return;
+					}
+				break;
+				case "select":
+					bool found = false;
+					foreach(child; e.tree) {
+						if(child.tagName != "option")
+							continue;
+						// an option without a value attribute submits its text
+						string val = child.getAttribute("value");
+						if(val is null)
+							val = child.innerText;
+						if(val == value) {
+							child.setAttribute("selected", "selected");
+							found = true;
+						} else
+							child.removeAttribute("selected");
+					}
+
+					if(!found) {
+						// Add the missing option to the select itself; it used
+						// to be appended to the form, outside any select.
+						e.addChild("option", value)
+							.setAttribute("selected", "selected");
+					}
+				break;
+			}
+		} else {
+			// assume radio boxes
+			foreach(e; eles) {
+				string val = e.getAttribute("value");
+				//if(val is null)
+				//	throw new Exception("don't know what to do with radio boxes with null value");
+				if(val == value)
+					e.setAttribute("checked", "checked");
+				else
+					e.removeAttribute("checked");
+			}
+		}
+	}
+
+	/// Gets the value of the field; what would be given if it submitted right now. (so
+	/// it handles select boxes and radio buttons too).
For checkboxes, if a value isn't + /// given, but it is checked, it returns "checked", since null and "" are indistinguishable + string getValue(string field) { + auto eles = getField(field); + if(eles.length == 0) + return ""; + if(eles.length == 1) { + auto e = eles[0]; + switch(e.tagName) { + default: assert(0); + case "input": + if(e.type == "checkbox") { + if(e.checked) + return e.value.length ? e.value : "checked"; + return ""; + } else + return e.value; + case "textarea": + return e.innerText; + case "select": + foreach(child; e.tree) { + if(child.tagName != "option") + continue; + if(child.selected) + return child.value; + } + break; + } + } else { + // assuming radio + foreach(e; eles) { + if(e.checked) + return e.value; + } + } + + return ""; + } + + // FIXME: doesn't handle multiple elements with the same name (except radio buttons) + string getPostableData() { + bool[string] namesDone; + + string ret; + bool outputted = false; + + foreach(e; getElementsBySelector("[name]")) { + if(e.name in namesDone) + continue; + + if(outputted) + ret ~= "&"; + else + outputted = true; + + ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(getValue(e.name)); + + namesDone[e.name] = true; + } + + return ret; + } + + /// Gets the actual elements with the given name + Element[] getField(string name) { + Element[] ret; + foreach(e; tree) { + if(e.name == name) + ret ~= e; + } + return ret; + } + + // Grabs the