module arsd.dom;

import std.string;
// import std.ascii;
import std.exception;
import std.uri;
import std.array;

import std.stdio;

import arsd.characterencodings;

// tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
// that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo so I say kill kill kill.

// Should I support Element.dataset? it does dash to camelcase for attribute "data-xxx-xxx"

/// Placeholder: iterates over every element of the document tree but does not modify anything yet.
void sanitizeHtml(Document document) {
	foreach(e; document.root.tree) {

	}
}

/// Returns a newly allocated array made of arr[0 .. position + 1], followed by
/// every item of what, followed by the rest of arr. Neither input is modified.
T[] insertAfter(T)(T[] arr, int position, T[] what) {
	assert(position < arr.length);
	T[] ret;
	ret.length = arr.length + what.length;
	int a = 0;
	foreach(i; arr[0..position+1])
		ret[a++] = i;

	foreach(i; what)
		ret[a++] = i;

	foreach(i; arr[position+1..$])
		ret[a++] = i;

	return ret;
}

/// Linear search: returns true if any item of arr compares equal (==) to item.
bool isInArray(T)(T item, T[] arr) {
	foreach(i; arr)
		if(item == i)
			return true;
	return false;
}

/// A simple growable LIFO stack. It starts out on a 64 item buffer embedded in
/// the object and only allocates once that buffer is full.
final class Stack(T) {
	this() {
		internalLength = 0;
		arr = initialBuffer;
	}

	/// Pushes t on top of the stack, growing the storage if it is full.
	void push(T t) {
		if(internalLength >= arr.length) {
			// The old buffer must be carried over into the new one; simply
			// replacing arr would throw away every item pushed so far.
			auto old = arr;
			if(arr.length < 4096)
				arr = new T[arr.length * 2];
			else
				arr = new T[arr.length + 4096];
			arr[0 .. old.length] = old[];
		}

		arr[internalLength] = t;
		internalLength++;
	}

	/// Removes and returns the top item. The stack must not be empty.
	T pop() {
		assert(internalLength);
		internalLength--;
		return arr[internalLength];
	}

	/// Returns the top item without removing it. The stack must not be empty.
	T peek() {
		assert(internalLength);
		return arr[internalLength - 1];
	}

	/// Returns true if there are no items on the stack.
	bool empty() {
		return internalLength ? false : true;
	}

	private T[] arr; // current storage; initially a slice of initialBuffer
	private size_t internalLength; // number of items currently on the stack
	private T[64] initialBuffer;
	// the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
	// using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
	// function thanks to this, and push() was actually one of the slowest individual functions in the code!
}

///.
final class ElementStream {
	///.
Element front() { return current.element; } ///. this(Element start) { current.element = start; current.childPosition = -1; isEmpty = false; stack = new Stack!(Current); } /* Handle it handle its children */ ///. void popFront() { more: if(isEmpty) return; // FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times) current.childPosition++; if(current.childPosition >= current.element.children.length) { if(stack.empty()) isEmpty = true; else { current = stack.pop(); goto more; } } else { stack.push(current); current.element = current.element.children[current.childPosition]; current.childPosition = -1; } } ///. void currentKilled() { if(stack.empty) // should never happen isEmpty = true; else { current = stack.pop(); current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right } } ///. bool empty() { return isEmpty; } ///. struct Current { Element element; int childPosition; } ///. Current current; ///. Stack!(Current) stack; ///. bool isEmpty; } ///. string[string] dup(in string[string] arr) { string[string] ret; foreach(k, v; arr) ret[k] = v; return ret; } /* swapNode cloneNode */ ///. class Element { ///. Element[] children; ///. string tagName; ///. string[string] attributes; ///. private bool selfClosed; /// Get the parent Document object that contains this element. /// It may be null, so remember to check for that. Document parentDocument; ///. this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) { parentDocument = _parentDocument; tagName = _tagName; if(_attributes !is null) attributes = _attributes; selfClosed = _selfClosed; } /// Removes all inner content from the tag; all child text and elements are gone. void removeAllChildren() out { assert(this.children.length == 0); } body { children = null; } ///. 
@property Element previousSibling(string tagName = null) {
	// Walks the parent's children up to this element, remembering the last
	// one seen (optionally only those with the given tag name).
	if(this.parentNode is null)
		return null;
	Element ps = null;
	foreach(e; this.parentNode.childNodes) {
		if(e is this)
			break;
		if(tagName is null || e.tagName == tagName)
			ps = e;
	}

	return ps;
}

/// Returns the first sibling after this element (optionally the first one
/// with the given tag name), or null if there is none or there is no parent.
@property Element nextSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;
	Element ns = null;
	bool mightBe = false; // becomes true once we have walked past ourselves
	foreach(e; this.parentNode.childNodes) {
		if(e is this) {
			mightBe = true;
			continue;
		}
		if(mightBe)
			if(tagName is null || e.tagName == tagName) {
				ns = e;
				break;
			}
	}

	return ns;
}

// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
/// Builds (once, then caches) a CssStyle from the style attribute plus a few
/// legacy presentational attributes translated into CSS declarations.
@property CssStyle computedStyle() {
	if(_computedStyle is null) {
		auto style = this.getAttribute("style");
	/* we'll treat shitty old html attributes as css here */
		if(this.hasAttribute("width"))
			style ~= "; width: " ~ this.width;
		if(this.hasAttribute("height"))
			style ~= "; height: " ~ this.height; // was emitted as "width", clobbering the real width
		if(this.hasAttribute("bgcolor"))
			style ~= "; background-color: " ~ this.bgcolor;
		if(this.tagName == "body" && this.hasAttribute("text"))
			style ~= "; color: " ~ this.text;
		if(this.hasAttribute("color"))
			style ~= "; color: " ~ this.color;
	/* done */

		_computedStyle = new CssStyle(null, style); // gives at least something to work with
	}
	return _computedStyle;
}

private CssStyle _computedStyle;

/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	int absoluteLeft() {
		// sum of offsetLeft along the offsetParent chain
		int a = offsetLeft;
		auto p = offsetParent;
		while(p) {
			a += p.offsetLeft;
			p = p.offsetParent;
		}

		return a;
	}

	///ditto
	int absoluteTop() {
		// sum of offsetTop along the offsetParent chain
		int a = offsetTop;
		auto p = offsetParent;
		while(p) {
			a += p.offsetTop;
			p = p.offsetParent;
		}

		return a;
	}
}

// Back to the regular dom functions

/// Returns a deep copy of this element: same document, tag name and self-closed
/// flag, a copy of the attributes, and a clone of every child.
@property Element cloned()
	out(ret) {
		assert(ret.children.length == this.children.length);
		assert(ret.tagName == this.tagName);
	}
body {
	auto e = new Element(parentDocument, tagName, attributes.dup, selfClosed);
	foreach(child; children) {
		e.appendChild(child.cloned);
	}

	return e;
}

/// Returns the first child of this element. If it has no children, returns null.
@property Element firstChild() {
	return children.length ? children[0] : null;
}

/// Returns the last child of this element. If it has no children, returns null.
@property Element lastChild() {
	return children.length ? children[$ - 1] : null;
}

/// Convenience constructor when you don't care about the parentDocument. Note this might break things on the document.
/// Note also that without a parent document, elements are always in strict, case-sensitive mode.
this(string _tagName, string[string] _attributes = null) {
	tagName = _tagName;
	if(_attributes !is null)
		attributes = _attributes;
	selfClosed = tagName.isInArray(selfClosedElements);

	// this is meant to reserve some memory. It makes a small, but consistent improvement.
	//children.length = 8;
	//children.length = 0;
}

/*
private this() {

}
*/

// Used by subclasses in this module; only records the owning document.
private this(Document _parentDocument) {
	parentDocument = _parentDocument;
}

// Currently a no-op; the intended body is commented out below.
private void parseAttributes(string[] whichOnes = null) {
/+
	if(whichOnes is null)
		whichOnes = attributes.keys;
	foreach(attr; whichOnes) {
		switch(attr) {
			case "id":

			break;
			case "class":

			break;
			case "style":

			break;
			default:
				// we don't care about it
		}
	}
+/
}

public:
/// Appends the given element to this one. The given element must not have a parent already.
/// The child adopts this element's parentDocument, and this element stops being self-closed.
Element appendChild(Element e)
	in {
		assert(e !is null);
		assert(e.parentNode is null);
	}
	out (ret) {
		assert(e.parentNode is this);
		assert(e.parentDocument is this.parentDocument);
		assert(e is ret);
	}
body {
	selfClosed = false;
	e.parentNode = this;
	e.parentDocument = this.parentDocument;
	children ~= e;
	return e;
}

/// .
void appendChildren(Element[] children) { foreach(ele; children) appendChild(ele); } /// Inserts the second element to this node, right before the first param Element insertBefore(in Element where, Element what) in { assert(where !is null); assert(where.parentNode is this); assert(what !is null); assert(what.parentNode is null); } out (ret) { assert(where.parentNode is this); assert(what.parentNode is this); assert(what.parentDocument is this.parentDocument); assert(ret is what); } body { foreach(i, e; children) { if(e is where) { children = children[0..i] ~ what ~ children[i..$]; what.parentDocument = this.parentDocument; what.parentNode = this; return what; } } return what; assert(0); } ///. Element insertAfter(in Element where, Element what) in { assert(where !is null); assert(where.parentNode is this); assert(what !is null); assert(what.parentNode is null); } out (ret) { assert(where.parentNode is this); assert(what.parentNode is this); assert(what.parentDocument is this.parentDocument); assert(ret is what); } body { foreach(i, e; children) { if(e is where) { children = children[0 .. i + 1] ~ what ~ children[i + 1 .. $]; what.parentNode = this; what.parentDocument = this.parentDocument; return what; } } return what; assert(0); } /// Convenience function to try to do the right thing for HTML static Element make(string tagName, string childInfo = null, string childInfo2 = null) { bool selfClosed = tagName.isInArray(selfClosedElements); Element e; // want to create the right kind of object for the given tag... 
switch(tagName) { case "table": e = new Table(null); break; case "a": e = new Link(null); break; case "form": e = new Form(null); break; case "tr": e = new TableRow(null); break; case "td", "th": e = new TableCell(null, tagName); break; default: e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere } // make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too e.tagName = tagName; e.selfClosed = selfClosed; if(childInfo !is null) switch(tagName) { /* html5 convenience tags */ case "audio": if(childInfo.length) e.addChild("source", childInfo); if(childInfo2 !is null) e.appendText(childInfo2); break; case "source": e.src = childInfo; if(childInfo2 !is null) e.type = childInfo2; break; /* regular html 4 stuff */ case "img": e.src = childInfo; if(childInfo2 !is null) e.alt = childInfo2; break; case "option": e.innerText = childInfo; if(childInfo2 !is null) e.value = childInfo2; break; case "input": e.type = "hidden"; e.name = childInfo; if(childInfo2 !is null) e.value = childInfo2; break; case "a": e.innerText = childInfo; if(childInfo2 !is null) e.href = childInfo2; break; case "script": case "style": e.innerRawSource = childInfo; break; case "meta": e.name = childInfo; if(childInfo2 !is null) e.content = childInfo2; break; /* generically, assume we were passed text and perhaps class */ default: e.innerText = childInfo; if(childInfo2.length) e.className = childInfo2; } return e; } /// convenience function to quickly add a tag with some text or /// other relevant info (for example, it's a src for an element /// instead of inner text) Element addChild(string tagName, string childInfo = null, string childInfo2 = null) in { assert(tagName !is null); } out(e) { assert(e.parentNode is this); assert(e.parentDocument is this.parentDocument); } body { auto e = Element.make(tagName, childInfo, childInfo2); return appendChild(e); } /// Convenience function to append text intermixed with 
/// other children.
/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
/// or div.addChildren("Hello, ", user.name, "!");
/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
void addChildren(T...)(T t) {
	foreach(item; t) {
		// `item` is a value, so the type tests must go through typeof(item);
		// testing `is(item : Element)` is always false and rejected every argument.
		static if(is(typeof(item) : Element))
			appendChild(item);
		else static if(is(typeof(item) : const(char)[]) || is(typeof(item) : const(wchar)[]) || is(typeof(item) : const(dchar)[]))
			appendText(to!string(item));
		else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
	}
}

/// Creates a new element of the given tag through the parent document, puts
/// firstChild inside it, appends it to this element and returns it.
/// Requires this element to have a parentDocument.
Element addChild(string tagName, Element firstChild)
	in {
		assert(parentDocument !is null);
		assert(firstChild !is null);
	}
	out(ret) {
		assert(ret !is null);
		assert(ret.parentNode is this);
		assert(firstChild.parentNode is ret);

		assert(ret.parentDocument is this.parentDocument);
		assert(firstChild.parentDocument is this.parentDocument);
	}
body {
	auto e = parentDocument.createElement(tagName);
	e.appendChild(firstChild);
	this.appendChild(e);
	return e;
}

/// Creates a new element of the given tag with Element.make, appends it to
/// this element, then sets its innerHTML from the given Html's source.
Element addChild(string tagName, Html innerHtml)
	in {
	}
	out(ret) {
		assert(ret !is null);
		assert(ret.parentNode is this);
		assert(ret.parentDocument is this.parentDocument);
	}
body {
	auto e = Element.make(tagName);
	this.appendChild(e);
	e.innerHTML = innerHtml.source;
	return e;
}

/// Walks up the parentNode chain and returns the nearest ancestor with the
/// given tag name, cast to T. If tagName is null, a default is picked from T
/// (Form -> "form", Table -> "table", Link -> "a"); for any other T the
/// immediate parent is used.
/// Throws: ElementNotFoundException if there is no such ancestor or it is not a T.
T getParent(T)(string tagName = null) if(is(T : Element)) {
	if(tagName is null) {
		static if(is(T == Form))
			tagName = "form";
		else static if(is(T == Table))
			tagName = "table";
		else static if(is(T == Link))
			tagName = "a"; // was a duplicated Table test with a no-op `==` comparison
	}

	auto par = this.parentNode;
	while(par !is null) {
		if(tagName is null || par.tagName == tagName)
			break;
		par = par.parentNode;
	}

	auto t = cast(T) par;
	if(t is null)
		throw new ElementNotFoundException("", tagName ~ " parent not found");

	return t;
}

/// swaps one child for a new thing. Returns the old child which is now parentless.
Element swapNode(Element child, Element replacement) in { assert(child !is null); assert(replacement !is null); assert(child.parentNode is this); } out(ret) { assert(ret is child); assert(ret.parentNode is null); assert(replacement.parentNode is this); assert(replacement.parentDocument is this.parentDocument); } body { foreach(ref c; this.children) if(c is child) { c.parentNode = null; c = replacement; c.parentNode = this; c.parentDocument = this.parentDocument; return child; } assert(0); } ///. Element getElementById(string id) { // FIXME: I use this function a lot, and it's kinda slow // not terribly slow, but not great. foreach(e; tree) if(e.id == id) return e; return null; } ///. final SomeElementType requireElementById(SomeElementType = Element)(string id) if( is(SomeElementType : Element) ) out(ret) { assert(ret !is null); } body { auto e = cast(SomeElementType) getElementById(id); if(e is null) throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id); return e; } ///. final SomeElementType requireSelector(SomeElementType = Element)(string selector) if( is(SomeElementType : Element) ) out(ret) { assert(ret !is null); } body { auto e = cast(SomeElementType) querySelector(selector); if(e is null) throw new ElementNotFoundException(SomeElementType.stringof, selector); return e; } /// Note: you can give multiple selectors, separated by commas. /// It will return the first match it finds. Element querySelector(string selector) { // FIXME: inefficient; it gets all results just to discard most of them auto list = getElementsBySelector(selector); if(list.length == 0) return null; return list[0]; } /// a more standards-compliant alias for getElementsBySelector Element[] querySelectorAll(string selector) { return getElementsBySelector(selector); } ///. Element[] getElementsBySelector(string selector) { // FIXME: this function could probably use some performance attention // ... 
but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app. // POSSIBLE FIXME: this also sends attribute things to lower in the selector, // but the actual get selector check is still case sensitive... if(parentDocument && parentDocument.loose) selector = selector.toLower; Element[] ret; foreach(sel; parseSelectorString(selector)) ret ~= sel.getElements(this); return ret; } ///. Element[] getElementsByTagName(string tag) { if(parentDocument && parentDocument.loose) tag = tag.toLower(); Element[] ret; foreach(e; tree) if(e.tagName == tag) ret ~= e; return ret; } ///. Element appendText(string text) { Element e = new TextNode(parentDocument, text); return appendChild(e); } ///. @property Element[] childElements() { Element[] ret; foreach(c; children) if(c.nodeType == 1) ret ~= c; return ret; } /* Does a CSS selector * -- all, default if nothing else is there tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector It is all additive OP space = descendant > = direct descendant + = sibling (E+F Matches any F element immediately preceded by a sibling element E) [foo] Foo is present as an attribute [foo="warning"] Matches any E element whose "foo" attribute value is exactly equal to "warning". E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning" E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en". [item$=sdas] ends with [item^-sdsad] begins with Quotes are optional here. Pseudos: :first-child :last-child :link (same as a[href] for our purposes here) There can be commas separating the selector. A comma separated list result is OR'd onto the main. This ONLY cares about elements. text, etc, are ignored There should be two functions: given element, does it match the selector? 
and given a selector, give me all the elements
*/

/// Appends the given html to the element, returning the elements appended
Element[] appendHtml(string html) {
	// NOTE(review): the empty strings around html look like the remains of a
	// wrapper tag pair (e.g. "<root>" ... "</root>") that got stripped from
	// this copy of the file - confirm against the upstream source.
	Document d = new Document("" ~ html ~ "");
	return stealChildren(d.root);
}

/// Adds c to the class attribute (space separated), creating the attribute
/// if it does not exist yet. Returns this for chaining.
Element addClass(string c) {
	string cn = getAttribute("class");
	if(cn is null) {
		setAttribute("class", c);
		return this;
	} else {
		setAttribute("class", cn ~ " " ~ c);
	}

	return this;
}

/// Removes the class c from the class attribute and returns this for chaining.
/// Only whole class names are removed: removing "foo" leaves "foobar" alone.
Element removeClass(string c) {
	// Rebuild the list token by token; a plain substring replace would also
	// chop c out of the middle of longer class names.
	string[] kept;
	foreach(cla; className.split(" "))
		if(cla.length && cla != c)
			kept ~= cla;
	className = kept.join(" ");
	return this;
}

/// Returns true if c is one of the space separated class names of this element.
bool hasClass(string c) {
	auto cn = className;

	// cheap rejection before bothering to split
	auto idx = cn.indexOf(c);
	if(idx == -1)
		return false;

	foreach(cla; cn.split(" "))
		if(cla == c)
			return true;
	return false;

	/*
	int rightSide = idx + c.length;

	bool checkRight() {
		if(rightSide == cn.length)
			return true; // it's the only class
		else if(iswhite(cn[rightSide]))
			return true;
		return false; // this is a substring of something else..
	}

	if(idx == 0) {
		return checkRight();
	} else {
		if(!iswhite(cn[idx - 1]))
			return false; // substring
		return checkRight();
	}

	assert(0);
	*/
}

/// Moves this element from its current parent to the end of newParent's children.
/// This element must currently have a parent.
void reparent(Element newParent)
	in {
		assert(newParent !is null);
		assert(parentNode !is null);
	}
	out {
		assert(this.parentNode == newParent);
		assert(isInArray(this, newParent.children));
	}
body {
	parentNode.removeChild(this);
	newParent.appendChild(this);
}

/// Inserts child into this element's children immediately after where,
/// which must already be one of this element's children.
void insertChildAfter(Element child, Element where)
	in {
		assert(child !is null);
		assert(where !is null);
		assert(where.parentNode is this);
		assert(!selfClosed);
		assert(isInArray(where, children));
	}
	out {
		assert(child.parentNode is this);
		assert(where.parentNode is this);
		assert(isInArray(where, children));
		assert(isInArray(child, children));
	}
body {
	foreach(i, c; children) {
		if(c is where) {
			i++; // insert after the match, not before it
			children = children[0..i] ~ child ~ children[i..$];
			child.parentNode = this;
			child.parentDocument = this.parentDocument;
			break;
		}
	}
}

///.
Element[] stealChildren(Element e, Element position = null) in { assert(!selfClosed); assert(e !is null); if(position !is null) assert(isInArray(position, children)); } out (ret) { assert(e.children.length == 0); debug foreach(child; ret) { assert(child.parentNode is this); assert(child.parentDocument is this.parentDocument); } } body { foreach(c; e.children) { c.parentNode = this; c.parentDocument = this.parentDocument; } if(position is null) children ~= e.children; else { foreach(i, child; children) { if(child is position) { children = children[0..i] ~ e.children ~ children[i..$]; break; } } } auto ret = e.children.dup; e.children.length = 0; return ret; } /// Puts the current element first in our children list. The given element must not have a parent already. Element prependChild(Element e) in { assert(e.parentNode is null); assert(!selfClosed); } out { assert(e.parentNode is this); assert(e.parentDocument is this.parentDocument); assert(children[0] is e); } body { e.parentNode = this; e.parentDocument = this.parentDocument; children = e ~ children; return e; } /** Provides easy access to attributes, like in javascript */ // name != "popFront" is so duck typing doesn't think it's a range string opDispatch(string name)(string v = null) if(name != "popFront") { if(v !is null) setAttribute(name, v); return getAttribute(name); } /** Returns the element's children. */ @property const(Element[]) childNodes() const { return children; } /// Mutable version of the same @property Element[] childNodes() { // FIXME: the above should be inout return children; } // should return int ///. @property int nodeType() const { return 1; } /** Returns a string containing all child elements, formatted such that it could be pasted into an XML file. 
*/ @property string innerHTML(Appender!string where = appender!string()) const { if(children is null) return ""; auto start = where.data.length; foreach(child; children) { assert(child !is null); child.writeToAppender(where); } return where.data[start .. $]; } /** Takes some html and replaces the element's children with the tree made from the string. */ @property void innerHTML(string html) { if(html.length) selfClosed = false; if(html.length == 0) { // I often say innerHTML = ""; as a shortcut to clear it out, // so let's optimize that slightly. removeAllChildren(); return; } auto doc = new Document(); doc.parse("" ~ html ~ ""); // FIXME: this should preserve the strictness of the parent document children = doc.root.children; foreach(c; children) { c.parentNode = this; c.parentDocument = this.parentDocument; } reparentTreeDocuments(); doc.root.children = null; } /// ditto @property void innerHTML(Html html) { this.innerHTML = html.source; } private void reparentTreeDocuments() { foreach(c; this.tree) c.parentDocument = this.parentDocument; } /** Replaces this node with the given html string, which is parsed Note: this invalidates the this reference, since it is removed from the tree. Returns the new children that replace this. */ @property Element[] outerHTML(string html) { auto doc = new Document(); doc.parse("" ~ html ~ ""); // FIXME: needs to preserve the strictness children = doc.root.children; foreach(c; children) { c.parentNode = this; c.parentDocument = this.parentDocument; } reparentTreeDocuments(); stripOut(); return doc.root.children; } ///. @property string outerHTML() { return this.toString(); } ///. @property void innerRawSource(string rawSource) { children.length = 0; auto rs = new RawSource(parentDocument, rawSource); rs.parentNode = this; children ~= rs; } /** Gets the given attribute value, or null if the attribute is not set. Note that the returned string is decoded, so it no longer contains any xml entities. 
*/
string getAttribute(string name) const {
	// in loose documents attribute names are looked up in lower case
	if(parentDocument && parentDocument.loose)
		name = name.toLower();
	auto e = name in attributes;
	if(e)
		return *e;
	else
		return null;
}

/**
	Sets an attribute. Returns this for easy chaining

	For href and src, any leading "javascript:" or "vbscript:" scheme
	prefixes (case-insensitive, ignoring surrounding whitespace) are removed
	from the value before it is stored.
*/
Element setAttribute(string name, string value) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	// I never use this shit legitimately and neither should you
	auto it = name.toLower;
	if(it == "href" || it == "src") {
		// The prefix is matched on the stripped text, so the slice has to be
		// taken from that same stripped text (slicing the raw value cut at the
		// wrong offset when there was leading whitespace). Loop so stacked
		// prefixes such as "javascript:javascript:" cannot survive one pass.
		for(;;) {
			auto v = value.strip;
			auto lowered = v.toLower();
			if(lowered.startsWith("vbscript:"))
				value = v[9..$];
			else if(lowered.startsWith("javascript:"))
				value = v[11..$];
			else
				break;
		}
	}

	attributes[name] = value;

	return this;
}

/**
	Extension

	Returns true if the attribute is present (even if its value is empty).
*/
bool hasAttribute(string name) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	if(name in attributes)
		return true;
	else
		return false;
}

/**
	Extension

	Removes the attribute if it is present; does nothing otherwise.
*/
void removeAttribute(string name) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();
	if(name in attributes)
		attributes.remove(name);
}

/**
	Gets the class attribute's contents. Returns
	an empty string if it has no class.
*/
string className() const {
	auto c = getAttribute("class");
	if(c is null)
		return "";
	return c;
}

/// Sets the class attribute to c and returns this for chaining.
Element className(string c) {
	setAttribute("class", c);
	return this;
}

/// Always the empty string for a plain element; overridden by subclasses.
string nodeValue() const {
	return "";
}

/// Replaces the child find with replace (which must not have a parent yet)
/// and returns replace. find ends up parentless.
/// Throws: Exception if find is not a child of this element.
Element replaceChild(Element find, Element replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(replace.parentNode is null);
	}
	out(ret) {
		assert(ret is replace);
		assert(replace.parentNode is this);
		assert(replace.parentDocument is this.parentDocument);
		assert(find.parentNode is null);
	}
body {
	for(int i = 0; i < children.length; i++) {
		if(children[i] is find) {
			replace.parentNode = this;
			children[i].parentNode = null;
			children[i] = replace;
			replace.parentDocument = this.parentDocument;
			return replace;
		}
	}

	throw new Exception("no such child");
}

/**
	Removes the given child from this list.

	Returns the removed element.
*/ Element removeChild(Element c) in { assert(c !is null); assert(c.parentNode is this); } out { debug foreach(child; children) assert(child !is c); assert(c.parentNode is null); } body { foreach(i, e; children) { if(e is c) { children = children[0..i] ~ children [i+1..$]; c.parentNode = null; return c; } } throw new Exception("no such child"); } ///. Element[] removeChildren() out (ret) { assert(children.length == 0); debug foreach(r; ret) assert(r.parentNode is null); } body { Element[] oldChildren = children.dup; foreach(c; oldChildren) c.parentNode = null; children.length = 0; return oldChildren; } /** EXTENSION Replaces the given element with a whole group. */ void replaceChild(Element find, Element[] replace) in { assert(find !is null); assert(replace !is null); assert(find.parentNode is this); debug foreach(r; replace) assert(r.parentNode is null); } out { assert(find.parentNode is null); assert(children.length >= replace.length); debug foreach(child; children) assert(child !is find); debug foreach(r; replace) assert(r.parentNode is this); } body { if(replace.length == 0) { removeChild(find); return; } assert(replace.length); for(int i = 0; i < children.length; i++) { if(children[i] is find) { children[i].parentNode = null; // this element should now be dead children[i] = replace[0]; foreach(e; replace) { e.parentNode = this; e.parentDocument = this.parentDocument; } children = .insertAfter(children, i, replace[1..$]); return; } } throw new Exception("no such child"); } ///. Element parentNode; /** Strips this tag out of the document, putting its inner html as children of the parent. 
*/ void stripOut() in { assert(parentNode !is null); } out { assert(parentNode is null); assert(children.length == 0); } body { foreach(c; children) c.parentNode = null; // remove the parent if(children.length) parentNode.replaceChild(this, this.children); else parentNode.removeChild(this); this.children.length = 0; // we reparented them all above } /// shorthand for this.parentNode.removeChild(this) with parentNode null check Element removeFromTree() in { } out(var) { assert(this.parentNode is null); assert(var is this); } body { if(this.parentNode is null) return this; this.parentNode.removeChild(this); return this; } /// Wraps this element inside the given element. /// It's like this.replaceWith(what); what.appendchild(this); Element wrapIn(Element what) in { assert(what !is null); } out(ret) { assert(this.parentNode is what); assert(ret is what); } body { this.replaceWith(what); what.appendChild(this); return what; } Element replaceWith(Element e) { if(e.parentNode !is null) e.parentNode.removeChild(e); this.parentNode.replaceChild(this, e); return e; } /** INCOMPATIBLE -- extension Splits the className into an array of each class given */ string[] classNames() const { return className().split(" "); } /** Fetches the first consecutive text nodes, concatenated together */ string firstInnerText() const { string s; foreach(child; children) { if(child.nodeType != NodeType.Text) break; s ~= child.nodeValue(); } return s; } /** Fetch the inside text, with all tags stripped out */ @property string innerText() const { string s; foreach(child; children) { if(child.nodeType != NodeType.Text) s ~= child.innerText; else s ~= child.nodeValue(); } return s; } /** Sets the inside text, replacing all children */ @property void innerText(string text) { selfClosed = false; Element e = new TextNode(parentDocument, text); e.parentNode = this; children = [e]; } /** Strips this node out of the document, replacing it with the given text */ @property void outerText(string text) { 
parentNode.replaceChild(this, new TextNode(parentDocument, text));
	}

	/**
		Same result as innerText; the tag with all tags stripped out
	*/
	@property string outerText() const {
		return innerText();
	}

	// Checks (in debug builds) that every child is non-null, points back at
	// this element as its parent, and is neither this element nor its parent.
	invariant () {
		if(children !is null)
		debug foreach(child; children) {
		//	assert(parentNode !is null);
			assert(child !is null);
			assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName));
			assert(child !is this);
			assert(child !is parentNode);
		}

		/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
		if(parentNode !is null) {
			// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
			auto lol = cast(TextNode) this;
			assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
		}
		+/
		//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
		// reason is so you can create these without needing a reference to the document
	}

	/**
		Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
		an XML file.
	*/
	override string toString() const {
		return writeToAppender();
	}

	/// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time.
	/// Returns the string it creates.
	string writeToAppender(Appender!string where = appender!string()) const {
		assert(tagName !is null);

		where.reserve((this.children.length + 1) * 512);

		// only the part written by this call is returned
		auto start = where.data.length;

		where.put("<");
		where.put(tagName);

		foreach(n, v ; attributes) {
			assert(n !is null);
			//assert(v !is null);
			where.put(" ");
			where.put(n);
			where.put("=\"");
			htmlEntitiesEncode(v, where);
			where.put("\"");
		}

		if(selfClosed){
			where.put(" />");
			return where.data[start .. $];
		}

		where.put('>');

		innerHTML(where);

		// closing tag; the literal here had been reduced to an unterminated string
		where.put("</");
		where.put(tagName);
		where.put('>');

		return where.data[start .. $];
	}

	/**
		Returns a lazy range of all its children, recursively.
	*/
	ElementStream tree() {
		return new ElementStream(this);
	}
}

/// An element with the tag name "#fragment" whose string form is just its inner HTML.
class DocumentFragment : Element {
	///.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "#fragment";
	}

	/// Returns only the children's markup; the fragment itself has no tag.
	override string toString() const {
		return this.innerHTML;
	}
}

/// Encodes data for use in HTML/XML text or attribute values: &, <, > and "
/// become named entities, and everything outside 1 .. 127 becomes a numeric
/// character reference. The result is appended to output; the return value
/// is the encoded text for data alone.
string htmlEntitiesEncode(string data, Appender!string output = appender!string()) {
	// if there's no entities, we can save a lot of time by not bothering with the
	// decoding loop. This check cuts the net toString time by better than half in my test.
	// let me know if it made your tests worse though, since if you use an entity in just about
	// every location, the check will add time... but I suspect the average experience is like mine
	// since the check gives up as soon as it can anyway.

	bool shortcut = true;
	foreach(char c; data) {
		// non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
		if(c == '<' || c == '>' || c == '"' || c == '&' || cast(uint) c > 127) {
			shortcut = false; // there's actual work to be done
			break;
		}
	}

	if(shortcut) {
		output.put(data);
		return data;
	}

	auto start = output.data.length;

	output.reserve(data.length + 64); // grab some extra space for the encoded entities

	foreach(dchar d; data) {
		// The entity names had been stripped from these literals, leaving the
		// raw characters (and a broken literal for the quote); emit the real
		// entities so the output is actually escaped.
		if(d == '&')
			output.put("&amp;");
		else if (d == '<')
			output.put("&lt;");
		else if (d == '>')
			output.put("&gt;");
		else if (d == '\"')
			output.put("&quot;");
		else if (d < 128 && d > 0)
			output.put(d);
		else
			output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";");
	}

	//assert(output !is null); // this fails on empty attributes.....
	return output.data[start .. $];

//	data = data.replace("\u00a0", " ");
}

/// Same as htmlEntitiesEncode with a fresh output buffer.
string xmlEntitiesEncode(string data) {
	return htmlEntitiesEncode(data);
}

///.
dchar parseEntity(in dchar[] entity) {
	// entity includes the leading '&' and the trailing ';'. Unknown named
	// entities decode to '?'.
	switch(entity[1..$-1]) {
		case "quot":
			return '"';
		case "apos":
			return '\'';
		case "lt":
			return '<';
		case "gt":
			return '>';
		// the next are html rather than xml

		/*
		case "cent":
		case "pound":
		case "sect":
		case "deg":
		case "micro"
		*/
		case "hellip":
			return '\u2026';
		case "laquo":
			return '\u00ab';
		case "raquo":
			return '\u00bb';
		case "lsquo":
			return '\u2018';
		case "rsquo":
			return '\u2019';
		case "ldquo":
			return '\u201c';
		case "rdquo":
			return '\u201d';
		case "reg":
			return '\u00ae';
		case "trade":
			return '\u2122';
		case "nbsp":
			return '\u00a0';
		case "amp":
			return '&';
		case "copy":
			return '\u00a9';
		case "eacute":
			return '\u00e9';
		case "mdash":
			return '\u2014';

		// and handling numeric entities
		default:
			if(entity[1] == '#') {
				if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
					auto hex = entity[3..$-1];

					auto p = intFromHex(to!string(hex).toLower());
					return cast(dchar) p;
				} else {
					auto decimal = entity[2..$-1];

					auto p = std.conv.to!int(decimal);
					return cast(dchar) p;
				}
			} else
				return '?';
	}

	assert(0);
}

import std.utf;

/// Replaces the entities in data with the characters they stand for.
///
/// An '&' starts an entity attempt that ends at the next ';'. If no ';' shows
/// up within 7 characters, or the input ends first, the attempt is copied to
/// the output as literal text, or, if strict is true, an Exception is thrown.
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if(data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time

	char[] a; // this seems to do a *better* job than appender!

	char[4] buffer;

	bool tryingEntity = false;
	dchar[] entityBeingTried;
	int entityAttemptIndex = 0;

	foreach(dchar ch; data) {
		if(tryingEntity) {
			entityAttemptIndex++;
			entityBeingTried ~= ch;

			if(ch == ';') {
				tryingEntity = false;
				a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried))];
			} else {
				if(entityAttemptIndex >= 7) {
					if(strict)
						throw new Exception("unterminated entity at " ~ to!string(entityBeingTried));
					else {
						tryingEntity = false;
						a ~= to!(char[])(entityBeingTried);
					}
				}
			}
		} else {
			if(ch == '&') {
				tryingEntity = true;
				entityBeingTried = null;
				entityBeingTried ~= ch;
				entityAttemptIndex = 0;
			} else {
				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
			}
		}
	}

	// The input may end while an attempt is still open (think "AT&T").
	// Without this the pending text was silently dropped from the result.
	if(tryingEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(entityBeingTried));
		else
			a ~= to!(char[])(entityBeingTried);
	}

	return cast(string) a; // assumeUnique is actually kinda slow, lol
}

/// A node holding source text that is written out verbatim (no entity encoding).
class RawSource : Element {
	///.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#raw";
	}

	/// Returns the raw source.
	override string nodeValue() const {
		return this.toString();
	}

	/// Always 100 for raw source nodes.
	override int nodeType() const {
		return 100;
	}

	/// Returns the raw source unchanged.
	override string toString() const {
		return source;
	}

	/// Not supported; always fails.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a text node");
	}

	/// The verbatim text of this node.
	string source;
}

/// Node type codes returned by nodeType().
enum NodeType { Text = 3}

/// A node holding decoded text; it is entity-encoded again when written out.
class TextNode : Element {
  public:
	///.
	this(Document _parentDocument, string e) {
		super(_parentDocument);
		contents = e;
		tagName = "#text";
	}

	string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes

	/// Creates a text node from markup text, decoding its entities. Decoding
	/// is strict unless the document is loose (or there is no document).
	static TextNode fromUndecodedString(Document _parentDocument, string html) {
		auto e = new TextNode(_parentDocument, "");
		e.contents = htmlEntitiesDecode(html, _parentDocument is null ? false : !_parentDocument.loose);
		return e;
	}

	/// Returns a new text node with the same document and contents.
	override @property Element cloned() {
		return new TextNode(parentDocument, contents);
	}

	/// Returns the (decoded) text.
	override string nodeValue() const {
		return this.contents; //toString();
	}

	/// Always NodeType.Text.
	override int nodeType() const {
		return NodeType.Text;
	}

	/// Appends the entity-encoded contents to where and returns that encoded text
	/// (the empty string if there are no contents).
	override string writeToAppender(Appender!string where = appender!string()) const {
		string s;
		if(contents.length)
			s = htmlEntitiesEncode(contents, where);
		else
			s = "";

		assert(s !is null);
		return s;
	}

	/// Not supported; always fails.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a text node");
	}

	///.
string contents;
}

/**
	There are subclasses of Element offering improved helper functions for the element in HTML.
*/

/// An "a" element with helpers for reading and rewriting the query string of its href.
class Link : Element {

	///.
	this(Document _parentDocument) {
		super(_parentDocument);
		this.tagName = "a";
	}

	///.
	this(string href, string text) {
		super("a");
		setAttribute("href", href);
		innerText = text;
	}
/+
	/// Returns everything in the href EXCEPT the query string
	@property string targetSansQuery() {

	}

	///.
	@property string domainName() {

	}

	///.
	@property string path
+/
	/// This gets a variable from the URL's query string.
	/// Returns null if the variable is not present.
	string getValue(string name) {
		auto vars = variablesHash();
		if(name in vars)
			return vars[name];
		return null;
	}

	/// Parses the query string of the href into a name => value table, URI-decoding
	/// both. A name without '=' maps to "". Returns null if there is no href.
	private string[string] variablesHash() {
		string href = getAttribute("href");
		if(href is null)
			return null;

		// the fragment comes after the query and is not part of it; drop it
		// first so a '?' inside the fragment is not taken for the query start
		auto fragment = href.indexOf("#");
		if(fragment != -1)
			href = href[0..fragment];

		auto ques = href.indexOf("?");
		string str = "";
		if(ques != -1)
			str = href[ques+1..$];

		string[] variables = str.split("&");

		string[string] hash;

		foreach(var; variables) {
			// an empty query, or nothing between two separators, is not a variable
			if(var.length == 0)
				continue;

			auto index = var.indexOf("=");
			if(index == -1)
				// decode here too: updateQueryString encodes every name, so a
				// name stored undecoded would be encoded twice on the way back
				hash[decodeComponent(var)] = "";
			else {
				hash[decodeComponent(var[0..index])] = decodeComponent(var[index + 1 .. $]);
			}
		}

		return hash;
	}

	/// Replaces the query string of the href with the given variables, URI-encoding
	/// names and values. The part before the query and the fragment are kept.
	/*private*/ void updateQueryString(string[string] vars) {
		string href = getAttribute("href");

		// a URL is laid out as path?query#fragment, so the fragment has to be
		// split off before the old query is cut away or it is lost with it
		string frag = "";
		auto fragment = href.indexOf("#");
		if(fragment != -1) {
			frag = href[fragment..$];
			href = href[0..fragment];
		}

		auto question = href.indexOf("?");
		if(question != -1)
			href = href[0..question];

		string query = "?";
		bool first = true;
		foreach(name, value; vars) {
			if(!first)
				query ~= "&";
			else
				first = false;

			query ~= encodeComponent(name);
			if(value.length)
				query ~= "=" ~ encodeComponent(value);
		}

		// no variables at all: leave the '?' off entirely
		if(query != "?")
			href ~= query;

		href ~= frag;

		setAttribute("href", href);
	}

	/// Sets or adds the variable with the given name to the given value
	/// It automatically URI encodes the values and takes care of the ? and &.
	void setValue(string name, string variable) {
		auto vars = variablesHash();
		vars[name] = variable;

		updateQueryString(vars);
	}

	/// Removes the given variable from the query string
	void removeValue(string name) {
		auto vars = variablesHash();
		vars.remove(name);

		updateQueryString(vars);
	}

	/*
	///.
	override string toString() {

	}

	///.
	override string getAttribute(string name) {
		if(name == "href") {

		} else
			return super.getAttribute(name);
	}
	*/
}

///.
class Form : Element {

	///.
	this(Document _parentDocument) {
		super(_parentDocument);
		tagName = "form";
	}

	// FIXME: doesn't handle arrays; multiple fields can have the same name

	/// Sets the form field's value. For input boxes, this sets the value attribute. For
	/// textareas, it sets the innerText. For radio boxes and select boxes, it removes
	/// the checked/selected attribute from all, and adds it to the one matching the value.
	/// For checkboxes, if the value is non-null and not empty, it checks the box.

	/// If you set a value that doesn't exist, it throws an exception if makeNew is false.
	/// Otherwise, it makes a new input with type=hidden to keep the value.
void setValue(string field, string value, bool makeNew = true) {
		auto eles = getField(field);
		if(eles.length == 0) {
			if(makeNew) {
				addField(field, value);
				return;
			} else
				throw new Exception("form field does not exist");
		}

		if(eles.length == 1) {
			auto e = eles[0];
			switch(e.tagName) {
				// NOTE(review): any other tag carrying this name ends up here
				default: assert(0);
				case "textarea":
					e.innerText = value;
				break;
				case "input":
					string type = e.getAttribute("type");
					if(type is null) {
						e.value = value;
						return;
					}
					switch(type) {
						case "checkbox":
						case "radio":
							// a lone box is simply switched on or off
							if(value.length)
								e.setAttribute("checked", "checked");
							else
								e.removeAttribute("checked");
						break;
						default:
							e.value = value;
							return;
					}
				break;
				case "select":
					bool found = false;
					foreach(child; e.tree) {
						if(child.tagName != "option")
							continue;
						// an option without a value attribute is matched by its text
						string val = child.getAttribute("value");
						if(val is null)
							val = child.innerText;
						if(val == value) {
							child.setAttribute("selected", "selected");
							found = true;
						} else
							child.removeAttribute("selected");
					}

					// no option matched: add one for the value and select it
					if(!found) {
						e.addChild("option", value)
						.setAttribute("selected", "selected");
					}
				break;
			}
		} else {
			// assume radio boxes
			foreach(e; eles) {
				string val = e.getAttribute("value");
				//if(val is null)
				//	throw new Exception("don't know what to do with radio boxes with null value");
				if(val == value)
					e.setAttribute("checked", "checked");
				else
					e.removeAttribute("checked");
			}
		}
	}

	/// Gets the value of the field; what would be given if it submitted right now. (so
	/// it handles select boxes and radio buttons too). For a lone checkbox or radio button,
	/// if a value isn't given, but it is checked, it returns "checked", since null and ""
	/// are indistinguishable; if it is not checked, it returns "".
	/// For select boxes, a selected option without a value attribute yields its innerText.
	/// Returns "" if there is no such field or nothing is selected.
	string getValue(string field) {
		auto eles = getField(field);
		if(eles.length == 0)
			return "";
		if(eles.length == 1) {
			auto e = eles[0];
			switch(e.tagName) {
				// NOTE(review): any other tag carrying this name ends up here
				default: assert(0);
				case "input":
					// setValue switches a lone radio button on and off exactly like
					// a checkbox, so an unchecked one must not report its value either
					if(e.type == "checkbox" || e.type == "radio") {
						if(e.checked)
							return e.value.length ? e.value : "checked";
						return "";
					} else
						return e.value;
				case "textarea":
					return e.innerText;
				case "select":
					foreach(child; e.tree) {
						if(child.tagName != "option")
							continue;
						if(child.selected) {
							// same rule setValue uses to match options: the text
							// stands in when there is no value attribute
							string val = child.getAttribute("value");
							if(val is null)
								val = child.innerText;
							return val;
						}
					}
				break;
			}
		} else {
			// assuming radio
			foreach(e; eles) {
				if(e.checked)
					return e.value;
			}
		}

		return "";
	}

	// FIXME: doesn't handle multiple elements with the same name (except radio buttons)
	/// Builds "name=value&name=value" from every descendant matching the selector
	/// [name], using getValue for the value and URI-encoding both parts. Each name
	/// is emitted once.
	string getPostableData() {
		bool[string] namesDone;

		string ret;
		bool outputted = false;

		foreach(e; getElementsBySelector("[name]")) {
			if(e.name in namesDone)
				continue;

			if(outputted)
				ret ~= "&";
			else
				outputted = true;

			ret ~= std.uri.encodeComponent(e.name) ~ "=" ~ std.uri.encodeComponent(getValue(e.name));

			namesDone[e.name] = true;
		}

		return ret;
	}

	/// Gets the actual elements with the given name
	Element[] getField(string name) {
		Element[] ret;
		foreach(e; tree) {
			if(e.name == name)
				ret ~= e;
		}
		return ret;
	}

	/// Grabs the