module arsd.dom;
import std.string;
// import std.ascii;
import std.exception;
import std.uri;
import std.array;
import std.stdio;
import arsd.characterencodings;
/+
/* *
Does a printf into an html format string.
eprintf(div, "%s is awesome. %d.",
"Adam", 10);
*/
// is this even a useful idea now that I have add children and such?
void eprintf(T...)(Element parent, string format, T data) {
}
+/
// Biggest (known) fixme left for "tag soup": <p> .... <p>
// in loose mode should close it on the second opening.
// Should I support Element.dataset? it does dash to camelcase for attribute "data-xxx-xxx"
/*
To pwn haml, it might be interesting to add a
getElementBySelectorAndMakeIfNotThere
It first does querySelector. If null, find the path that was closest to matching using
the weight rules or the left to right reading, whatever gets close.
Then make the elements so it works and return the first matching element.
virtual Element setMainPart() {} // usually does innertext but can be overridden by certain elements
The haml converter produces a mixin string that does getElementBySelectorAndMakeIfNotThere and calls
setMainPart on it. boom.
but meh
*/
/// Placeholder sanitizer: walks every element reachable from the document
/// root but does not apply any rule to them yet.
void sanitizeHtml(Document document) {
    auto walker = document.root.tree;
    while(!walker.empty) {
        // no sanitizing rules are implemented yet
        walker.popFront();
    }
}
/// Returns a newly allocated array holding arr[0 .. position + 1], then
/// every element of what, then the remainder of arr. Neither input is modified.
T[] insertAfter(T)(T[] arr, int position, T[] what) {
    assert(position < arr.length);

    auto merged = new T[arr.length + what.length];
    size_t splitAt = position + 1;
    size_t resumeAt = splitAt + what.length;

    merged[0 .. splitAt] = arr[0 .. splitAt];
    merged[splitAt .. resumeAt] = what[];
    merged[resumeAt .. $] = arr[splitAt .. $];

    return merged;
}
/// Linear search: true when some element of arr compares equal (==) to item.
bool isInArray(T)(T item, T[] arr) {
    for(size_t idx = 0; idx < arr.length; idx++) {
        if(item == arr[idx])
            return true;
    }
    return false;
}
/// A simple LIFO stack that starts out on a fixed 64-element buffer stored
/// inside the object and only allocates once that buffer is exhausted.
final class Stack(T) {
    this() {
        internalLength = 0;
        arr = initialBuffer[];
    }

    /// Pushes t on top of the stack, growing the backing storage if needed.
    void push(T t) {
        if(internalLength >= arr.length) {
            auto old = arr;
            // double while small, then grow linearly in 4096-element steps
            if(arr.length < 4096)
                arr = new T[arr.length * 2];
            else
                arr = new T[arr.length + 4096];
            // carry the existing elements over; without this copy everything
            // pushed before the growth would be lost
            arr[0 .. old.length] = old[];
        }

        arr[internalLength] = t;
        internalLength++;
    }

    /// Removes and returns the top element. The stack must not be empty.
    T pop() {
        assert(internalLength);
        internalLength--;
        return arr[internalLength];
    }

    /// Returns the top element without removing it. The stack must not be empty.
    T peek() {
        assert(internalLength);
        return arr[internalLength - 1];
    }

    /// True when the stack holds no elements.
    bool empty() {
        return internalLength == 0;
    }

    private T[] arr;
    private size_t internalLength;
    private T[64] initialBuffer;
    // the static array is allocated with this object, so if we have a small stack (which we prolly do; dom trees usually aren't insanely deep),
    // using this saves us a bunch of trips to the GC. In my last profiling, I got about a 50x improvement in the push()
    // function thanks to this, and push() was actually one of the slowest individual functions in the code!
}
/// Range-style walker over an element and all of its descendants. It starts
/// on the element given to the constructor and descends into children before
/// moving on to later siblings.
final class ElementStream {
    /// The element the stream is currently positioned on.
    Element front() {
        return current.element;
    }

    /// Starts a walk at start; start itself is the first front.
    this(Element start) {
        stack = new Stack!(Current);
        isEmpty = false;
        current.element = start;
        current.childPosition = -1;
    }

    /*
        Handle it
        handle its children
    */

    /// Advances to the next element, or marks the stream empty when no
    /// element is left.
    void popFront() {
        // FIXME: the profiler says this function is somewhat slow (noticeable because it can be called a lot of times)
        while(!isEmpty) {
            current.childPosition++;

            if(current.childPosition < current.element.children.length) {
                // descend into the next unvisited child, remembering where we were
                stack.push(current);
                current.element = current.element.children[current.childPosition];
                current.childPosition = -1;
                return;
            }

            // this element's children are exhausted
            if(stack.empty()) {
                isEmpty = true;
                return;
            }

            // go back to the parent and try its next child
            current = stack.pop();
        }
    }

    /// Call after the current element was removed from its parent: returns
    /// to the parent and rewinds its position by one so the next popFront
    /// lands on the right child.
    void currentKilled() {
        if(stack.empty) { // should never happen
            isEmpty = true;
            return;
        }

        current = stack.pop();
        current.childPosition--; // when it is killed, the parent is brought back a lil so when we popFront, this is then right
    }

    /// True once the walk has finished.
    bool empty() {
        return isEmpty;
    }

    /// An element plus the index of the child last descended into (-1 = none yet).
    struct Current {
        Element element;
        int childPosition;
    }

    /// Current position of the walk.
    Current current;

    /// Saved positions of the ancestors of current.
    Stack!(Current) stack;

    /// Set when the walk has finished.
    bool isEmpty;
}
/// Makes a shallow copy of a string-to-string associative array.
string[string] dup(in string[string] arr) {
    string[string] copy;
    foreach(key, value; arr) {
        copy[key] = value;
    }
    return copy;
}
/*
swapNode
cloneNode
*/
///.
class Element {
///.
Element[] children;
///.
string tagName;
///.
string[string] attributes;
///.
private bool selfClosed;
/// Get the parent Document object that contains this element.
/// It may be null, so remember to check for that.
Document parentDocument;
/// Creates an element with the given owning document, tag name, optional
/// attributes and self-closed flag. A null attribute array leaves the
/// element's attributes untouched.
this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) {
    this.parentDocument = _parentDocument;
    this.tagName = _tagName;
    this.selfClosed = _selfClosed;

    if(_attributes !is null)
        this.attributes = _attributes;
}
/// Drops every child (text and elements alike) by resetting the child list.
void removeAllChildren()
    out {
        assert(this.children.length == 0);
    }
body {
    this.children = null;
}
/// Returns the closest sibling before this element, optionally restricted to
/// siblings with the given tag name. Returns null when there is no parent or
/// no matching sibling.
@property Element previousSibling(string tagName = null) {
    auto parent = this.parentNode;
    if(parent is null)
        return null;

    Element candidate = null;
    foreach(sibling; parent.childNodes) {
        if(sibling is this)
            return candidate;
        if(tagName is null || sibling.tagName == tagName)
            candidate = sibling;
    }

    return candidate;
}
/// Returns the first sibling after this element, optionally restricted to
/// siblings with the given tag name. Returns null when there is no parent or
/// no matching sibling.
@property Element nextSibling(string tagName = null) {
    auto parent = this.parentNode;
    if(parent is null)
        return null;

    bool passedSelf = false;
    foreach(sibling; parent.childNodes) {
        if(sibling is this) {
            passedSelf = true;
            continue;
        }
        if(!passedSelf)
            continue;
        if(tagName is null || sibling.tagName == tagName)
            return sibling;
    }

    return null;
}
// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
/// Lazily builds and caches a CssStyle from the style attribute plus the
/// legacy presentational attributes width, height, bgcolor, text (on body
/// only) and color. The result is cached in _computedStyle on first use.
@property CssStyle computedStyle() {
    if(_computedStyle is null) {
        auto style = this.getAttribute("style");
        /* we'll treat shitty old html attributes as css here */
        if(this.hasAttribute("width"))
            style ~= "; width: " ~ this.width;
        if(this.hasAttribute("height"))
            style ~= "; height: " ~ this.height; // was emitted as "width", overriding the real width
        if(this.hasAttribute("bgcolor"))
            style ~= "; background-color: " ~ this.bgcolor;
        if(this.tagName == "body" && this.hasAttribute("text"))
            style ~= "; color: " ~ this.text;
        if(this.hasAttribute("color"))
            style ~= "; color: " ~ this.color;
        /* done */

        _computedStyle = new CssStyle(null, style); // gives at least something to work with
    }
    return _computedStyle;
}
private CssStyle _computedStyle;
/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
void* expansionHook; ///ditto
int offsetWidth; ///ditto
int offsetHeight; ///ditto
int offsetLeft; ///ditto
int offsetTop; ///ditto
Element offsetParent; ///ditto
bool hasLayout; ///ditto
int zIndex; ///ditto
/// Sum of offsetLeft for this element and every element along its
/// offsetParent chain.
int absoluteLeft() {
    int total = offsetLeft;
    for(auto ancestor = offsetParent; ancestor !is null; ancestor = ancestor.offsetParent)
        total += ancestor.offsetLeft;
    return total;
}
/// Sum of offsetTop for this element and every element along its
/// offsetParent chain.
int absoluteTop() {
    int total = offsetTop;
    for(auto ancestor = offsetParent; ancestor !is null; ancestor = ancestor.offsetParent)
        total += ancestor.offsetTop;
    return total;
}
}
// Back to the regular dom functions
/// Returns a copy of this element: same document, tag name and self-closed
/// flag, a copied attribute array, and a clone of every child appended in order.
@property Element cloned()
    out(ret) {
        assert(ret.children.length == this.children.length);
        assert(ret.tagName == this.tagName);
    }
body {
    auto copy = new Element(parentDocument, tagName, attributes.dup, selfClosed);
    for(size_t idx = 0; idx < children.length; idx++)
        copy.appendChild(children[idx].cloned);
    return copy;
}
/// Returns the first child of this element, or null when it has no children.
@property Element firstChild() {
    if(children.length == 0)
        return null;
    return children[0];
}
/// Returns the last child of this element, or null when it has no children.
@property Element lastChild() {
    if(children.length == 0)
        return null;
    return children[$ - 1];
}
/// Convenience constructor when you don't care about the parentDocument. Note this might break things on the document.
/// Note also that without a parent document, elements are always in strict, case-sensitive mode.
/// The self-closed flag is taken from whether the tag name is listed in selfClosedElements.
this(string _tagName, string[string] _attributes = null) {
    this.tagName = _tagName;

    if(_attributes !is null)
        this.attributes = _attributes;

    this.selfClosed = isInArray(this.tagName, selfClosedElements);

    // this is meant to reserve some memory. It makes a small, but consistent improvement.
    //children.length = 8;
    //children.length = 0;
}
/*
private this() {
}
*/
// Private constructor that records only the owning document; tagName,
// attributes and selfClosed keep their default values.
private this(Document _parentDocument) {
parentDocument = _parentDocument;
}
// Currently a no-op: the whole body is commented out. The disabled sketch
// below iterated the given attribute names (defaulting to all of them) and
// had empty cases for "id", "class" and "style".
private void parseAttributes(string[] whichOnes = null) {
/+
if(whichOnes is null)
whichOnes = attributes.keys;
foreach(attr; whichOnes) {
switch(attr) {
case "id":
break;
case "class":
break;
case "style":
break;
default:
// we don't care about it
}
}
+/
}
public:
/// Appends the given element as the last child of this one and returns it.
/// The element must not already have a parent; it adopts this element's
/// parentDocument, and this element stops being self-closed.
Element appendChild(Element e)
    in {
        assert(e !is null);
        assert(e.parentNode is null);
    }
    out (ret) {
        assert(e.parentNode is this);
        assert(e.parentDocument is this.parentDocument);
        assert(e is ret);
    }
body {
    e.parentDocument = this.parentDocument;
    e.parentNode = this;

    this.selfClosed = false;
    this.children ~= e;

    return e;
}
/// Appends each of the given elements, in order, via appendChild.
void appendChildren(Element[] children) {
    for(size_t idx = 0; idx < children.length; idx++) {
        appendChild(children[idx]);
    }
}
/// Inserts what into this element's children immediately before where and
/// returns what. If where is not found among the children, what is returned
/// without being inserted.
Element insertBefore(in Element where, Element what)
    in {
        assert(where !is null);
        assert(where.parentNode is this);
        assert(what !is null);
        assert(what.parentNode is null);
    }
    out (ret) {
        assert(where.parentNode is this);
        assert(what.parentNode is this);
        assert(what.parentDocument is this.parentDocument);
        assert(ret is what);
    }
body {
    foreach(idx, existing; children) {
        if(existing !is where)
            continue;

        what.parentDocument = this.parentDocument;
        what.parentNode = this;
        children = children[0 .. idx] ~ what ~ children[idx .. $];
        return what;
    }

    return what;
}
/// Inserts what into this element's children immediately after where and
/// returns what. If where is not found among the children, what is returned
/// without being inserted.
Element insertAfter(in Element where, Element what)
    in {
        assert(where !is null);
        assert(where.parentNode is this);
        assert(what !is null);
        assert(what.parentNode is null);
    }
    out (ret) {
        assert(where.parentNode is this);
        assert(what.parentNode is this);
        assert(what.parentDocument is this.parentDocument);
        assert(ret is what);
    }
body {
    foreach(idx, existing; children) {
        if(existing !is where)
            continue;

        auto splitAt = idx + 1;
        what.parentNode = this;
        what.parentDocument = this.parentDocument;
        children = children[0 .. splitAt] ~ what ~ children[splitAt .. $];
        return what;
    }

    return what;
}
/// Convenience function to try to do the right thing for HTML.
///
/// Builds a parentless element for tagName, choosing a specialised class
/// (Table, Link, Form, TableRow, TableCell) for the tags listed in the first
/// switch and a plain Element otherwise. When childInfo is not null, it and
/// childInfo2 are applied in a tag-specific way by the second switch (e.g.
/// src/alt for "img", text/href for "a", text/class for unlisted tags).
static Element make(string tagName, string childInfo = null, string childInfo2 = null) {
bool selfClosed = tagName.isInArray(selfClosedElements);
Element e;
// want to create the right kind of object for the given tag...
switch(tagName) {
case "table":
e = new Table(null);
break;
case "a":
e = new Link(null);
break;
case "form":
e = new Form(null);
break;
case "tr":
e = new TableRow(null);
break;
case "td", "th":
e = new TableCell(null, tagName);
break;
default:
e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere
}
// make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too
e.tagName = tagName;
e.selfClosed = selfClosed;
// everything below only runs when the caller supplied childInfo
if(childInfo !is null)
switch(tagName) {
/* html5 convenience tags */
case "audio":
// childInfo becomes a <source> child; childInfo2 is appended as text
if(childInfo.length)
e.addChild("source", childInfo);
if(childInfo2 !is null)
e.appendText(childInfo2);
break;
case "source":
e.src = childInfo;
if(childInfo2 !is null)
e.type = childInfo2;
break;
/* regular html 4 stuff */
case "img":
e.src = childInfo;
if(childInfo2 !is null)
e.alt = childInfo2;
break;
case "option":
e.innerText = childInfo;
if(childInfo2 !is null)
e.value = childInfo2;
break;
case "input":
// always created as a hidden input; childInfo is its name
e.type = "hidden";
e.name = childInfo;
if(childInfo2 !is null)
e.value = childInfo2;
break;
case "a":
e.innerText = childInfo;
if(childInfo2 !is null)
e.href = childInfo2;
break;
case "script":
case "style":
// contents are stored through innerRawSource rather than as a text node
e.innerRawSource = childInfo;
break;
case "meta":
e.name = childInfo;
if(childInfo2 !is null)
e.content = childInfo2;
break;
/* generically, assume we were passed text and perhaps class */
default:
e.innerText = childInfo;
if(childInfo2.length)
e.className = childInfo2;
}
return e;
}
/// Convenience function: builds a child with Element.make(tagName,
/// childInfo, childInfo2) and appends it, so the extra arguments mean
/// whatever make gives them for that tag (for example a src instead of
/// inner text). Returns the new child.
Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
    in {
        assert(tagName !is null);
    }
    out(e) {
        assert(e.parentNode is this);
        assert(e.parentDocument is this.parentDocument);
    }
body {
    return appendChild(Element.make(tagName, childInfo, childInfo2));
}
/// Convenience function to append text intermixed with other children.
/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
/// or div.addChildren("Hello, ", user.name, "!");
/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
///
/// Each argument must be either an Element (appended as-is) or some string
/// type (appended as a text node); anything else is rejected at compile time.
void addChildren(T...)(T t) {
    import std.conv : to;
    import std.traits : isSomeString;

    foreach(item; t) {
        // item is a value, so the checks must be made on typeof(item);
        // testing item itself never matched and always hit the static assert
        static if(is(typeof(item) : Element))
            appendChild(item);
        else static if(isSomeString!(typeof(item)))
            appendText(to!string(item));
        else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
    }
}
/// Creates a new tagName element through the parent document, puts
/// firstChild inside it, appends it to this element and returns it.
/// Requires a non-null parentDocument.
Element addChild(string tagName, Element firstChild)
    in {
        assert(parentDocument !is null);
        assert(firstChild !is null);
    }
    out(ret) {
        assert(ret !is null);
        assert(ret.parentNode is this);
        assert(firstChild.parentNode is ret);
        assert(ret.parentDocument is this.parentDocument);
        assert(firstChild.parentDocument is this.parentDocument);
    }
body {
    auto wrapper = parentDocument.createElement(tagName);
    wrapper.appendChild(firstChild);
    return this.appendChild(wrapper);
}
/// Creates a tagName element with Element.make, appends it to this element,
/// then sets its inner HTML from innerHtml.source. Returns the new child.
Element addChild(string tagName, Html innerHtml)
    in {
    }
    out(ret) {
        assert(ret !is null);
        assert(ret.parentNode is this);
        assert(ret.parentDocument is this.parentDocument);
    }
body {
    auto created = this.appendChild(Element.make(tagName));
    created.innerHTML = innerHtml.source;
    return created;
}
/// Walks up the parentNode chain and returns the nearest ancestor whose tag
/// name equals tagName, cast to T. When tagName is null it defaults to
/// "form" for Form, "table" for Table and "a" for Link; for any other T the
/// immediate parent is used.
/// Throws: ElementNotFoundException when no such ancestor exists or the
/// ancestor found is not a T.
T getParent(T)(string tagName = null) if(is(T : Element)) {
    if(tagName is null) {
        static if(is(T == Form))
            tagName = "form";
        else static if(is(T == Table))
            tagName = "table";
        else static if(is(T == Link))
            // was a second is(T == Table) with `==` instead of `=`, so it never applied
            tagName = "a";
    }

    auto par = this.parentNode;
    while(par !is null) {
        if(tagName is null || par.tagName == tagName)
            break;
        par = par.parentNode;
    }

    auto t = cast(T) par;
    if(t is null)
        throw new ElementNotFoundException("", tagName ~ " parent not found");

    return t;
}
/// Replaces child with replacement in this element's child list, in place.
/// Returns the old child, which is left without a parent.
Element swapNode(Element child, Element replacement)
    in {
        assert(child !is null);
        assert(replacement !is null);
        assert(child.parentNode is this);
    }
    out(ret) {
        assert(ret is child);
        assert(ret.parentNode is null);
        assert(replacement.parentNode is this);
        assert(replacement.parentDocument is this.parentDocument);
    }
body {
    foreach(ref slot; this.children) {
        if(slot !is child)
            continue;

        child.parentNode = null;
        slot = replacement;
        replacement.parentNode = this;
        replacement.parentDocument = this.parentDocument;
        return child;
    }
    assert(0);
}
/// Returns the first element in this element's tree whose id attribute
/// equals id, or null when none does.
Element getElementById(string id) {
    // FIXME: I use this function a lot, and it's kinda slow
    // not terribly slow, but not great.
    foreach(candidate; tree) {
        if(candidate.id == id)
            return candidate;
    }
    return null;
}
/// Like getElementById, but casts the result to SomeElementType and throws
/// ElementNotFoundException instead of returning null.
final SomeElementType requireElementById(SomeElementType = Element)(string id)
    if(
        is(SomeElementType : Element)
    )
    out(ret) {
        assert(ret !is null);
    }
body {
    auto found = cast(SomeElementType) getElementById(id);
    if(found !is null)
        return found;
    throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id);
}
/// Like querySelector, but casts the result to SomeElementType and throws
/// ElementNotFoundException instead of returning null.
final SomeElementType requireSelector(SomeElementType = Element)(string selector)
    if(
        is(SomeElementType : Element)
    )
    out(ret) {
        assert(ret !is null);
    }
body {
    auto found = cast(SomeElementType) querySelector(selector);
    if(found !is null)
        return found;
    throw new ElementNotFoundException(SomeElementType.stringof, selector);
}
/// Returns the first element matched by selector, or null when nothing matches.
/// Note: you can give multiple selectors, separated by commas.
Element querySelector(string selector) {
    // FIXME: inefficient; it gets all results just to discard most of them
    auto matches = getElementsBySelector(selector);
    return matches.length ? matches[0] : null;
}
/// A more standards-compliant alias for getElementsBySelector; forwards the
/// selector unchanged and returns its result.
Element[] querySelectorAll(string selector) {
return getElementsBySelector(selector);
}
/// Returns the elements matched by selector, searching from this element.
/// The selector string is parsed by parseSelectorString and the results of
/// each parsed selector are concatenated. In a loose parent document the
/// selector is lowercased first.
Element[] getElementsBySelector(string selector) {
    // FIXME: this function could probably use some performance attention
    // ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app.

    // POSSIBLE FIXME: this also sends attribute things to lower in the selector,
    // but the actual get selector check is still case sensitive...
    if(parentDocument && parentDocument.loose)
        selector = selector.toLower;

    Element[] matches;
    foreach(parsed; parseSelectorString(selector)) {
        matches ~= parsed.getElements(this);
    }
    return matches;
}
/// Collects every element in this element's tree whose tagName equals tag.
/// In a loose parent document the requested tag is lowercased first.
Element[] getElementsByTagName(string tag) {
    if(parentDocument && parentDocument.loose)
        tag = tag.toLower();

    Element[] matches;
    foreach(candidate; tree) {
        if(candidate.tagName == tag)
            matches ~= candidate;
    }
    return matches;
}
/// Appends a new TextNode holding text as the last child and returns it.
Element appendText(string text) {
    Element node = new TextNode(parentDocument, text);
    appendChild(node);
    return node;
}
/// Returns the direct children whose nodeType is 1.
@property Element[] childElements() {
    Element[] elementsOnly;
    foreach(child; children) {
        if(child.nodeType != 1)
            continue;
        elementsOnly ~= child;
    }
    return elementsOnly;
}
/*
Does a CSS selector
* -- all, default if nothing else is there
tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector
It is all additive
OP
space = descendant
> = direct descendant
+ = sibling (E+F Matches any F element immediately preceded by a sibling element E)
[foo] Foo is present as an attribute
[foo="warning"] Matches any E element whose "foo" attribute value is exactly equal to "warning".
E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".
[item$=sdas] ends with
[item^=sdsad] begins with
Quotes are optional here.
Pseudos:
:first-child
:last-child
:link (same as a[href] for our purposes here)
There can be commas separating the selector. A comma separated list result is OR'd onto the main.
This ONLY cares about elements. text, etc, are ignored
There should be two functions: given element, does it match the selector? and given a selector, give me all the elements
*/
/// Appends the given html to the element, returning the elements appended.
///
/// The html is wrapped in a throwaway <root> element before parsing so that
/// fragments with several top-level nodes (or only text) end up as children
/// of the parsed document's root, which are then moved into this element.
/// NOTE(review): the wrapper tags were missing from the string literals
/// (leaving `"" ~ html ~ ""`); confirm "root" matches what Document expects.
Element[] appendHtml(string html) {
    Document d = new Document("<root>" ~ html ~ "</root>");
    return stealChildren(d.root);
}
/// Adds c to the class attribute: sets it when there is no class attribute
/// yet, otherwise appends c after a space. Returns this for chaining.
Element addClass(string c) {
    string existing = getAttribute("class");
    setAttribute("class", existing is null ? c : existing ~ " " ~ c);
    return this;
}
/// Removes the class (or space-separated classes) c from the class attribute
/// and returns this for chaining.
///
/// Matching is done on whole class names: removing "foo" leaves "foobar"
/// intact (a plain substring replace used to turn it into "bar"). The
/// remaining classes are re-joined with single spaces.
Element removeClass(string c) {
    auto unwanted = c.split(" ");

    string[] kept;
    foreach(cla; className().split(" ")) {
        if(cla.length == 0)
            continue; // skip blanks left by repeated spaces
        if(!isInArray(cla, unwanted))
            kept ~= cla;
    }

    className = kept.join(" ");
    return this;
}
/// True when c is one of the space-separated entries of the class attribute.
bool hasClass(string c) {
    auto cn = className;

    // cheap rejection: if c is not even a substring, it cannot be a class
    if(cn.indexOf(c) == -1)
        return false;

    // substring hits still need an exact match against a whole class name
    return isInArray(c, cn.split(" "));
}
/// Moves this element from its current parent to the end of newParent's
/// children. Requires that it currently has a parent.
void reparent(Element newParent)
    in {
        assert(newParent !is null);
        assert(parentNode !is null);
    }
    out {
        assert(this.parentNode == newParent);
        assert(isInArray(this, newParent.children));
    }
body {
    auto oldParent = parentNode;
    oldParent.removeChild(this);
    newParent.appendChild(this);
}
/// Inserts child into this element's children right after where, and makes
/// this element its parent.
void insertChildAfter(Element child, Element where)
    in {
        assert(child !is null);
        assert(where !is null);
        assert(where.parentNode is this);
        assert(!selfClosed);
        assert(isInArray(where, children));
    }
    out {
        assert(child.parentNode is this);
        assert(where.parentNode is this);
        assert(isInArray(where, children));
        assert(isInArray(child, children));
    }
body {
    foreach(idx, existing; children) {
        if(existing !is where)
            continue;

        auto insertAt = idx + 1;
        children = children[0 .. insertAt] ~ child ~ children[insertAt .. $];
        child.parentNode = this;
        child.parentDocument = this.parentDocument;
        return;
    }
}
/// Moves all children of e into this element and returns them. With a null
/// position they are appended; otherwise they are inserted before position,
/// which must be one of this element's children. e is left with no children.
Element[] stealChildren(Element e, Element position = null)
    in {
        assert(!selfClosed);
        assert(e !is null);
        if(position !is null)
            assert(isInArray(position, children));
    }
    out (ret) {
        assert(e.children.length == 0);
        debug foreach(child; ret) {
            assert(child.parentNode is this);
            assert(child.parentDocument is this.parentDocument);
        }
    }
body {
    // adopt every child first
    foreach(moved; e.children) {
        moved.parentNode = this;
        moved.parentDocument = this.parentDocument;
    }

    if(position is null) {
        children ~= e.children;
    } else {
        foreach(idx, existing; children) {
            if(existing !is position)
                continue;
            children = children[0 .. idx] ~ e.children ~ children[idx .. $];
            break;
        }
    }

    auto stolen = e.children.dup;
    e.children.length = 0;
    return stolen;
}
/// Inserts e as the first child of this element and returns it. The given
/// element must not have a parent already.
Element prependChild(Element e)
    in {
        assert(e.parentNode is null);
        assert(!selfClosed);
    }
    out {
        assert(e.parentNode is this);
        assert(e.parentDocument is this.parentDocument);
        assert(children[0] is e);
    }
body {
    e.parentDocument = this.parentDocument;
    e.parentNode = this;
    this.children = e ~ this.children;
    return e;
}
/**
    Provides easy access to attributes, like in javascript: element.name
    reads the attribute called name, and passing a non-null value sets it
    first. Returns the attribute's value as read back after any set.
*/
// name != "popFront" is so duck typing doesn't think it's a range
string opDispatch(string name)(string v = null) if(name != "popFront") {
    if(v is null)
        return getAttribute(name);

    setAttribute(name, v);
    return getAttribute(name);
}
/**
Returns the element's children (the children array itself, not a copy),
typed as const when called on a const element.
*/
@property const(Element[]) childNodes() const {
return children;
}
/// Mutable version of the same; also returns the children array itself.
@property Element[] childNodes() { // FIXME: the above should be inout
return children;
}
// should return int
/// Node type code; this class always reports 1. childElements uses this
/// value to select children.
@property int nodeType() const {
return 1;
}
/**
    Returns a string containing all child elements, formatted such that it could be pasted into
    an XML file. Each child is written to the given appender, and the portion
    written by this call is returned. Returns "" when children is null.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
    if(children is null)
        return "";

    auto begin = where.data.length;

    foreach(child; children) {
        assert(child !is null);
        child.writeToAppender(where);
    }

    return where.data[begin .. $];
}
/**
    Takes some html and replaces the element's children with the tree made from the string.

    An empty string just removes all children. Otherwise the html is wrapped
    in a throwaway element before parsing so that fragments with several
    top-level nodes become the children of the parsed document's root, which
    are then adopted by this element.
    NOTE(review): the wrapper tags were missing from the string literals
    (leaving `"" ~ html ~ ""`); confirm "innerhtml" matches what Document expects.
*/
@property void innerHTML(string html) {
    if(html.length == 0) {
        // I often say innerHTML = ""; as a shortcut to clear it out,
        // so let's optimize that slightly.
        removeAllChildren();
        return;
    }

    selfClosed = false;

    auto doc = new Document();
    doc.parse("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: this should preserve the strictness of the parent document

    children = doc.root.children;
    foreach(c; children) {
        c.parentNode = this;
        c.parentDocument = this.parentDocument;
    }

    reparentTreeDocuments();

    // the temporary document no longer owns these nodes
    doc.root.children = null;
}
/// ditto
/// Forwards html.source to the string overload of the innerHTML setter.
@property void innerHTML(Html html) {
this.innerHTML = html.source;
}
// Sets parentDocument on every element yielded by this.tree to this
// element's own parentDocument.
private void reparentTreeDocuments() {
    auto owner = this.parentDocument;
    foreach(node; this.tree) {
        node.parentDocument = owner;
    }
}
/**
    Replaces this node with the given html string, which is parsed.

    Note: this invalidates the this reference, since it is removed
    from the tree.

    The html is wrapped in a throwaway element before parsing so that
    fragments with several top-level nodes become children of the parsed root.
    NOTE(review): the wrapper tags were missing from the string literals
    (leaving `"" ~ html ~ ""`); confirm "innerhtml" matches what Document expects.

    Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
    auto doc = new Document();
    doc.parse("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness

    // adopt the parsed nodes, then strip this element out so they take its place
    children = doc.root.children;
    foreach(c; children) {
        c.parentNode = this;
        c.parentDocument = this.parentDocument;
    }

    reparentTreeDocuments();

    stripOut();

    return doc.root.children;
}
/// Returns this element serialized by toString().
@property string outerHTML() {
return this.toString();
}
/// Replaces the children with a single RawSource node holding rawSource.
@property void innerRawSource(string rawSource) {
    auto raw = new RawSource(parentDocument, rawSource);
    raw.parentNode = this;

    children.length = 0;
    children ~= raw;
}
/**
    Gets the given attribute value, or null if the
    attribute is not set. In a loose parent document the name is
    lowercased before the lookup.

    Note that the returned string is decoded, so it no longer contains any xml entities.
*/
string getAttribute(string name) const {
    if(parentDocument && parentDocument.loose)
        name = name.toLower();

    if(auto found = name in attributes)
        return *found;

    return null;
}
/**
    Sets an attribute. Returns this for easy chaining.

    In a loose parent document the name is lowercased first. For href and
    src, any leading "javascript:" or "vbscript:" scheme (matched
    case-insensitively, ignoring surrounding whitespace) is removed from the
    value, repeatedly, so stacked prefixes cannot survive. Values without
    such a prefix are stored unchanged.
*/
Element setAttribute(string name, string value) {
    // ASCII-only, case-insensitive prefix test; avoids slicing with offsets
    // taken from a separately trimmed/lowercased copy of the value
    static bool hasSchemePrefix(string s, string scheme) {
        if(s.length < scheme.length)
            return false;
        foreach(i, expected; scheme) {
            char c = s[i];
            if(c >= 'A' && c <= 'Z')
                c = cast(char)(c + ('a' - 'A'));
            if(c != expected)
                return false;
        }
        return true;
    }

    if(parentDocument && parentDocument.loose)
        name = name.toLower();

    // I never use this shit legitimately and neither should you
    auto it = name.toLower;
    if(it == "href" || it == "src") {
        auto candidate = value.strip;
        bool removedScheme = false;

        for(;;) {
            size_t schemeLength = 0;
            if(hasSchemePrefix(candidate, "vbscript:"))
                schemeLength = "vbscript:".length;
            else if(hasSchemePrefix(candidate, "javascript:"))
                schemeLength = "javascript:".length;

            if(schemeLength == 0)
                break;

            candidate = candidate[schemeLength .. $].strip;
            removedScheme = true;
        }

        // only touch the value when something was actually stripped
        if(removedScheme)
            value = candidate;
    }

    attributes[name] = value;

    return this;
}
/**
    Extension: true when the attribute is present. In a loose parent
    document the name is lowercased before the lookup.
*/
bool hasAttribute(string name) {
    if(parentDocument && parentDocument.loose)
        name = name.toLower();

    return (name in attributes) !is null;
}
/**
    Extension: removes the attribute if it is present. In a loose parent
    document the name is lowercased before the lookup.
*/
void removeAttribute(string name) {
    if(parentDocument && parentDocument.loose)
        name = name.toLower();

    if((name in attributes) is null)
        return;

    attributes.remove(name);
}
/**
    Gets the class attribute's contents. Returns
    an empty string if it has no class.
*/
string className() const {
    auto value = getAttribute("class");
    return value is null ? "" : value;
}
/// Sets the class attribute to c via setAttribute and returns this for chaining.
Element className(string c) {
setAttribute("class", c);
return this;
}
/// Node value; for this class it is always the empty string.
string nodeValue() const {
return "";
}
/// Puts replace in the place of the child find and returns replace. find is
/// left without a parent.
/// Throws: Exception("no such child") when find is not among the children.
Element replaceChild(Element find, Element replace)
    in {
        assert(find !is null);
        assert(replace !is null);
        assert(replace.parentNode is null);
    }
    out(ret) {
        assert(ret is replace);
        assert(replace.parentNode is this);
        assert(replace.parentDocument is this.parentDocument);
        assert(find.parentNode is null);
    }
body {
    foreach(ref slot; children) {
        if(slot !is find)
            continue;

        replace.parentNode = this;
        slot.parentNode = null;
        slot = replace;
        replace.parentDocument = this.parentDocument;
        return replace;
    }

    throw new Exception("no such child");
}
/**
    Removes the given child from this list.

    Returns the removed element, which is left without a parent.
    Throws Exception("no such child") when c is not among the children.
*/
Element removeChild(Element c)
    in {
        assert(c !is null);
        assert(c.parentNode is this);
    }
    out {
        debug foreach(child; children)
            assert(child !is c);
        assert(c.parentNode is null);
    }
body {
    foreach(idx, existing; children) {
        if(existing !is c)
            continue;

        children = children[0 .. idx] ~ children[idx + 1 .. $];
        c.parentNode = null;
        return c;
    }

    throw new Exception("no such child");
}
/// Detaches every child from this element and returns them as a new array;
/// each returned element is left without a parent.
Element[] removeChildren()
    out (ret) {
        assert(children.length == 0);
        debug foreach(r; ret)
            assert(r.parentNode is null);
    }
body {
    auto detached = children.dup;

    for(size_t idx = 0; idx < detached.length; idx++)
        detached[idx].parentNode = null;

    children.length = 0;
    return detached;
}
/**
    EXTENSION

    Replaces the given element with a whole group: the elements of replace
    take find's position in order. An empty group simply removes find.
    Throws Exception("no such child") when find is not among the children.
*/
void replaceChild(Element find, Element[] replace)
    in {
        assert(find !is null);
        assert(replace !is null);
        assert(find.parentNode is this);
        debug foreach(r; replace)
            assert(r.parentNode is null);
    }
    out {
        assert(find.parentNode is null);
        assert(children.length >= replace.length);
        debug foreach(child; children)
            assert(child !is find);
        debug foreach(r; replace)
            assert(r.parentNode is this);
    }
body {
    if(replace.length == 0) {
        removeChild(find);
        return;
    }
    assert(replace.length);

    for(int pos = 0; pos < children.length; pos++) {
        if(children[pos] !is find)
            continue;

        children[pos].parentNode = null; // this element should now be dead
        children[pos] = replace[0];

        foreach(adopted; replace) {
            adopted.parentNode = this;
            adopted.parentDocument = this.parentDocument;
        }

        // the first replacement took find's slot; the rest follow it
        children = .insertAfter(children, pos, replace[1 .. $]);
        return;
    }

    throw new Exception("no such child");
}
///.
Element parentNode;
/**
    Strips this tag out of the document, putting its children in its place
    in the parent. Afterwards this element has neither a parent nor children.
*/
void stripOut()
    in {
        assert(parentNode !is null);
    }
    out {
        assert(parentNode is null);
        assert(children.length == 0);
    }
body {
    // the children must be parentless before the parent can adopt them
    foreach(orphan; children)
        orphan.parentNode = null;

    auto formerParent = parentNode;
    if(children.length == 0)
        formerParent.removeChild(this);
    else
        formerParent.replaceChild(this, this.children);

    this.children.length = 0; // we reparented them all above
}
/// Shorthand for this.parentNode.removeChild(this) with a parentNode null
/// check. Returns this.
Element removeFromTree()
    in {
    }
    out(var) {
        assert(this.parentNode is null);
        assert(var is this);
    }
body {
    if(this.parentNode !is null)
        this.parentNode.removeChild(this);

    return this;
}
/// Wraps this element inside the given element: what takes this element's
/// place in the tree and this element becomes what's last child.
/// Returns what.
Element wrapIn(Element what)
    in {
        assert(what !is null);
    }
    out(ret) {
        assert(this.parentNode is what);
        assert(ret is what);
    }
body {
    auto wrapper = this.replaceWith(what);
    wrapper.appendChild(this);
    return what;
}
/// Replaces this element in its parent with e and returns e. If e currently
/// has a parent, it is removed from it first.
Element replaceWith(Element e) {
    auto previousOwner = e.parentNode;
    if(previousOwner !is null)
        previousOwner.removeChild(e);

    this.parentNode.replaceChild(this, e);
    return e;
}
/**
INCOMPATIBLE -- extension
Splits the className into an array of each class given, using a single
space as the separator.
*/
string[] classNames() const {
return className().split(" ");
}
/**
    Fetches the first consecutive text nodes, concatenated together.
    Stops at the first child that is not a text node.
*/
string firstInnerText() const {
    string collected;
    foreach(child; children) {
        if(child.nodeType == NodeType.Text)
            collected ~= child.nodeValue();
        else
            break;
    }
    return collected;
}
/**
    Fetch the inside text, with all tags stripped out: text children
    contribute their nodeValue, other children their own innerText.
*/
@property string innerText() const {
    string collected;
    foreach(child; children) {
        if(child.nodeType == NodeType.Text)
            collected ~= child.nodeValue();
        else
            collected ~= child.innerText;
    }
    return collected;
}
/**
    Sets the inside text, replacing all children with a single text node.
    The element stops being self-closed.
*/
@property void innerText(string text) {
    Element node = new TextNode(parentDocument, text);
    node.parentNode = this;

    selfClosed = false;
    children = [node];
}
/**
    Strips this node out of the document, replacing it in its parent with a
    text node holding the given text.
*/
@property void outerText(string text) {
    auto replacement = new TextNode(parentDocument, text);
    parentNode.replaceChild(this, replacement);
}
/**
Same result as innerText; the element's text with all tags stripped out.
*/
@property string outerText() const {
return innerText();
}
// Class invariant. In debug builds, checks for every child that it is
// non-null, that its parentNode is this element, and that it is neither this
// element itself nor this element's parent.
invariant () {
if(children !is null)
debug foreach(child; children) {
// assert(parentNode !is null);
assert(child !is null);
assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName));
assert(child !is this);
assert(child !is parentNode);
}
/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
if(parentNode !is null) {
// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
auto lol = cast(TextNode) this;
assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
}
+/
//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
// reason is so you can create these without needing a reference to the document
}
/**
Turns the whole element, including tag, attributes, and children, into a string which could be pasted into
an XML file. Delegates to writeToAppender with its default appender.
*/
override string toString() const {
return writeToAppender();
}
/// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time.
/// Writes the opening tag with its attributes (values entity-encoded), then
/// either " />" for a self-closed element or the children followed by the
/// closing tag.
/// Returns the string it creates (only the part written by this call).
string writeToAppender(Appender!string where = appender!string()) const {
    assert(tagName !is null);

    where.reserve((this.children.length + 1) * 512);

    auto start = where.data.length;

    where.put("<");
    where.put(tagName);

    foreach(n, v ; attributes) {
        assert(n !is null);
        //assert(v !is null);
        where.put(" ");
        where.put(n);
        where.put("=\"");
        htmlEntitiesEncode(v, where);
        where.put("\"");
    }

    if(selfClosed){
        where.put(" />");
        return where.data[start .. $];
    }

    where.put('>');

    innerHTML(where);

    // closing tag; the "</" had been reduced to an empty string, which
    // produced output like "<div>...div>"
    where.put("</");
    where.put(tagName);
    where.put('>');

    return where.data[start .. $];
}
/**
Returns a lazy range over this element and its descendants: a new
ElementStream that starts at this element.
*/
ElementStream tree() {
return new ElementStream(this);
}
}
/// An element with the tag name "#fragment" that serializes as just its
/// children, without a tag of its own.
class DocumentFragment : Element {
    /// Creates an empty fragment belonging to the given document.
    this(Document _parentDocument) {
        super(_parentDocument);
        tagName = "#fragment";
    }

    /// Serializes only the children.
    override string toString() const {
        return this.innerHTML;
    }
}
/// Escapes data for use in HTML/XML text or attribute values and appends the
/// result to output. '&', '<', '>' and '"' become named entities, NUL and
/// every non-ASCII character become decimal numeric entities (&#N;), and all
/// other ASCII characters are copied through.
/// Returns the encoded text (the original string when nothing needed escaping).
string htmlEntitiesEncode(string data, Appender!string output = appender!string()) {
    // if there's no entities, we can save a lot of time by not bothering with the
    // encoding loop. This check cuts the net toString time by better than half in my test.
    // let me know if it made your tests worse though, since if you use an entity in just about
    // every location, the check will add time... but I suspect the average experience is like mine
    // since the check gives up as soon as it can anyway.

    bool shortcut = true;
    foreach(char c; data) {
        // non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
        if(c == '<' || c == '>' || c == '"' || c == '&' || cast(uint) c > 127) {
            shortcut = false; // there's actual work to be done
            break;
        }
    }

    if(shortcut) {
        output.put(data);
        return data;
    }

    auto start = output.data.length;

    output.reserve(data.length + 64); // grab some extra space for the encoded entities

    foreach(dchar d; data) {
        if(d == '&')
            output.put("&amp;");
        else if (d == '<')
            output.put("&lt;");
        else if (d == '>')
            output.put("&gt;");
        else if (d == '\"')
            output.put("&quot;");
        else if (d < 128 && d > 0)
            output.put(d);
        else
            output.put("&#" ~ std.conv.to!string(cast(int) d) ~ ";");
    }

    //assert(output !is null); // this fails on empty attributes.....
    return output.data[start .. $];

//	data = data.replace("\u00a0", "&nbsp;");
}
/// Escapes text for XML output. This simply forwards to htmlEntitiesEncode.
string xmlEntitiesEncode(string data) {
    auto encoded = htmlEntitiesEncode(data);
    return encoded;
}
/// Translates one complete entity, written with its leading '&' and trailing ';',
/// into the character it stands for.
///
/// A fixed set of named entities is recognised, plus decimal ("&#8212;") and
/// lower-case-x hexadecimal ("&#x2014;") numeric references. Any other name yields '?'.
dchar parseEntity(in dchar[] entity) {
    // the name is what sits between the '&' and the ';'
    auto name = entity[1 .. $ - 1];

    switch(name) {
        // entities shared with XML
        case "amp":
            return '&';
        case "lt":
            return '<';
        case "gt":
            return '>';
        case "quot":
            return '"';
        case "apos":
            return '\'';

        // the rest are html rather than xml
        // (not handled yet: cent, pound, sect, deg, micro)
        case "nbsp":
            return '\u00a0';
        case "copy":
            return '\u00a9';
        case "reg":
            return '\u00ae';
        case "trade":
            return '\u2122';
        case "eacute":
            return '\u00e9';
        case "laquo":
            return '\u00ab';
        case "raquo":
            return '\u00bb';
        case "lsquo":
            return '\u2018';
        case "rsquo":
            return '\u2019';
        case "ldquo":
            return '\u201c';
        case "rdquo":
            return '\u201d';
        case "mdash":
            return '\u2014';
        case "hellip":
            return '\u2026';

        // and handling numeric entities
        default:
            if(entity[1] != '#')
                return '?'; // unknown named entity

            if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
                auto hexDigits = entity[3 .. $ - 1];
                return cast(dchar) intFromHex(to!string(hexDigits).toLower());
            } else {
                auto decimalDigits = entity[2 .. $ - 1];
                return cast(dchar) std.conv.to!int(decimalDigits);
            }
    }
    assert(0);
}
import std.utf;
/// Replaces the entities in `data` with the characters they stand for (see parseEntity).
///
/// Params:
///     data   = text that may contain entities such as "&amp;" or "&#8212;"
///     strict = when true, an '&' that is not followed by a ';' soon enough (or before the
///              input ends) throws; when false the characters are copied through unchanged
/// Returns: the decoded text; the original slice is returned untouched when it contains no '&'.
/// Throws: Exception on an unterminated entity in strict mode.
string htmlEntitiesDecode(string data, bool strict = false) {
    // this check makes a *big* difference; about a 50% improvement of parse speed on my test.
    if(data.indexOf("&") == -1) // all html entities begin with &
        return data; // if there are no entities in here, we can return the original slice and save some time

    char[] a; // this seems to do a *better* job than appender!
    char[4] buffer; // scratch space for one UTF-8 encoded code point

    bool tryingEntity = false;
    dchar[] entityBeingTried;
    int entityAttemptIndex = 0;

    foreach(dchar ch; data) {
        if(tryingEntity) {
            entityAttemptIndex++;
            entityBeingTried ~= ch;

            if(ch == ';') {
                tryingEntity = false;
                a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried))];
            } else {
                // give up once the candidate is longer than any entity we are willing to scan for
                if(entityAttemptIndex >= 7) {
                    if(strict)
                        throw new Exception("unterminated entity at " ~ to!string(entityBeingTried));
                    else {
                        tryingEntity = false;
                        a ~= to!(char[])(entityBeingTried);
                    }
                }
            }
        } else {
            if(ch == '&') {
                tryingEntity = true;
                entityBeingTried = null;
                entityBeingTried ~= ch;
                entityAttemptIndex = 0;
            } else {
                a ~= buffer[0 .. std.utf.encode(buffer, ch)];
            }
        }
    }

    // The input ended while an entity was still being collected (e.g. "AT&T").
    // Without this the pending characters would silently vanish from the result.
    if(tryingEntity) {
        if(strict)
            throw new Exception("unterminated entity at " ~ to!string(entityBeingTried));
        a ~= to!(char[])(entityBeingTried);
    }

    return cast(string) a; // assumeUnique is actually kinda slow, lol
}
/// A node holding a piece of source text that is written out exactly as given:
/// toString returns the stored string with no encoding applied.
class RawSource : Element {
    /// Creates a raw node holding `s`, tagged with the pseudo tag name "#raw".
    this(Document _parentDocument, string s) {
        super(_parentDocument);
        source = s;
        tagName = "#raw";
    }

    /// The node's value is its string form.
    override string nodeValue() const {
        return this.toString();
    }

    /// Returns 100, the type code this class uses for raw source nodes.
    override int nodeType() const {
        return 100;
    }

    /// Returns the stored source verbatim.
    override string toString() const {
        return source;
    }

    /// Raw source nodes are leaves; appending a child is a programming error.
    override Element appendChild(Element e) {
        // the message used to say "text node", copied from TextNode
        assert(0, "Cannot append to a raw source node");
    }

    /// The text emitted by toString.
    string source;
}
/// Node type codes; TextNode.nodeType returns NodeType.Text. Only the text code is defined here.
/// NOTE(review): 3 looks chosen to match the W3C DOM TEXT_NODE constant - confirm.
enum NodeType { Text = 3}
/// A leaf node carrying character data. The text is held as-is in `contents` and is
/// entity-encoded only when the node is written out.
class TextNode : Element {
  public:
    /// The text held by this node.
    string contents;

    /// Creates a text node holding `e`, tagged with the pseudo tag name "#text".
    this(Document _parentDocument, string e) {
        super(_parentDocument);
        tagName = "#text";
        contents = e;
    }

    string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes

    /// Builds a text node from markup text, decoding its entities first.
    /// Decoding is strict only when a document is given and it is not in loose mode.
    static TextNode fromUndecodedString(Document _parentDocument, string html) {
        auto node = new TextNode(_parentDocument, "");
        bool strict = _parentDocument !is null && !_parentDocument.loose;
        node.contents = htmlEntitiesDecode(html, strict);
        return node;
    }

    /// Returns a new text node with the same document and contents.
    override @property Element cloned() {
        auto copy = new TextNode(parentDocument, contents);
        return copy;
    }

    /// The node's value is its text.
    override string nodeValue() const {
        return this.contents; //toString();
    }

    /// Returns NodeType.Text.
    override int nodeType() const {
        return NodeType.Text;
    }

    /// Appends the entity-encoded contents to `where` and returns what was written;
    /// an empty node writes nothing and returns "".
    override string writeToAppender(Appender!string where = appender!string()) const {
        string written = contents.length ? htmlEntitiesEncode(contents, where) : "";
        assert(written !is null);
        return written;
    }

    /// Text nodes are leaves; appending a child is a programming error.
    override Element appendChild(Element e) {
        assert(0, "Cannot append to a text node");
    }
}
/**
There are subclasses of Element offering improved helper
functions for the element in HTML.
*/
/// An `a` element with helpers for reading and editing the query string of its href.
class Link : Element {
    /// Creates an empty link belonging to the given document.
    this(Document _parentDocument) {
        super(_parentDocument);
        this.tagName = "a";
    }

    /// Creates a link pointing at `href` whose inner text is `text`.
    this(string href, string text) {
        super("a");
        setAttribute("href", href);
        innerText = text;
    }

    /+
    /// Returns everything in the href EXCEPT the query string
    @property string targetSansQuery() {
    }
    ///.
    @property string domainName() {
    }
    ///.
    @property string path
    +/

    /// This gets a variable from the URL's query string.
    /// Returns null when the variable is not present.
    string getValue(string name) {
        auto vars = variablesHash();
        if(auto found = name in vars)
            return *found;
        return null;
    }

    // Parses the href's query string into a name => value table (both URI-decoded).
    // Returns null when there is no href attribute.
    private string[string] variablesHash() {
        string href = getAttribute("href");
        if(href is null)
            return null;

        // The fragment is the last component of a URL, so drop it before looking
        // for the query; a '?' that appears after '#' belongs to the fragment.
        auto fragment = href.indexOf("#");
        if(fragment != -1)
            href = href[0 .. fragment];

        string[string] hash;

        auto ques = href.indexOf("?");
        if(ques == -1)
            return hash;

        foreach(pair; href[ques + 1 .. $].split("&")) {
            // "a=1&&b=2" or a trailing '&' would otherwise create a variable with an empty name
            if(pair.length == 0)
                continue;

            auto index = pair.indexOf("=");
            if(index == -1) {
                // decode value-less names too, or they get encoded a second time when written back
                hash[decodeComponent(pair)] = "";
            } else {
                hash[decodeComponent(pair[0 .. index])] = decodeComponent(pair[index + 1 .. $]);
            }
        }

        return hash;
    }

    /// Rewrites the href so its query string holds exactly `vars` (names and values are
    /// URI-encoded). The part before the query and the #fragment are kept.
    /*private*/ void updateQueryString(string[string] vars) {
        string href = getAttribute("href");

        // Peel the fragment off first: it follows the query, so cutting at '?' first
        // would throw the fragment away together with the old query.
        string frag = "";
        auto fragment = href.indexOf("#");
        if(fragment != -1) {
            frag = href[fragment .. $];
            href = href[0 .. fragment];
        }

        auto question = href.indexOf("?");
        if(question != -1)
            href = href[0 .. question];

        string query = "?";
        bool first = true;
        foreach(name, value; vars) {
            if(!first)
                query ~= "&";
            else
                first = false;

            query ~= encodeComponent(name);
            if(value.length)
                query ~= "=" ~ encodeComponent(value);
        }

        // only add the '?' when there is something after it
        if(query != "?")
            href ~= query;

        href ~= frag;
        setAttribute("href", href);
    }

    /// Sets or adds the variable with the given name to the given value
    /// It automatically URI encodes the values and takes care of the ? and &.
    void setValue(string name, string variable) {
        auto vars = variablesHash();
        vars[name] = variable;
        updateQueryString(vars);
    }

    /// Removes the given variable from the query string
    void removeValue(string name) {
        auto vars = variablesHash();
        vars.remove(name);
        updateQueryString(vars);
    }

    /*
    ///.
    override string toString() {
    }
    ///.
    override string getAttribute(string name) {
    if(name == "href") {
    } else
    return super.getAttribute(name);
    }
    */
}
///.
class Form : Element {
/// Creates an empty form element belonging to the given document.
this(Document _parentDocument) {
    super(_parentDocument);
    this.tagName = "form";
}
// FIXME: doesn't handle arrays; multiple fields can have the same name
/// Sets the form field's value. For input boxes, this sets the value attribute. For
/// textareas, it sets the innerText. For radio boxes and select boxes, it removes
/// the checked/selected attribute from all, and adds it to the one matching the value.
/// For checkboxes, if the value is non-null and not empty, it checks the box.
/// If you set a value that doesn't exist, it throws an exception if makeNew is false.
/// Otherwise, it makes a new input with type=hidden to keep the value.
void setValue(string field, string value, bool makeNew = true) {
    auto eles = getField(field);

    // no such field: create it or complain
    if(eles.length == 0) {
        if(!makeNew)
            throw new Exception("form field does not exist");
        addField(field, value);
        return;
    }

    // several elements share the name: assume radio boxes
    if(eles.length != 1) {
        foreach(radio; eles) {
            // a radio box with a null value is simply treated as not matching
            if(radio.getAttribute("value") == value)
                radio.setAttribute("checked", "checked");
            else
                radio.removeAttribute("checked");
        }
        return;
    }

    auto e = eles[0];
    auto tag = e.tagName;

    if(tag == "textarea") {
        e.innerText = value;
    } else if(tag == "input") {
        string type = e.getAttribute("type");
        if(type == "checkbox" || type == "radio") {
            // any non-empty value ticks the box
            if(value.length)
                e.setAttribute("checked", "checked");
            else
                e.removeAttribute("checked");
        } else {
            // every other input, including one with no type attribute at all
            e.value = value;
        }
    } else if(tag == "select") {
        bool found = false;
        foreach(child; e.tree) {
            if(child.tagName == "option") {
                // an option without a value attribute is matched by its text
                string val = child.getAttribute("value");
                if(val is null)
                    val = child.innerText;

                if(val == value) {
                    child.setAttribute("selected", "selected");
                    found = true;
                } else {
                    child.removeAttribute("selected");
                }
            }
        }

        // nothing matched: add a new, selected option for the value
        if(!found) {
            e.addChild("option", value)
                .setAttribute("selected", "selected");
        }
    } else {
        assert(0);
    }
}
/// Gets the value of the field; what would be given if it submitted right now. (so
/// it handles select boxes and radio buttons too). For checkboxes, if a value isn't
/// given, but it is checked, it returns "checked", since null and "" are indistinguishable.
/// A field that does not exist, or has nothing checked/selected, gives "".
string getValue(string field) {
    auto eles = getField(field);

    if(eles.length == 0)
        return "";

    // several elements share the name: assuming radio, report the checked one
    if(eles.length != 1) {
        foreach(radio; eles) {
            if(radio.checked)
                return radio.value;
        }
        return "";
    }

    auto e = eles[0];
    auto tag = e.tagName;

    if(tag == "input") {
        if(e.type != "checkbox")
            return e.value;

        if(e.checked)
            return e.value.length ? e.value : "checked";
        return "";
    }

    if(tag == "textarea")
        return e.innerText;

    if(tag == "select") {
        foreach(child; e.tree) {
            if(child.tagName == "option") {
                if(child.selected)
                    return child.value;
            }
        }
        return "";
    }

    assert(0);
}
// FIXME: doesn't handle multiple elements with the same name (except radio buttons)
/// Builds a string of URI-encoded name=value pairs joined by '&', one pair per
/// distinct name among the elements that carry a name attribute.
string getPostableData() {
    bool[string] namesDone;
    string ret;

    foreach(e; getElementsBySelector("[name]")) {
        auto fieldName = e.name;

        // each name is emitted once, however many elements carry it
        if(fieldName in namesDone)
            continue;

        // every pair contains '=', so a non-empty result means a pair was already written
        if(ret.length)
            ret ~= "&";

        ret ~= std.uri.encodeComponent(fieldName) ~ "=" ~ std.uri.encodeComponent(getValue(fieldName));
        namesDone[fieldName] = true;
    }

    return ret;
}
/// Gets the actual elements with the given name, in the order `tree` yields them.
Element[] getField(string name) {
    Element[] matches;
    foreach(candidate; tree) {
        if(candidate.name != name)
            continue;
        matches ~= candidate;
    }
    return matches;
}
/// Grabs the