/**
This is an html DOM implementation, started with cloning
what the browser offers in Javascript, but going well beyond
it in convenience.
If you can do it in Javascript, you can probably do it with
this module.
And much more.
Note: some of the documentation here writes html with added
spaces. That's because ddoc doesn't bother encoding html output,
and adding spaces is easier than using LT macros everywhere.
BTW: this file optionally depends on arsd.characterencodings, to
help it correctly read files from the internet. You should be able to
get characterencodings.d from the same place you got this file.
If you want it to stand alone, just always use the `parseUtf8` function.
*/
module arsd.dom;
// FIXME: do parent selector picking in get selector
// FIXME: do :has too... or instead, :has is quite nice.
version(with_arsd_jsvar)
import arsd.jsvar;
else {
enum Scriptable;
}
// Compile-time filter for Element.opDispatch: answers whether `name` is one of
// the few attributes that may be used as a property without going through .attr.
// The comparison is exact (case-sensitive).
bool isConvenientAttribute(string name) {
	switch(name) {
		case "name", "id", "href", "value",
			"checked", "selected", "type",
			"src", "content", "pattern",
			"placeholder", "required", "alt",
			"rel",
			"method", "action", "enctype":
			return true;
		default:
			return false;
	}
}
// FIXME: might be worth doing Element.attrs and taking opDispatch off that
// so more UFCS works.
// FIXME: something like < tag >spam < tag > with no closing < /tag >
// should read the second tag as the closer in garbage mode
// FIXME: failing to close a paragraph sometimes messes things up too
// FIXME: it would be kinda cool to have some support for internal DTDs
// and maybe XPath as well, to some extent
/*
we could do
meh this sux
auto xpath = XPath(element);
// get the first p
xpath.p[0].a["href"]
*/
// public import arsd.domconvenience; // merged for now
/* domconvenience follows { */
import std.string;
// the reason this is separated is so I can plug it into D->JS as well, which uses a different base Element class
import arsd.dom;
mixin template DomConvenienceFunctions() {
/// Like getElementById, but never returns null: if no element with the given id
/// exists, or it cannot be dynamically cast to `SomeElementType`, an
/// ElementNotFoundException carrying the caller's file and line is thrown.
final SomeElementType requireElementById(SomeElementType = Element)(string id, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
out(ret) {
	assert(ret !is null);
}
body {
	if(auto found = cast(SomeElementType) getElementById(id))
		return found;
	throw new ElementNotFoundException(SomeElementType.stringof, "id=" ~ id, file, line);
}
/// Same idea as requireElementById, but the element is located with
/// querySelector(selector). Throws ElementNotFoundException when nothing
/// matches or the match cannot be cast to `SomeElementType`.
final SomeElementType requireSelector(SomeElementType = Element)(string selector, string file = __FILE__, size_t line = __LINE__)
	if(is(SomeElementType : Element))
out(ret) {
	assert(ret !is null);
}
body {
	if(auto found = cast(SomeElementType) querySelector(selector))
		return found;
	throw new ElementNotFoundException(SomeElementType.stringof, selector, file, line);
}
/// Returns the class attribute split on single spaces, one entry per class.
@property string[] classes() {
	string all = className();
	return all.split(" ");
}
/// Appends `c` to the class attribute unless hasClass(c) is already true.
/// Returns this for chaining.
Element addClass(string c) {
	if(hasClass(c))
		return this; // already there, don't add it twice

	string existing = getAttribute("class");
	// an empty/missing attribute is replaced outright, otherwise space-append
	setAttribute("class", existing.length == 0 ? c : existing ~ " " ~ c);
	return this;
}
/// Removes every occurrence of the class `c` from the class attribute.
/// Does nothing if the class is not present. Returns this for chaining.
Element removeClass(string c) {
	if(!hasClass(c))
		return this;

	string[] kept;
	foreach(existing; classes)
		if(existing != c)
			kept ~= existing;

	// strip() drops the leading/trailing separators left behind by empty entries
	className = kept.join(" ").strip();
	return this;
}
/// Returns whether `c` is one of the space separated entries of the class attribute.
bool hasClass(string c) {
	string current = className;

	// quick rejection: if it is not even a substring it cannot be a class
	if(current.indexOf(c) == -1)
		return false;

	// a substring hit may be part of a longer class name, so compare whole entries
	foreach(candidate; current.split(" ")) {
		if(candidate == c)
			return true;
	}
	return false;
}
/* *******************************
DOM Mutation
*********************************/
/// Removes all inner content from the tag; all child text and elements are gone.
/// Only the `children` array of this element is reset here; the former child
/// objects themselves are not touched by this function.
void removeAllChildren()
out {
assert(this.children.length == 0);
}
body {
children = null;
}
/// Convenience function: builds a new element with Element.make(tagName,
/// childInfo, childInfo2) and appends it to this element. What the two info
/// strings mean depends on the tag (for example, a src instead of inner text).
/// Returns: whatever appendChild returns for the new element.
Element addChild(string tagName, string childInfo = null, string childInfo2 = null)
in {
	assert(tagName !is null);
}
out(e) {
	assert(e.parentNode is this);
	assert(e.parentDocument is this.parentDocument);
}
body {
	// FIXME (maybe): if the thing is self closed, we might want to go ahead and
	// return the parent. That will break existing code though.
	return appendChild(Element.make(tagName, childInfo, childInfo2));
}
/// Builds a new element with Element.make and inserts it into the parent
/// directly after this element, returning the result of insertAfter.
///
/// Between this, addChild, and parentNode, you can build a tree as a single expression.
Element addSibling(string tagName, string childInfo = null, string childInfo2 = null)
in {
	assert(tagName !is null);
	assert(parentNode !is null);
}
out(e) {
	assert(e.parentNode is this.parentNode);
	assert(e.parentDocument is this.parentDocument);
}
body {
	return parentNode.insertAfter(this, Element.make(tagName, childInfo, childInfo2));
}
/// Inserts the existing element `e` into the parent directly after this element.
/// Note: unlike the string overload there is no contract checking parentNode here.
Element addSibling(Element e) {
return parentNode.insertAfter(this, e);
}
/// Appends the existing element `e`; simply forwards to appendChild.
Element addChild(Element e) {
return this.appendChild(e);
}
/// Convenience function to append text intermixed with other children.
/// For example: div.addChildren("You can visit my website by ", new Link("mysite.com", "clicking here"), ".");
/// or div.addChildren("Hello, ", user.name, "!");
/// See also: appendHtml. This might be a bit simpler though because you don't have to think about escaping.
///
/// Each argument must either be implicitly convertible to Element (appended
/// with appendChild) or be a string type (appended with appendText after
/// conversion to `string`). Anything else is rejected at compile time.
void addChildren(T...)(T t) {
	import std.conv : to;
	import std.traits : isSomeString;

	foreach(item; t) {
		// `item` is a value, so the checks must be made on typeof(item);
		// testing the variable itself inside is() is always false.
		static if(is(typeof(item) : Element))
			appendChild(item);
		else static if(isSomeString!(typeof(item)))
			appendText(to!string(item));
		else static assert(0, "Cannot pass " ~ typeof(item).stringof ~ " to addChildren");
	}
}
/// Creates a new `tagName` element (via Element.make with empty text and `info2`),
/// puts `firstChild` inside it, appends the new element to this one and returns it.
/// `firstChild` must not be null.
Element addChild(string tagName, Element firstChild, string info2 = null)
in {
assert(firstChild !is null);
}
out(ret) {
assert(ret !is null);
assert(ret.parentNode is this);
assert(firstChild.parentNode is ret);
assert(ret.parentDocument is this.parentDocument);
//assert(firstChild.parentDocument is this.parentDocument);
}
body {
auto e = Element.make(tagName, "", info2);
e.appendChild(firstChild);
this.appendChild(e);
return e;
}
/// Creates a new `tagName` element (via Element.make with empty text and `info2`),
/// appends it to this element, then assigns `innerHtml.source` to its innerHTML.
/// Returns the new element.
Element addChild(string tagName, in Html innerHtml, string info2 = null)
in {
}
out(ret) {
assert(ret !is null);
assert(ret.parentNode is this);
assert(ret.parentDocument is this.parentDocument);
}
body {
auto e = Element.make(tagName, "", info2);
// the element is attached first, so it already has its parent when innerHTML is assigned
this.appendChild(e);
e.innerHTML = innerHtml.source;
return e;
}
/// Appends every element of the given array, in order, using appendChild.
void appendChildren(Element[] children) {
	// note: the parameter shadows the member of the same name
	for(size_t i = 0; i < children.length; i++)
		appendChild(children[i]);
}
/// Moves this element from its current parent to the end of `newParent`'s children.
/// Requires both a current parent and a non-null `newParent`.
void reparent(Element newParent)
in {
assert(newParent !is null);
assert(parentNode !is null);
}
out {
assert(this.parentNode is newParent);
//assert(isInArray(this, newParent.children));
}
body {
// detach first: appendChild requires that the element has no parent
parentNode.removeChild(this);
newParent.appendChild(this);
}
/**
Strips this tag out of the document, putting its inner html
as children of the parent.
For example, given: < p >hello < b >there< /b >< /p >, if you
call stripOut() on the b element, you'll be left with
< p >hello there< /p >.
The idea here is to make it easy to get rid of garbage
markup you aren't interested in.
Requires that this element currently has a parent. Afterwards this
element has no parent and no children (see the out contract).
*/
void stripOut()
in {
assert(parentNode !is null);
}
out {
assert(parentNode is null);
assert(children.length == 0);
}
body {
foreach(c; children)
c.parentNode = null; // remove the parent
// with children: swap this element for its children; without: just remove it
if(children.length)
parentNode.replaceChild(this, this.children);
else
parentNode.removeChild(this);
this.children.length = 0; // we reparented them all above
}
/// Shorthand for this.parentNode.removeChild(this) that tolerates a null parentNode:
/// an element that is not in a tree is left untouched.
/// Returns: this
Element removeFromTree()
in {
}
out(var) {
	assert(this.parentNode is null);
	assert(var is this);
}
body {
	if(this.parentNode !is null)
		this.parentNode.removeChild(this);
	return this;
}
/// Wraps this element inside the given element.
/// It's like this.replaceWith(what); what.appendChild(this);
///
/// Given: < b >cool< /b >, if you call b.wrapIn(new Link("site.com", "my site is "));
/// you'll end up with: < a href="site.com" >my site is < b >cool< /b >< /a >.
///
/// Returns: `what`, which is now the parent of this element.
Element wrapIn(Element what)
in {
assert(what !is null);
}
out(ret) {
assert(this.parentNode is what);
assert(ret is what);
}
body {
this.replaceWith(what);
what.appendChild(this);
return what;
}
/// Replaces this element with something else in the tree.
/// `e` is first detached from wherever it currently is (removeFromTree),
/// then swapped in for this element via the parent's replaceChild.
/// Requires that this element has a parent. Returns: `e`
Element replaceWith(Element e)
in {
assert(this.parentNode !is null);
}
body {
e.removeFromTree();
this.parentNode.replaceChild(this, e);
return e;
}
/**
	Returns the class attribute split on single spaces,
	one array entry per class name. Const counterpart of `classes`.
*/
string[] classNames() const {
	return split(className(), " ");
}
/**
	Concatenates the values of the leading run of text nodes among the
	children, stopping at the first child that is not a text node.
	If the first node is not text, returns null.
	See also: directText, innerText
*/
string firstInnerText() const {
	string collected;
	size_t index = 0;
	while(index < children.length && children[index].nodeType == NodeType.Text) {
		collected ~= children[index].nodeValue();
		index++;
	}
	return collected;
}
/**
	Returns the concatenated values of all text nodes that are direct
	children of this element; it does not recurse like innerText.
	See also: firstInnerText
*/
@property string directText() {
	string collected;
	foreach(child; children) {
		if(child.nodeType != NodeType.Text)
			continue; // skip non-text children but keep scanning
		collected ~= child.nodeValue();
	}
	return collected;
}
/**
	Sets the direct text, keeping the same place.
	Unlike innerText, this does *not* remove existing
	elements in the element.
	Only the first text node found among the direct children is
	replaced. If there are no text nodes, appendText is called instead.
	So, given (ignore the spaces in the tags):
	< div > < img > text here < /div >
	it will keep the img, and replace the "text here".
*/
@property void directText(string text) {
	foreach(child; children) {
		if(child.nodeType != NodeType.Text)
			continue;
		auto textNode = cast(TextNode) child;
		textNode.contents = text;
		return; // only the first text node is replaced
	}
	appendText(text);
}
}
/// Finds comments that match the given txt. Case insensitive, strips whitespace.
/// This overload searches from `document.root`.
Element[] findComments(Document document, string txt) {
return findComments(document.root, txt);
}
/// Finds the "#comment" nodes under `element` whose text equals `txt`.
/// Both sides are stripped of surrounding whitespace and lower-cased before comparing.
Element[] findComments(Element element, string txt) {
	immutable wanted = txt.strip().toLower();
	Element[] matches;
	foreach(candidate; element.getElementsByTagName("#comment")) {
		if(candidate.nodeValue().strip().toLower() == wanted)
			matches ~= candidate;
	}
	return matches;
}
// Experimental: a thin wrapper around an array of elements that can be
// indexed by selector, iterated as a range, and have calls forwarded to
// every member.
struct ElementCollection {
	/// Wraps a single element.
	this(Element e) {
		elements = [e];
	}

	/// Wraps the result of e.querySelectorAll(selector).
	this(Element e, string selector) {
		elements = e.querySelectorAll(selector);
	}

	/// Wraps an existing array; the array is not copied.
	this(Element[] e) {
		elements = e;
	}

	/// The wrapped elements.
	Element[] elements;
	//alias elements this; // let it implicitly convert to the underlying array

	/// Runs getElementsBySelector(selector) on each wrapped element and
	/// returns the concatenated matches as a new collection.
	ElementCollection opIndex(string selector) {
		ElementCollection matched;
		for(size_t i = 0; i < elements.length; i++)
			matched.elements ~= elements[i].getElementsBySelector(selector);
		return matched;
	}

	/// if you slice it, give the underlying array for easy forwarding of the
	/// collection to range expecting algorithms or looping over.
	Element[] opSlice() {
		return elements;
	}

	/// And input range primitives so we can foreach over this
	void popFront() {
		elements = elements[1 .. $];
	}

	/// ditto
	Element front() {
		return elements[0];
	}

	/// ditto
	bool empty() {
		return elements.length == 0;
	}

	/// Forward method calls to each individual element of the collection
	/// returns this so it can be chained.
	ElementCollection opDispatch(string name, T...)(T t) {
		foreach(member; elements) {
			mixin("member." ~ name)(t);
		}
		return this;
	}

	/// Concatenates two collections into a new one.
	ElementCollection opBinary(string op : "~")(ElementCollection rhs) {
		return ElementCollection(this.elements ~ rhs.elements);
	}
}
// Mixed into proxy structs that provide `get(string)` and `set(string, string)`:
// adds opDispatch and string-index operators that forward to those two functions.
mixin template JavascriptStyleDispatch() {
	// obj.name reads via get; obj.name = v (non-null v) writes via set.
	// popFront is excluded because it would make this look like a range. Do not want.
	string opDispatch(string name)(string v = null) if(name != "popFront") {
		return (v is null) ? get(name) : set(name, v);
	}

	// obj["key"]
	string opIndex(string key) const {
		return get(key);
	}

	// obj["field"] = value
	string opIndexAssign(string value, string field) {
		return set(field, value);
	}

	// FIXME: doesn't seem to work
	// NOTE(review): `fields` is not declared by any of the structs visible in this
	// file, and `key in obj` would normally need opBinaryRight - confirm before use.
	string* opBinary(string op)(string key) if(op == "in") {
		return key in fields;
	}
}
/// A proxy object to do the Element class' dataset property. See Element.dataset for more info.
/// Property names are converted with unCamelCase and prefixed with "data-"
/// to form the real attribute name.
///
/// Do not create this object directly.
struct DataSet {
	this(Element e) {
		this._element = e;
	}

	private Element _element;

	/// Writes the data- attribute and returns the value that was passed in.
	string set(string name, string value) {
		immutable attributeName = "data-" ~ unCamelCase(name);
		_element.setAttribute(attributeName, value);
		return value;
	}

	/// Reads the data- attribute through Element.getAttribute.
	string get(string name) const {
		immutable attributeName = "data-" ~ unCamelCase(name);
		return _element.getAttribute(attributeName);
	}

	mixin JavascriptStyleDispatch!();
}
/// Proxy object for attributes which will replace the main opDispatch eventually.
/// Names are passed to the element's setAttribute/getAttribute unchanged.
struct AttributeSet {
this(Element e) {
this._element = e;
}
private Element _element;
/// Sets the attribute on the wrapped element and returns `value`.
string set(string name, string value) {
_element.setAttribute(name, value);
return value;
}
/// Returns the wrapped element's getAttribute(name).
string get(string name) const {
return _element.getAttribute(name);
}
mixin JavascriptStyleDispatch!();
}
/// Proxy for an element's style attribute. It can be used like the plain
/// attribute string (through alias this) and also gives Javascript style
/// per-property access (element.style.backgroundColor = "red").
struct ElementStyle {
	this(Element parent) {
		_element = parent;
	}

	Element _element;

	/// Reference to the raw "style" entry in the element's attributes array.
	/// If the entry does not exist yet, an empty one is created first.
	@property ref inout(string) _attribute() inout {
		auto s = "style" in _element.attributes;
		if(s is null) {
			auto e = cast() _element; // const_cast
			e.attributes["style"] = ""; // we need something to reference
			s = cast(inout) ("style" in e.attributes);
		}
		assert(s !is null);
		return *s;
	}

	alias _attribute this; // this is meant to allow element.style = element.style ~ " string "; to still work.

	/// Sets one css property and rewrites the whole style attribute from the
	/// parsed rules. "cssFloat" maps to "float"; every other name goes through
	/// unCamelCase. Rules with an empty value are left out when rewriting, so
	/// setting an empty value removes the rule. Returns `value`.
	string set(string name, string value) {
		if(name.length == 0)
			return value;

		name = (name == "cssFloat") ? "float" : unCamelCase(name);

		auto parsed = rules();
		parsed[name] = value;

		_attribute = "";
		foreach(property, setting; parsed) {
			/* css can't do empty rules anyway so we'll use that to remove */
			if(setting.length == 0)
				continue;
			if(_attribute.length)
				_attribute ~= " ";
			_attribute ~= property ~ ": " ~ setting ~ ";";
		}

		_element.setAttribute("style", _attribute); // this is to trigger the observer call
		return value;
	}

	/// Returns the value of one css property, or null if it is not present.
	/// Names are translated the same way as in set.
	string get(string name) const {
		name = (name == "cssFloat") ? "float" : unCamelCase(name);
		auto parsed = rules();
		if(auto found = name in parsed)
			return *found;
		return null;
	}

	/// Parses the style attribute into a property => value array. Pieces are
	/// separated by ';' and the name is split from the value at the first ':'.
	/// A piece without ':' is stored under its own text with an empty value.
	string[string] rules() const {
		string[string] ret;
		foreach(piece; _attribute.split(";")) {
			auto rule = piece.strip();
			if(rule.length == 0)
				continue;

			auto colon = rule.indexOf(":");
			if(colon == -1) {
				ret[rule] = "";
				continue;
			}

			auto property = rule[0 .. colon].strip();
			auto setting = rule[colon + 1 .. $].strip();
			ret[property] = setting;
		}
		return ret;
	}

	mixin JavascriptStyleDispatch!();
}
/// Converts a camel cased propertyName to a css style dashed property-name:
/// every ASCII upper case letter becomes a dash followed by its lower case form.
/// All other characters are copied unchanged.
string unCamelCase(string a) {
	string dashed;
	foreach(char ch; a) {
		if(ch < 'A' || ch > 'Z') {
			dashed ~= ch;
			continue;
		}
		dashed ~= '-';
		dashed ~= cast(char) (ch - 'A' + 'a');
	}
	return dashed;
}
/// Translates a css style property-name to a camel cased propertyName:
/// dashes are dropped and the character following a dash is upper-cased.
string camelCase(string a) {
	string result;
	bool capitalizeNext = false;
	foreach(c; a) {
		if(c == '-') {
			capitalizeNext = true;
			continue;
		}
		if(!capitalizeNext) {
			result ~= c;
			continue;
		}
		capitalizeNext = false;
		result ~= toUpper("" ~ c);
	}
	return result;
}
// domconvenience ends }
// @safe:
// NOTE: do *NOT* override toString on Element subclasses. It won't work.
// Instead, override writeToAppender();
// FIXME: should I keep processing instructions like < ? ... ? > and < ! ... > (comments too lol)? I *want* them stripped out of most of my output, but I want to be able to parse and create them too.
// Stripping them is useful for reading php as html.... but adding them
// is good for building php.
// I need to maintain compatibility with the way it is now too.
import std.string;
import std.exception;
import std.uri;
import std.array;
import std.range;
//import std.stdio;
// tag soup works for most the crap I know now! If you have two bad closing tags back to back, it might erase one, but meh
// that's rarer than the flipped closing tags that hack fixes so I'm ok with it. (Odds are it should be erased anyway; it's
// most likely a typo so I say kill kill kill.
/// This might belong in another module, but it represents a file with a mime type and some data.
/// Document implements this interface with type = text/html (see Document.contentType for more info)
/// and data = document.toString, so you can return Documents anywhere web.d expects FileResources.
interface FileResource {
@property string contentType() const; /// the content-type of the file. e.g. "text/html; charset=utf-8" or "image/png"
immutable(ubyte)[] getData() const; /// the raw bytes of the file
}
/// Node type codes compared against Element.nodeType. Only text nodes are
/// listed here; the code in this file uses it to tell text children apart
/// from other children.
enum NodeType { Text = 3 }
/// Null check, or dynamic cast plus null check, on any element: returns `e`
/// cast to `T`, throwing ElementNotFoundException if the result is null.
T require(T = Element, string file = __FILE__, int line = __LINE__)(Element e) if(is(T : Element))
in {}
out(ret) { assert(ret !is null); }
body {
	if(auto converted = cast(T) e)
		return converted;
	throw new ElementNotFoundException(T.stringof, "passed value", file, line);
}
/// This represents almost everything in the DOM.
class Element {
mixin DomConvenienceFunctions!();
// Intentionally does nothing in the base class: this is primarily a virtual
// hook meant to be overridden, e.g. for links and forms.
void setValue(string field, string value) { }
// Single choke point for mutation notifications, so observer support can be
// removed in one place if it gets slow. Not every operation is reported yet.
// Elements without a parentDocument send nothing.
private void sendObserverEvent(DomMutationOperations operation, string s1 = null, string s2 = null, Element r = null, Element r2 = null) {
	if(parentDocument !is null) {
		DomMutationEvent event;
		event.operation = operation;
		event.target = this;
		event.relatedString = s1;
		event.relatedString2 = s2;
		event.related = r;
		event.related2 = r2;
		parentDocument.dispatchMutationEvent(event);
	}
}
// putting all the members up front
// this ought to be private. don't use it directly.
Element[] children;
/// The name of the tag. Remember, changing this doesn't change the dynamic type of the object.
string tagName;
/// This is where the attributes are actually stored. You should use getAttribute, setAttribute, and hasAttribute instead.
string[string] attributes;
/// In XML, it is valid to write < tag / > for all elements with no children, but that breaks HTML, so I don't do it here.
/// Instead, this flag tells if it should be. It is based on the source document's notation and a html element list.
private bool selfClosed;
/// Get the parent Document object that contains this element.
/// It may be null, so remember to check for that.
Document parentDocument;
///.
Element parentNode;
// the next few methods are for implementing interactive kind of things
private CssStyle _computedStyle;
// these are here for event handlers. Don't forget that this library never fires events.
// (I'm thinking about putting this in a version statement so you don't have the baggage. The instance size of this class is 56 bytes right now.)
EventHandler[][string] bubblingEventHandlers;
EventHandler[][string] capturingEventHandlers;
EventHandler[string] defaultEventHandlers;
/// Registers `handler` for `event` in the capturing list when `useCapture`
/// is set, otherwise in the bubbling list. A leading "on" is removed from
/// the event name (when something follows it), so "onclick" and "click"
/// name the same event.
void addEventListener(string event, EventHandler handler, bool useCapture = false) {
	immutable hasOnPrefix = event.length > 2 && event[0 .. 2] == "on";
	if(hasOnPrefix)
		event = event[2 .. $];

	if(!useCapture)
		bubblingEventHandlers[event] ~= handler;
	else
		capturingEventHandlers[event] ~= handler;
}
// and now methods
/// Convenience function to try to do the right thing for HTML. This is the main
/// way I create elements.
///
/// The dynamic type of the result depends on `tagName` (TextNode, Table, Link,
/// Form, TableRow, TableCell, or plain Element). When `childInfo` is not null,
/// `childInfo` and `childInfo2` are applied in a tag specific way (see the
/// second switch); when it is null both are ignored.
/// The new element has no parent document.
static Element make(string tagName, string childInfo = null, string childInfo2 = null) {
bool selfClosed = tagName.isInArray(selfClosedElements);
Element e;
// want to create the right kind of object for the given tag...
switch(tagName) {
case "#text":
// text nodes are returned right away; childInfo is the text and childInfo2 is unused
e = new TextNode(null, childInfo);
return e;
// break;
case "table":
e = new Table(null);
break;
case "a":
e = new Link(null);
break;
case "form":
e = new Form(null);
break;
case "tr":
e = new TableRow(null);
break;
case "td", "th":
e = new TableCell(null, tagName);
break;
default:
e = new Element(null, tagName, null, selfClosed); // parent document should be set elsewhere
}
// make sure all the stuff is constructed properly FIXME: should probably be in all the right constructors too
e.tagName = tagName;
e.selfClosed = selfClosed;
if(childInfo !is null)
switch(tagName) {
/* html5 convenience tags */
case "audio":
// childInfo: src of a nested source element; childInfo2: text appended after it
if(childInfo.length)
e.addChild("source", childInfo);
if(childInfo2 !is null)
e.appendText(childInfo2);
break;
case "source":
// childInfo: src; childInfo2: type
e.src = childInfo;
if(childInfo2 !is null)
e.type = childInfo2;
break;
/* regular html 4 stuff */
case "img":
// childInfo: src; childInfo2: alt
e.src = childInfo;
if(childInfo2 !is null)
e.alt = childInfo2;
break;
case "link":
// childInfo: href; childInfo2: rel
e.href = childInfo;
if(childInfo2 !is null)
e.rel = childInfo2;
break;
case "option":
// childInfo: visible text; childInfo2: value
e.innerText = childInfo;
if(childInfo2 !is null)
e.value = childInfo2;
break;
case "input":
// always created as type="hidden"; childInfo: name; childInfo2: value
e.type = "hidden";
e.name = childInfo;
if(childInfo2 !is null)
e.value = childInfo2;
break;
case "button":
// childInfo: text; childInfo2: type
e.innerText = childInfo;
if(childInfo2 !is null)
e.type = childInfo2;
break;
case "a":
// childInfo: link text; childInfo2: href
e.innerText = childInfo;
if(childInfo2 !is null)
e.href = childInfo2;
break;
case "script":
case "style":
// childInfo is stored as raw source; childInfo2 is unused
e.innerRawSource = childInfo;
break;
case "meta":
// childInfo: name; childInfo2: content
e.name = childInfo;
if(childInfo2 !is null)
e.content = childInfo2;
break;
/* generically, assume we were passed text and perhaps class */
default:
e.innerText = childInfo;
if(childInfo2.length)
e.className = childInfo2;
}
return e;
}
/// Creates a `tagName` element and sets its innerHTML to `innerHtml.source`.
static Element make(string tagName, in Html innerHtml, string childInfo2 = null) {
// FIXME: childInfo2 is ignored when info1 is null
// (a null childInfo is passed below, so childInfo2 currently has no effect)
auto m = Element.make(tagName, cast(string) null, childInfo2);
m.innerHTML = innerHtml.source;
return m;
}
/// Creates a `tagName` element and appends `child` to it.
/// NOTE(review): a null childInfo is passed to the string overload, which then
/// skips its childInfo handling, so childInfo2 appears to have no effect here either.
static Element make(string tagName, Element child, string childInfo2 = null) {
auto m = Element.make(tagName, cast(string) null, childInfo2);
m.appendChild(child);
return m;
}
/// Generally, you don't want to call this yourself - use Element.make or document.createElement instead.
/// Stores the arguments as given; a null `_attributes` leaves the attribute array untouched.
/// `_tagName` must not contain a space (checked with an assert).
this(Document _parentDocument, string _tagName, string[string] _attributes = null, bool _selfClosed = false) {
parentDocument = _parentDocument;
tagName = _tagName;
if(_attributes !is null)
attributes = _attributes;
selfClosed = _selfClosed;
// optional debugging aid: records the address of the attributes member as data-node-index
version(dom_node_indexes)
this.dataset.nodeIndex = to!string(&(this.attributes));
assert(_tagName.indexOf(" ") == -1);//, "<" ~ _tagName ~ "> is invalid");
}
/// Convenience constructor when you don't care about the parentDocument. Note this might break things on the document.
/// Note also that without a parent document, elements are always in strict, case-sensitive mode.
/// selfClosed is derived from whether the tag name is listed in selfClosedElements.
this(string _tagName, string[string] _attributes = null) {
tagName = _tagName;
if(_attributes !is null)
attributes = _attributes;
selfClosed = tagName.isInArray(selfClosedElements);
// this is meant to reserve some memory. It makes a small, but consistent improvement.
//children.length = 8;
//children.length = 0;
version(dom_node_indexes)
this.dataset.nodeIndex = to!string(&(this.attributes));
}
// Private constructor that only records the parent document; tagName and
// everything else keep their default values.
private this(Document _parentDocument) {
parentDocument = _parentDocument;
version(dom_node_indexes)
this.dataset.nodeIndex = to!string(&(this.attributes));
}
/* *******************************
Navigating the DOM
*********************************/
/// Returns the first child of this element, or null when there are no children.
/// Remember, text nodes are children too.
@property Element firstChild() {
	if(children.length == 0)
		return null;
	return children[0];
}
/// Returns the last child of this element, or null when there are no children.
@property Element lastChild() {
	if(children.length == 0)
		return null;
	return children[$ - 1];
}
/// Returns the nearest sibling that comes before this element in the parent's
/// child list, or null if there is none (or this element has no parent).
///
/// Params:
///   tagName = null accepts any node (text included); "*" accepts any node that
///             is not a text node; any other value must equal the sibling's tagName.
@property Element previousSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;

	Element ps = null;
	foreach(e; this.parentNode.childNodes) {
		if(e is this)
			break;

		if(tagName == "*") {
			// keep scanning: a later match is closer to this element, so it
			// must replace an earlier one instead of ending the search
			if(e.nodeType != NodeType.Text)
				ps = e;
			continue;
		}

		if(tagName is null || e.tagName == tagName)
			ps = e;
	}
	return ps;
}
/// Returns the first sibling after this element in the parent's child list
/// that is acceptable, or null if there is none (or there is no parent).
/// A null `tagName` accepts any node, "*" accepts any non-text node, and any
/// other value must equal the sibling's tagName.
@property Element nextSibling(string tagName = null) {
	if(this.parentNode is null)
		return null;

	bool passedSelf = false;
	foreach(e; this.parentNode.childNodes) {
		if(e is this) {
			passedSelf = true;
			continue;
		}
		if(!passedSelf)
			continue; // still before this element

		if(tagName == "*" && e.nodeType != NodeType.Text)
			return e;
		if(tagName is null || e.tagName == tagName)
			return e;
	}
	return null;
}
/// Gets the nearest node, going up the chain, with the given tagName.
///
/// When `tagName` is null and `T` is Form, Table or Link, the tag name defaults
/// to "form", "table" or "a" respectively; for any other `T` a null `tagName`
/// matches the direct parent.
///
/// Returns: the ancestor found. For `T == Element` this may be null.
/// Throws: ElementNotFoundException if `T` is a subclass and no matching
/// ancestor was found, or the one found cannot be cast to `T`.
T getParent(T = Element)(string tagName = null) if(is(T : Element)) {
	if(tagName is null) {
		static if(is(T == Form))
			tagName = "form";
		else static if(is(T == Table))
			tagName = "table";
		else static if(is(T == Link))
			tagName = "a"; // must be an assignment; a comparison here has no effect
	}

	auto par = this.parentNode;
	while(par !is null) {
		if(tagName is null || par.tagName == tagName)
			break;
		par = par.parentNode;
	}

	static if(is(T == Element)) {
		return par;
	} else {
		auto t = cast(T) par;
		if(t is null)
			throw new ElementNotFoundException("", tagName ~ " parent not found");
		return t;
	}
}
/// Returns the first element yielded by `tree` whose id attribute equals `id`,
/// or null if there is none.
Element getElementById(string id) {
	// FIXME: I use this function a lot, and it's kinda slow
	// not terribly slow, but not great.
	foreach(candidate; tree) {
		if(candidate.id != id)
			continue;
		return candidate;
	}
	return null;
}
/// Returns the first element of getElementsBySelector(selector), or null if
/// nothing matched.
/// Note: you can give multiple selectors, separated by commas.
Element querySelector(string selector) {
	// FIXME: inefficient; it gets all results just to discard most of them
	auto matches = getElementsBySelector(selector);
	return matches.length ? matches[0] : null;
}
/// A more standards-compliant alias for getElementsBySelector; it forwards
/// the selector unchanged and returns every match.
Element[] querySelectorAll(string selector) {
return getElementsBySelector(selector);
}
/**
	Does a CSS selector
	* -- all, default if nothing else is there
	tag#id.class.class.class:pseudo[attrib=what][attrib=what] OP selector
	It is all additive
	OP
	space = descendant
	> = direct descendant
	+ = sibling (E+F Matches any F element immediately preceded by a sibling element E)
	[foo] Foo is present as an attribute
	[foo="warning"] Matches any E element whose "foo" attribute value is exactly equal to "warning".
	E[foo~="warning"] Matches any E element whose "foo" attribute value is a list of space-separated values, one of which is exactly equal to "warning"
	E[lang|="en"] Matches any E element whose "lang" attribute has a hyphen-separated list of values beginning (from the left) with "en".
	[item$=sdas] ends with
	[item^=sdsad] begins with
	Quotes are optional here.
	Pseudos:
	:first-child
	:last-child
	:link (same as a[href] for our purposes here)
	There can be commas separating the selector. A comma separated list result is OR'd onto the main.
	This ONLY cares about elements. text, etc, are ignored
	There should be two functions: given element, does it match the selector? and given a selector, give me all the elements

	Tag names are matched case-sensitively unless this element belongs to a
	document in loose mode. The results of the comma separated parts are
	simply concatenated in order.
*/
Element[] getElementsBySelector(string selector) {
	// FIXME: this function could probably use some performance attention
	// ... but only mildly so according to the profiler in the big scheme of things; probably negligible in a big app.
	immutable bool caseSensitiveTags = !(parentDocument && parentDocument.loose);

	Element[] found;
	foreach(part; parseSelectorString(selector, caseSensitiveTags))
		found ~= part.getElements(this);
	return found;
}
/// Returns the elements matching the selector "." ~ cn, i.e. the class name
/// is handed to getElementsBySelector with a dot in front and no escaping.
Element[] getElementsByClassName(string cn) {
// is this correct?
return getElementsBySelector("." ~ cn);
}
/// Returns every element yielded by `tree` whose tagName equals `tag`.
/// In a loose-mode document `tag` is lower-cased before comparing.
Element[] getElementsByTagName(string tag) {
	immutable bool loose = parentDocument && parentDocument.loose;
	string wanted = loose ? tag.toLower() : tag;

	Element[] matches;
	foreach(candidate; tree) {
		if(candidate.tagName == wanted)
			matches ~= candidate;
	}
	return matches;
}
/* *******************************
Attributes
*********************************/
/**
	Gets the given attribute value, or null if the
	attribute is not set. In a loose-mode document the
	name is lower-cased before the lookup.
	Note that the returned string is decoded, so it no longer contains any xml entities.
*/
string getAttribute(string name) const {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	if(auto stored = name in attributes)
		return *stored;
	return null;
}
/**
	Sets an attribute. Returns this for easy chaining.

	In a loose-mode document the name is lower-cased first.

	For href and src (compared case-insensitively), a leading
	"javascript:" or "vbscript:" scheme is removed from the value before
	it is stored. Leading whitespace in front of such a scheme is
	removed along with it, and the check is repeated so stacked
	prefixes are all removed. Values that don't start with one of these
	schemes are stored unchanged.

	A mutation observer event is sent after the attribute is stored.
*/
Element setAttribute(string name, string value) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	// I never use this shit legitimately and neither should you
	auto it = name.toLower();
	if(it == "href" || it == "src") {
		for(;;) {
			// slice the same string that was inspected, so leading
			// whitespace cannot shift the cut position
			auto trimmed = value.stripLeft();
			auto lowered = trimmed.toLower();
			if(lowered.startsWith("vbscript:"))
				value = trimmed[9 .. $];
			else if(lowered.startsWith("javascript:"))
				value = trimmed[11 .. $];
			else
				break;
		}
	}

	attributes[name] = value;
	sendObserverEvent(DomMutationOperations.setAttribute, name, value);
	return this;
}
/**
	Returns whether the attribute exists (even with an empty value).
	In a loose-mode document the name is lower-cased before the lookup.
*/
bool hasAttribute(string name) {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();
	return (name in attributes) !is null;
}
/**
	Removes the given attribute from the element, if present.
	In a loose-mode document the name is lower-cased first.
	The removeAttribute observer event is sent whether or not
	the attribute existed. Returns this.
*/
Element removeAttribute(string name)
out(ret) {
	assert(ret is this);
}
body {
	if(parentDocument && parentDocument.loose)
		name = name.toLower();

	// remove() on a missing key is a harmless no-op
	attributes.remove(name);

	sendObserverEvent(DomMutationOperations.removeAttribute, name);
	return this;
}
/**
	Gets the class attribute's contents. Returns
	an empty (non-null) string if it has no class.
*/
@property string className() const {
	auto c = getAttribute("class");
	return c is null ? "" : c;
}
/// Replaces the whole class attribute with `c`. Returns this for chaining.
@property Element className(string c) {
setAttribute("class", c);
return this;
}
/**
	Provides easy access to attributes, object style. Only names accepted
	by isConvenientAttribute are allowed here.
	auto a = Element.make("a");
	a.href = "cool.html"; // this is the same as a.setAttribute("href", "cool.html");
	string where = a.href; // same as a.getAttribute("href");
	A null argument means "read"; the current value is returned either way.
*/
@property string opDispatch(string name)(string v = null) if(isConvenientAttribute(name)) {
	if(v is null)
		return getAttribute(name);
	setAttribute(name, v);
	return getAttribute(name);
}
/**
DEPRECATED: generally open opDispatch caused a lot of unforeseen trouble with compile time duck typing and UFCS extensions.
so I want to remove it. A small whitelist of attributes is still allowed, but others are not.
Instead, use element.attrs.attribute, element.attrs["attribute"],
or element.getAttribute("attribute")/element.setAttribute("attribute").
This overload matches every name that is not on the whitelist and fails
compilation with a message pointing to element.attrs.
*/
@property string opDispatch(string name)(string v = null) if(!isConvenientAttribute(name)) {
static assert(0, "Don't use " ~ name ~ " direct on Element, instead use element.attrs.attributeName");
}
/*
// this would be nice for convenience, but it broke the getter above.
@property void opDispatch(string name)(bool boolean) if(name != "popFront") {
if(boolean)
setAttribute(name, name);
else
removeAttribute(name);
}
*/
/**
Returns the element's children (const view of the `children` array; no copy is made).
*/
@property const(Element[]) childNodes() const {
return children;
}
/// Mutable version of the same: returns the `children` array itself, not a copy.
@property Element[] childNodes() { // FIXME: the above should be inout
return children;
}
/// HTML5's dataset property. It is an alternate view into attributes with the data- prefix.
///
/// Given: < a data-my-property="cool" >
///
/// We get: assert(a.dataset.myProperty == "cool");
@property DataSet dataset() {
return DataSet(this);
}
/// Gives dot/opIndex access to attributes through an AttributeSet proxy
/// ele.attrs.largeSrc = "foo"; // same as ele.setAttribute("largeSrc", "foo")
@property AttributeSet attrs() {
return AttributeSet(this);
}
/// Provides both string and object style (like in Javascript) access to the style attribute.
/// A new ElementStyle proxy wrapping this element is returned on every call.
@property ElementStyle style() {
return ElementStyle(this);
}
/// This sets the style attribute with a string (replacing the previous value)
/// and returns the style proxy for further changes.
@property ElementStyle style(string s) {
this.setAttribute("style", s);
return this.style;
}
// Currently a no-op: the whole body is commented out. The disabled sketch
// below walks the requested attribute names (all of them by default) and
// has empty cases for id, class and style.
private void parseAttributes(string[] whichOnes = null) {
/+
if(whichOnes is null)
whichOnes = attributes.keys;
foreach(attr; whichOnes) {
switch(attr) {
case "id":
break;
case "class":
break;
case "style":
break;
default:
// we don't care about it
}
}
+/
}
// if you change something here, it won't apply... FIXME const? but changing it would be nice if it applies to the style attribute too though you should use style there.
/// Returns a CssStyle built from this element's style attribute plus a few
/// legacy presentational attributes (width, height, bgcolor, text on body,
/// color). The object is created on first use and then cached, so later
/// attribute changes are not reflected.
@property CssStyle computedStyle() {
	if(_computedStyle !is null)
		return _computedStyle;

	auto style = this.getAttribute("style");

	/* we'll treat shitty old html attributes as css here */
	if(this.hasAttribute("width"))
		style ~= "; width: " ~ this.getAttribute("width");
	if(this.hasAttribute("height"))
		style ~= "; height: " ~ this.getAttribute("height");
	if(this.hasAttribute("bgcolor"))
		style ~= "; background-color: " ~ this.getAttribute("bgcolor");
	if(this.tagName == "body" && this.hasAttribute("text"))
		style ~= "; color: " ~ this.getAttribute("text");
	if(this.hasAttribute("color"))
		style ~= "; color: " ~ this.getAttribute("color");
	/* done */

	_computedStyle = new CssStyle(null, style); // gives at least something to work with
	return _computedStyle;
}
/// These properties are useless in most cases, but if you write a layout engine on top of this lib, they may be good
version(browser) {
	void* expansionHook; ///ditto
	int offsetWidth; ///ditto
	int offsetHeight; ///ditto
	int offsetLeft; ///ditto
	int offsetTop; ///ditto
	Element offsetParent; ///ditto
	bool hasLayout; ///ditto
	int zIndex; ///ditto

	///ditto
	int absoluteLeft() {
		// own offset plus the offsetLeft of every element up the offsetParent chain
		int total = offsetLeft;
		for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetLeft;
		return total;
	}

	///ditto
	int absoluteTop() {
		// own offset plus the offsetTop of every element up the offsetParent chain
		int total = offsetTop;
		for(auto ancestor = offsetParent; ancestor; ancestor = ancestor.offsetParent)
			total += ancestor.offsetTop;
		return total;
	}
}
// Back to the regular dom functions
public:
/* *******************************
DOM Mutation
*********************************/
/// Removes all inner content from the tag; all child text and elements are gone.
///
/// Each former child has its parentNode reset to null, so it can be attached
/// somewhere else afterwards (appendChild and friends require a parentless node).
void removeAllChildren()
	out {
		assert(this.children.length == 0);
	}
body {
	// detach first: otherwise the old children keep pointing at a parent
	// that no longer lists them
	foreach(child; children)
		child.parentNode = null;
	children = null;
}
/// Adds e as the last child of this element and returns it.
/// e must be non-null and must not already have a parent.
Element appendChild(Element e)
	in {
		assert(e !is null);
		assert(e.parentNode is null);
	}
	out (ret) {
		assert(e.parentNode is this);
		assert(e.parentDocument is this.parentDocument);
		assert(e is ret);
	}
body {
	// adopt the newcomer
	e.parentDocument = this.parentDocument;
	e.parentNode = this;

	// an element with content can no longer be written as self-closed
	selfClosed = false;
	children ~= e;

	sendObserverEvent(DomMutationOperations.appendChild, null, null, e);
	return e;
}
/// Inserts `what` into this node's children, immediately before the existing child `where`.
/// Returns `what`.
Element insertBefore(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);
		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
body {
	foreach(idx; 0 .. children.length) {
		if(children[idx] !is where)
			continue;

		children = children[0..idx] ~ what ~ children[idx..$];
		what.parentDocument = this.parentDocument;
		what.parentNode = this;
		return what;
	}

	// `where` was not among the children: nothing is inserted
	// (the contracts catch this in builds that check them)
	return what;
}
/// Inserts `what` into this node's children, immediately after the existing child `where`.
/// Returns `what`.
Element insertAfter(in Element where, Element what)
	in {
		assert(where !is null);
		assert(where.parentNode is this);
		assert(what !is null);
		assert(what.parentNode is null);
	}
	out (ret) {
		assert(where.parentNode is this);
		assert(what.parentNode is this);
		assert(what.parentDocument is this.parentDocument);
		assert(ret is what);
	}
body {
	foreach(idx; 0 .. children.length) {
		if(children[idx] !is where)
			continue;

		children = children[0 .. idx + 1] ~ what ~ children[idx + 1 .. $];
		what.parentNode = this;
		what.parentDocument = this.parentDocument;
		return what;
	}

	// `where` was not among the children: nothing is inserted
	// (the contracts catch this in builds that check them)
	return what;
}
/// Puts `replacement` into the slot currently held by `child`.
/// Returns the old child, which is now parentless.
Element swapNode(Element child, Element replacement)
	in {
		assert(child !is null);
		assert(replacement !is null);
		assert(child.parentNode is this);
	}
	out(ret) {
		assert(ret is child);
		assert(ret.parentNode is null);
		assert(replacement.parentNode is this);
		assert(replacement.parentDocument is this.parentDocument);
	}
body {
	foreach(ref slot; this.children) {
		if(slot !is child)
			continue;

		slot.parentNode = null; // orphan the outgoing node
		slot = replacement;
		slot.parentNode = this;
		slot.parentDocument = this.parentDocument;
		return child;
	}

	// the in contract says child is ours, so it must have been found
	assert(0);
}
/// Appends a new TextNode holding `text` and returns this element (not the
/// new node), so calls can be chained.
Element appendText(string text) {
	Element node = new TextNode(parentDocument, text);
	appendChild(node);
	return this;
}
/// Returns a newly built array of the direct children whose nodeType is 1.
@property Element[] childElements() {
	Element[] elementsOnly;
	foreach(node; children) {
		if(node.nodeType != 1)
			continue;
		elementsOnly ~= node;
	}
	return elementsOnly;
}
/// Appends the given html to the element, returning the elements appended
Element[] appendHtml(string html) {
	// Parse inside a synthetic wrapper element so that every top-level node of
	// `html` ends up as a child of d.root. Without the wrapper, the markup's
	// own first element would become the root and be left behind.
	Document d = new Document("<root>" ~ html ~ "</root>");
	return stealChildren(d.root);
}
/// Inserts `child` into this element's children directly after `where`.
/// Note the argument order: the new node comes first, the anchor second.
void insertChildAfter(Element child, Element where)
	in {
		assert(child !is null);
		assert(where !is null);
		assert(where.parentNode is this);
		assert(!selfClosed);
		//assert(isInArray(where, children));
	}
	out {
		assert(child.parentNode is this);
		assert(where.parentNode is this);
		//assert(isInArray(where, children));
		//assert(isInArray(child, children));
	}
body {
	foreach(idx, node; children) {
		if(node !is where)
			continue;

		immutable at = idx + 1; // slot right behind the anchor
		children = children[0..at] ~ child ~ children[at..$];
		child.parentNode = this;
		child.parentDocument = this.parentDocument;
		break;
	}
}
/// Moves every child of `e` into this element and leaves `e` empty.
/// With no `position` they are appended; otherwise they are spliced in just
/// before the child `position`. Returns the moved nodes.
Element[] stealChildren(Element e, Element position = null)
	in {
		assert(!selfClosed);
		assert(e !is null);
		//if(position !is null)
			//assert(isInArray(position, children));
	}
	out (ret) {
		assert(e.children.length == 0);
		debug foreach(child; ret) {
			assert(child.parentNode is this);
			assert(child.parentDocument is this.parentDocument);
		}
	}
body {
	// re-home the nodes before touching either children array
	foreach(node; e.children) {
		node.parentNode = this;
		node.parentDocument = this.parentDocument;
	}

	if(position !is null) {
		foreach(idx, existing; children) {
			if(existing !is position)
				continue;
			children = children[0..idx] ~
				e.children ~
				children[idx..$];
			break;
		}
	} else {
		children ~= e.children;
	}

	auto moved = e.children.dup;
	e.children.length = 0;
	return moved;
}
/// Makes `e` the first entry of our children list and returns it.
/// The given element must not have a parent already.
Element prependChild(Element e)
	in {
		assert(e.parentNode is null);
		assert(!selfClosed);
	}
	out {
		assert(e.parentNode is this);
		assert(e.parentDocument is this.parentDocument);
		assert(children[0] is e);
	}
body {
	e.parentDocument = this.parentDocument;
	e.parentNode = this;
	children = e ~ children;
	return e;
}
/**
	Serializes every child, in order, into `where` and returns just the part
	written by this call, formatted such that it could be pasted into an XML file.
	Returns the empty string when the children array is null.
*/
@property string innerHTML(Appender!string where = appender!string()) const {
	if(children is null)
		return "";

	// remember where our output begins; the buffer may already hold data
	immutable begin = where.data.length;
	foreach(node; children) {
		assert(node !is null);
		node.writeToAppender(where);
	}
	return where.data[begin .. $];
}
/**
	Takes some html and replaces the element's children with the tree made from the string.

	An empty string simply clears the element. `strict` is forwarded to both
	strictness arguments of the parser. Returns this element.
*/
@property Element innerHTML(string html, bool strict = false) {
	if(html.length == 0) {
		// I often say innerHTML = ""; as a shortcut to clear it out,
		// so let's optimize that slightly.
		removeAllChildren();
		return this;
	}

	selfClosed = false;

	// Parse inside a synthetic wrapper so that all top-level nodes of `html`
	// become children of doc.root, whatever the markup starts with.
	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document

	children = doc.root.children;
	foreach(c; children) {
		c.parentNode = this;
		c.parentDocument = this.parentDocument;
	}
	reparentTreeDocuments();

	// the temporary document must not keep listing nodes that now belong to us
	doc.root.children = null;
	return this;
}
/// ditto
@property Element innerHTML(Html html) {
	// forwards the wrapped source text to the string overload
	return this.innerHTML(html.source);
}
// Stamps this element's parentDocument onto every node produced by this.tree.
private void reparentTreeDocuments() {
	foreach(node; this.tree) {
		node.parentDocument = this.parentDocument;
	}
}
/**
	Replaces this node with the given html string, which is parsed
	Note: this invalidates the this reference, since it is removed
	from the tree.
	Returns the new children that replace this.
*/
@property Element[] outerHTML(string html) {
	// Parse inside a synthetic wrapper so that all top-level nodes of `html`
	// become children of doc.root, whatever the markup starts with.
	auto doc = new Document();
	doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness

	// adopt the parsed nodes as our own children first...
	children = doc.root.children;
	foreach(c; children) {
		c.parentNode = this;
		c.parentDocument = this.parentDocument;
	}
	reparentTreeDocuments();

	// ...then take this element itself out of the tree
	stripOut();
	return doc.root.children;
}
/// The html for this element including its own tag.
/// Simply forwards to toString().
@property string outerHTML() {
	return toString();
}
/// This sets the inner content of the element *without* trying to parse it.
/// You can inject any code in there; this serves as an escape hatch from the dom.
///
/// The only times you might actually need it are for < style > and < script > tags in html.
/// Other than that, innerHTML and/or innerText should do the job.
@property void innerRawSource(string rawSource) {
	// the text is carried by a single RawSource child
	auto raw = new RawSource(parentDocument, rawSource);
	raw.parentNode = this;

	children.length = 0;
	children ~= raw;
}
/// Puts `replace` into the slot held by the child `find`, leaving `find`
/// parentless. Returns `replace`.
/// Throws: Exception if `find` is not one of this element's children.
Element replaceChild(Element find, Element replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(replace.parentNode is null);
	}
	out(ret) {
		assert(ret is replace);
		assert(replace.parentNode is this);
		assert(replace.parentDocument is this.parentDocument);
		assert(find.parentNode is null);
	}
body {
	foreach(idx, candidate; children) {
		if(candidate !is find)
			continue;

		replace.parentNode = this;
		children[idx].parentNode = null;
		children[idx] = replace;
		replace.parentDocument = this.parentDocument;
		return replace;
	}

	throw new Exception("no such child");
}
/**
	Swaps the single child `find` for the whole group `replace`, which takes
	its position in order. An empty group just removes `find`.
	Throws: Exception if `find` is not one of this element's children.
*/
void replaceChild(Element find, Element[] replace)
	in {
		assert(find !is null);
		assert(replace !is null);
		assert(find.parentNode is this);
		debug foreach(r; replace)
			assert(r.parentNode is null);
	}
	out {
		assert(find.parentNode is null);
		assert(children.length >= replace.length);
		debug foreach(child; children)
			assert(child !is find);
		debug foreach(r; replace)
			assert(r.parentNode is this);
	}
body {
	if(replace.length == 0) {
		removeChild(find);
		return;
	}
	assert(replace.length);

	for(int i = 0; i < children.length; i++) {
		if(children[i] !is find)
			continue;

		children[i].parentNode = null; // this element should now be dead
		// the first replacement reuses the old slot...
		children[i] = replace[0];
		foreach(newcomer; replace) {
			newcomer.parentNode = this;
			newcomer.parentDocument = this.parentDocument;
		}
		// ...and the rest are spliced in right behind it
		children = .insertAfter(children, i, replace[1..$]);
		return;
	}

	throw new Exception("no such child");
}
/**
	Takes the child `c` out of this element's children and clears its parentNode.
	Returns the removed element.
	Throws: Exception if `c` is not found among the children.
*/
Element removeChild(Element c)
	in {
		assert(c !is null);
		assert(c.parentNode is this);
	}
	out {
		debug foreach(child; children)
			assert(child !is c);
		assert(c.parentNode is null);
	}
body {
	foreach(idx, node; children) {
		if(node !is c)
			continue;

		children = children[0..idx] ~ children[idx+1..$];
		c.parentNode = null;
		return c;
	}

	throw new Exception("no such child");
}
/// Empties this element and returns a copy of the old child list; every
/// returned node has had its parentNode cleared.
Element[] removeChildren()
	out (ret) {
		assert(children.length == 0);
		debug foreach(r; ret)
			assert(r.parentNode is null);
	}
body {
	Element[] detached = children.dup;
	foreach(node; detached) {
		node.parentNode = null;
	}
	children.length = 0;
	return detached;
}
/**
	Fetch the inside text, with all tags stripped out.

	< p >cool < b >api< /b > &amp; code dude< /p >
	innerText of that is "cool api & code dude".

	This does not match what real innerText does!
	http://perfectionkills.com/the-poor-misunderstood-innerText/
	It is more like textContent.
*/
@property string innerText() const {
	string collected;
	foreach(node; children) {
		// text nodes contribute their value; anything else is recursed into
		if(node.nodeType == NodeType.Text)
			collected ~= node.nodeValue();
		else
			collected ~= node.innerText;
	}
	return collected;
}
/**
	Replaces all children with a single text node holding `text`.
	You don't have to worry about entity encoding.
*/
@property void innerText(string text) {
	Element node = new TextNode(parentDocument, text);
	node.parentNode = this;

	selfClosed = false;
	children = [node];
}
/**
	Strips this node out of the document, replacing it with the given text.
	Goes through parentNode.replaceChild, so the element needs to have a parent.
*/
@property void outerText(string text) {
	auto replacement = new TextNode(parentDocument, text);
	parentNode.replaceChild(this, replacement);
}
/**
	Same result as innerText; the tag with all inner tags stripped out
*/
string outerText() const {
	return this.innerText;
}
/* *******************************
Miscellaneous
*********************************/
/// Deep copy: a new element with the same tag name, a duplicated attribute
/// set, the same selfClosed flag, and clones of all children.
@property Element cloned()
/+
	out(ret) {
		// FIXME: not sure why these fail...
		assert(ret.children.length == this.children.length, format("%d %d", ret.children.length, this.children.length));
		assert(ret.tagName == this.tagName);
	}
body {
+/
{
	auto copy = Element.make(this.tagName);
	copy.parentDocument = this.parentDocument;
	copy.attributes = this.attributes.aadup;
	copy.selfClosed = this.selfClosed;

	// children are cloned recursively and attached to the copy
	foreach(node; children)
		copy.appendChild(node.cloned);

	return copy;
}
/// Clones the node. With deepClone set, all inner tags are cloned too (same as `cloned`).
/// Otherwise only this tag and its attributes are copied and the result has no contents.
Element cloneNode(bool deepClone) {
	if(!deepClone) {
		// shallow clone
		auto shallow = Element.make(this.tagName);
		shallow.parentDocument = this.parentDocument;
		shallow.attributes = this.attributes.aadup;
		shallow.selfClosed = this.selfClosed;
		return shallow;
	}

	return this.cloned;
}
/// The value of this node. The base implementation always returns the empty
/// string; subclasses such as TextNode override it.
string nodeValue() const {
return "";
}
// should return int
/// Numeric node type. The base implementation returns 1; childElements
/// selects children by comparing against that value.
@property int nodeType() const {
return 1;
}
// Class invariant: the tag name never contains a space and, in debug builds,
// every child is non-null, points back at this element as its parent, and is
// neither this element itself nor this element's own parent.
invariant () {
assert(tagName.indexOf(" ") == -1);
if(children !is null)
debug foreach(child; children) {
// assert(parentNode !is null);
assert(child !is null);
assert(child.parentNode is this, format("%s is not a parent of %s (it thought it was %s)", tagName, child.tagName, child.parentNode is null ? "null" : child.parentNode.tagName));
assert(child !is this);
assert(child !is parentNode);
}
/+ // only depend on parentNode's accuracy if you shuffle things around and use the top elements - where the contracts guarantee it on out
if(parentNode !is null) {
// if you have a parent, you should share the same parentDocument; this is appendChild()'s job
auto lol = cast(TextNode) this;
assert(parentDocument is parentNode.parentDocument, lol is null ? this.tagName : lol.contents);
}
+/
//assert(parentDocument !is null); // no more; if it is present, we use it, but it is not required
// reason is so you can create these without needing a reference to the document
}
/**
	Serializes the whole element - tag, attributes, and children - into a
	string which could be pasted into an XML file. Forwards to writeToAppender
	with a fresh buffer.
*/
override string toString() const {
	return this.writeToAppender();
}
/// This is the actual implementation used by toString. You can pass it a preallocated buffer to save some time.
/// Returns the string it creates: only the text appended by this call, not
/// whatever the buffer already held.
string writeToAppender(Appender!string where = appender!string()) const {
	assert(tagName !is null);

	where.reserve((this.children.length + 1) * 512);

	auto start = where.data.length;

	// opening tag with attributes; attribute values are entity-encoded
	where.put("<");
	where.put(tagName);
	foreach(n, v ; attributes) {
		assert(n !is null);
		//assert(v !is null);
		where.put(" ");
		where.put(n);
		where.put("=\"");
		htmlEntitiesEncode(v, where);
		where.put("\"");
	}

	if(selfClosed){
		where.put(" />");
		return where.data[start .. $];
	}

	where.put('>');

	innerHTML(where);

	// closing tag: "</" + name + ">"
	where.put("</");
	where.put(tagName);
	where.put('>');

	return where.data[start .. $];
}
/**
	Returns a lazy range of all its children, recursively.
	A new ElementStream is created on every call.
*/
@property ElementStream tree() {
	auto stream = new ElementStream(this);
	return stream;
}
// I moved these from Form because they are generally useful.
// Ideally, I'd put them in arsd.html and use UFCS, but that doesn't work with the opDispatch here.
/// Adds a < label > holding a caption span and a form control, and returns the label.
/// A type of "textarea" creates a textarea with 6 rows; any other type creates an
/// input of that type. For checkbox and radio the caption goes after the control.
/// Tags: HTML, HTML5
// FIXME: add overloads for other label types...
Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
	immutable captionAfterControl = (type == "checkbox" || type == "radio");

	auto wrapper = this.addChild("label");

	if(!captionAfterControl)
		wrapper.addChild("span", label);

	Element input;
	if(type == "textarea") {
		input = wrapper.addChild("textarea").
			setAttribute("name", name).
			setAttribute("rows", "6");
	} else {
		input = wrapper.addChild("input").
			setAttribute("name", name).
			setAttribute("type", type);
	}

	if(captionAfterControl)
		wrapper.addChild("span", label);

	// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
	fieldOptions.applyToElement(input);
	return wrapper;
}
/// Same as the string-label overload, except the caption is an existing
/// element which is added inside the new < label > ahead of the control.
Element addField(Element label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto wrapper = this.addChild("label");
	wrapper.addChild(label);

	Element input;
	if(type == "textarea") {
		input = wrapper.addChild("textarea").
			setAttribute("name", name).
			setAttribute("rows", "6");
	} else {
		input = wrapper.addChild("input").
			setAttribute("name", name).
			setAttribute("type", type);
	}

	// these are html 5 attributes; you'll have to implement fallbacks elsewhere. In Javascript or maybe I'll add a magic thing to html.d later.
	fieldOptions.applyToElement(input);
	return wrapper;
}
/// Shorthand for a "text" field with explicit field options.
Element addField(string label, string name, FormFieldOptions fieldOptions) {
	immutable type = "text";
	return addField(label, name, type, fieldOptions);
}
/// Adds a < label > holding a caption span and a < select >, with one option
/// added per entry of `options` (entry value and key are handed to addChild
/// in that order). Returns the label. `fieldOptions` is currently unused.
Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto wrapper = this.addChild("label");
	wrapper.addChild("span", label);

	auto sel = wrapper.addChild("select").setAttribute("name", name);
	foreach(k, opt; options) {
		sel.addChild("option", opt, k);
	}

	// FIXME: implement requirements somehow
	return wrapper;
}
/// Adds a < div class="submit-holder" > containing a submit input and returns
/// the div. A non-empty `label` becomes the button's value.
Element addSubmitButton(string label = null) {
	auto holder = this.addChild("div");
	holder.addClass("submit-holder");

	auto button = holder.addChild("input");
	button.type = "submit";
	if(label.length)
		button.value = label;

	return holder;
}
}
/// An element with the tag name "#fragment" that serializes as just its children.
class DocumentFragment : Element {
	/// Creates an empty fragment attached to the given document.
	this(Document _parentDocument) {
		tagName = "#fragment";
		super(_parentDocument);
	}

	/// Writes only the inner html; no tag is emitted for the fragment itself.
	override string writeToAppender(Appender!string where = appender!string()) const {
		return innerHTML(where);
	}
}
/// Given text, encode all html entities on it - &, <, >, and ". This function also
/// encodes all non-ascii characters (and NUL) as numeric entities, thus ensuring the
/// resultant text will work even if your charset isn't set right.
///
/// The output parameter can be given to append to an existing buffer. You don't have to
/// pass one; regardless, the return value will be usable for you, with just the data encoded.
string htmlEntitiesEncode(string data, Appender!string output = appender!string()) {
	import std.conv : to;

	// if there's no entities, we can save a lot of time by not bothering with the
	// encoding loop. This check cuts the net toString time by better than half in my test.
	// let me know if it made your tests worse though, since if you use an entity in just about
	// every location, the check will add time... but I suspect the average experience is like mine
	// since the check gives up as soon as it can anyway.
	bool shortcut = true;
	foreach(char c; data) {
		// non ascii chars are always higher than 127 in utf8; we'd better go to the full decoder if we see it.
		if(c == '<' || c == '>' || c == '"' || c == '&' || cast(uint) c > 127) {
			shortcut = false; // there's actual work to be done
			break;
		}
	}

	if(shortcut) {
		output.put(data);
		return data;
	}

	auto start = output.data.length;

	output.reserve(data.length + 64); // grab some extra space for the encoded entities

	foreach(dchar d; data) {
		if(d == '&')
			output.put("&amp;");
		else if (d == '<')
			output.put("&lt;");
		else if (d == '>')
			output.put("&gt;");
		else if (d == '\"')
			output.put("&quot;");
//		else if (d == '\'')
//			output.put("&#39;"); // if you are in an attribute, it might be important to encode for the same reason as double quotes
			// FIXME: should I encode apostrophes too? as &#39;... I could also do space but if your html is so bad that it doesn't
			// quote attributes at all, maybe you deserve the xss. Encoding spaces will make everything really ugly so meh
			// idk about apostrophes though. Might be worth it, might not.
		else if (d < 128 && d > 0)
			output.put(d);
		else
			// decimal numeric character reference, e.g. &#233;
			output.put("&#" ~ to!string(cast(int) d) ~ ";");
	}

	//assert(output !is null); // this fails on empty attributes.....
	return output.data[start .. $];

//	data = data.replace("\u00a0", "&nbsp;");
}
/// An alias for htmlEntitiesEncode; it works for xml too.
/// Always encodes into a fresh buffer and returns the encoded text.
string xmlEntitiesEncode(string data) {
return htmlEntitiesEncode(data);
}
/// This helper function is used for decoding html entities. It has a hard-coded list of entities and characters.
///
/// `entity` is the whole entity text: the name is read from entity[1 .. $-1], so the
/// first character (the &) and the last character (the terminator) are skipped.
/// Numeric forms (&#NNN; and &#xHHH;) are converted to the matching code point.
/// Returns: the decoded character; U+FFFD for an unknown named entity; a space for a
/// decimal entity that contains no digits.
dchar parseEntity(in dchar[] entity) {
	switch(entity[1..$-1]) {
		// the five entities xml knows about
		case "quot":
			return '"';
		case "apos":
			return '\'';
		case "lt":
			return '<';
		case "gt":
			return '>';
		case "amp":
			return '&';
		// the next are html rather than xml
		case "Agrave": return '\u00C0';
		case "Aacute": return '\u00C1';
		case "Acirc": return '\u00C2';
		case "Atilde": return '\u00C3';
		case "Auml": return '\u00C4';
		case "Aring": return '\u00C5';
		case "AElig": return '\u00C6';
		case "Ccedil": return '\u00C7';
		case "Egrave": return '\u00C8';
		case "Eacute": return '\u00C9';
		case "Ecirc": return '\u00CA';
		case "Euml": return '\u00CB';
		case "Igrave": return '\u00CC';
		case "Iacute": return '\u00CD';
		case "Icirc": return '\u00CE';
		case "Iuml": return '\u00CF';
		case "ETH": return '\u00D0';
		case "Ntilde": return '\u00D1';
		case "Ograve": return '\u00D2';
		case "Oacute": return '\u00D3';
		case "Ocirc": return '\u00D4';
		case "Otilde": return '\u00D5';
		case "Ouml": return '\u00D6';
		case "Oslash": return '\u00D8';
		case "Ugrave": return '\u00D9';
		case "Uacute": return '\u00DA';
		case "Ucirc": return '\u00DB';
		case "Uuml": return '\u00DC';
		case "Yacute": return '\u00DD';
		case "THORN": return '\u00DE';
		case "szlig": return '\u00DF';
		case "agrave": return '\u00E0';
		case "aacute": return '\u00E1';
		case "acirc": return '\u00E2';
		case "atilde": return '\u00E3';
		case "auml": return '\u00E4';
		case "aring": return '\u00E5';
		case "aelig": return '\u00E6';
		case "ccedil": return '\u00E7';
		case "egrave": return '\u00E8';
		case "eacute": return '\u00E9';
		case "ecirc": return '\u00EA';
		case "euml": return '\u00EB';
		case "igrave": return '\u00EC';
		case "iacute": return '\u00ED';
		case "icirc": return '\u00EE';
		case "iuml": return '\u00EF';
		case "eth": return '\u00F0';
		case "ntilde": return '\u00F1';
		case "ograve": return '\u00F2';
		case "oacute": return '\u00F3';
		case "ocirc": return '\u00F4';
		case "otilde": return '\u00F5';
		case "ouml": return '\u00F6';
		case "oslash": return '\u00F8';
		case "ugrave": return '\u00F9';
		case "uacute": return '\u00FA';
		case "ucirc": return '\u00FB';
		case "uuml": return '\u00FC';
		case "yacute": return '\u00FD';
		case "thorn": return '\u00FE';
		case "yuml": return '\u00FF';
		case "nbsp": return '\u00A0';
		case "iexcl": return '\u00A1';
		case "cent": return '\u00A2';
		case "pound": return '\u00A3';
		case "curren": return '\u00A4';
		case "yen": return '\u00A5';
		case "brvbar": return '\u00A6';
		case "sect": return '\u00A7';
		case "uml": return '\u00A8';
		case "copy": return '\u00A9';
		case "ordf": return '\u00AA';
		case "laquo": return '\u00AB';
		case "not": return '\u00AC';
		case "shy": return '\u00AD';
		case "reg": return '\u00AE';
		case "ldquo": return '\u201c';
		case "rdquo": return '\u201d';
		case "macr": return '\u00AF';
		case "deg": return '\u00B0';
		case "plusmn": return '\u00B1';
		case "sup2": return '\u00B2';
		case "sup3": return '\u00B3';
		case "acute": return '\u00B4';
		case "micro": return '\u00B5';
		case "para": return '\u00B6';
		case "middot": return '\u00B7';
		case "cedil": return '\u00B8';
		case "sup1": return '\u00B9';
		case "ordm": return '\u00BA';
		case "raquo": return '\u00BB';
		case "frac14": return '\u00BC';
		case "frac12": return '\u00BD';
		case "frac34": return '\u00BE';
		case "iquest": return '\u00BF';
		case "times": return '\u00D7';
		case "divide": return '\u00F7';
		case "OElig": return '\u0152';
		case "oelig": return '\u0153';
		case "Scaron": return '\u0160';
		case "scaron": return '\u0161';
		case "Yuml": return '\u0178';
		case "fnof": return '\u0192';
		case "circ": return '\u02C6';
		case "tilde": return '\u02DC';
		case "trade": return '\u2122';
		case "hellip": return '\u2026';
		case "ndash": return '\u2013';
		case "mdash": return '\u2014';
		case "lsquo": return '\u2018';
		case "rsquo": return '\u2019';
		case "Omicron": return '\u039f';
		case "omicron": return '\u03bf';
		// and handling numeric entities
		default:
			if(entity[1] == '#') {
				// only a lowercase x selects the hexadecimal form
				if(entity[2] == 'x' /*|| (!strict && entity[2] == 'X')*/) {
					auto hex = entity[3..$-1];
					auto p = intFromHex(to!string(hex).toLower());
					return cast(dchar) p;
				} else {
					auto decimal = entity[2..$-1];
					// dealing with broken html entities
					// (leading characters that are not digits are skipped)
					while(decimal.length && (decimal[0] < '0' || decimal[0] > '9'))
						decimal = decimal[1 .. $];
					if(decimal.length == 0)
						return ' '; // this is really broken html
					// done with dealing with broken stuff
					auto p = std.conv.to!int(decimal);
					return cast(dchar) p;
				}
			} else
				return '\ufffd'; // replacement character diamond thing
	}
	assert(0);
}
import std.utf;
import std.stdio;
/// This takes a string of raw HTML and decodes the entities into a nice D utf-8 string.
/// By default, it uses loose mode - it will try to return a useful string from garbage input too.
/// Set the second parameter to true if you'd prefer it to strictly throw exceptions on garbage input.
///
/// Input without any & is returned as the original slice, without copying.
/// Throws: Exception in strict mode when an entity is not terminated (another &, a space,
/// too many characters, or the end of input is reached before the ;).
string htmlEntitiesDecode(string data, bool strict = false) {
	// this check makes a *big* difference; about a 50% improvement of parse speed on my test.
	if(data.indexOf("&") == -1) // all html entities begin with &
		return data; // if there are no entities in here, we can return the original slice and save some time
	char[] a; // this seems to do a *better* job than appender!
	char[4] buffer; // scratch space for utf-8 encoding one character at a time
	bool tryingEntity = false; // true while we are between an & and its terminator
	dchar[] entityBeingTried; // the text collected so far, starting with the &
	int entityAttemptIndex = 0; // how many characters have followed the &
	foreach(dchar ch; data) {
		if(tryingEntity) {
			entityAttemptIndex++;
			entityBeingTried ~= ch;
			// I saw some crappy html in the wild that looked like &0&#1111; this tries to handle that.
			if(ch == '&') {
				if(strict)
					throw new Exception("unterminated entity; & inside another at " ~ to!string(entityBeingTried));
				// if not strict, let's try to parse both.
				if(entityBeingTried == "&&")
					a ~= "&"; // double amp means keep the first one, still try to parse the next one
				else
					a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried))];
				// tryingEntity is still true
				entityBeingTried = entityBeingTried[0 .. 1]; // keep the &
				entityAttemptIndex = 0; // restarting o this
			} else
			if(ch == ';') {
				// properly terminated: decode it and emit the character
				tryingEntity = false;
				a ~= buffer[0.. std.utf.encode(buffer, parseEntity(entityBeingTried))];
			} else if(ch == ' ') {
				// e.g. you &amp i
				if(strict)
					throw new Exception("unterminated entity at " ~ to!string(entityBeingTried));
				else {
					// loose mode: give up and emit the collected text verbatim
					tryingEntity = false;
					a ~= to!(char[])(entityBeingTried);
				}
			} else {
				// too long to still be an entity; stop looking for the terminator
				if(entityAttemptIndex >= 9) {
					if(strict)
						throw new Exception("unterminated entity at " ~ to!string(entityBeingTried));
					else {
						tryingEntity = false;
						a ~= to!(char[])(entityBeingTried);
					}
				}
			}
		} else {
			if(ch == '&') {
				// start collecting a new candidate entity
				tryingEntity = true;
				entityBeingTried = null;
				entityBeingTried ~= ch;
				entityAttemptIndex = 0;
			} else {
				a ~= buffer[0 .. std.utf.encode(buffer, ch)];
			}
		}
	}
	if(tryingEntity) {
		if(strict)
			throw new Exception("unterminated entity at " ~ to!string(entityBeingTried));
		// otherwise, let's try to recover, at least so we don't drop any data
		a ~= to!string(entityBeingTried);
		// FIXME: what if we have "cool &amp"? should we try to parse it?
	}
	return cast(string) a; // assumeUnique is actually kinda slow, lol
}
/// Common base for the non-tag nodes in this file (raw source, server side
/// code, instructions, comments). Such nodes cannot take children and
/// report node type 100.
abstract class SpecialElement : Element {
	/// Passes the owning document through to Element.
	this(Document _parentDocument) {
		super(_parentDocument);
	}

	/// Always fails: special nodes are leaves.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a special node");
	}

	/// The node type shared by all special nodes.
	@property override int nodeType() const {
		return 100;
	}
}
/// A node, tagged "#raw", whose text is written to the output exactly as
/// given, with no entity encoding.
class RawSource : SpecialElement {
	/// Wraps the string `s` as a raw node belonging to the given document.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#raw";
	}

	/// The node value is whatever toString() produces.
	override string nodeValue() const {
		return toString();
	}

	/// Appends the source verbatim to `where` and returns it.
	override string writeToAppender(Appender!string where = appender!string()) const {
		where.put(source);
		return source;
	}

	/// The verbatim text.
	string source;
}
/// Base for server side code nodes. The tag name is "#" followed by the
/// given type, and the node serializes as its source between < and >.
abstract class ServerSideCode : SpecialElement {
	/// `type` becomes the part of the tag name after the #.
	this(Document _parentDocument, string type) {
		super(_parentDocument);
		tagName = "#" ~ type;
	}

	/// The stored source text.
	override string nodeValue() const {
		return source;
	}

	/// Appends "<", the source, and ">" to `where`; returns the part appended.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable begin = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[begin .. $];
	}

	/// The text written between the angle brackets.
	string source;
}
/// Server side code node with the tag name "#php".
class PhpCode : ServerSideCode {
	/// Stores `s` as the node's source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "php");
		this.source = s;
	}
}
/// Server side code node with the tag name "#asp".
class AspCode : ServerSideCode {
	/// Stores `s` as the node's source.
	this(Document _parentDocument, string s) {
		super(_parentDocument, "asp");
		this.source = s;
	}
}
/// A node, tagged "#bpi", for markup that opens with "<!" (for example a
/// doctype declaration). `source` holds the text between "<!" and ">".
class BangInstruction : SpecialElement {
	/// Stores `s` as the instruction text.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#bpi";
	}

	/// The stored instruction text.
	override string nodeValue() const {
		return this.source;
	}

	/// Appends "<!", the source, and ">" to `where`; returns the part appended.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<!");
		where.put(source);
		where.put(">");
		return where.data[start .. $];
	}

	/// The text between "<!" and ">".
	string source;
}
/// A node tagged "#qpi"; it serializes as its source between < and >.
class QuestionInstruction : SpecialElement {
	/// Stores `s` as the instruction text.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#qpi";
	}

	/// The stored instruction text.
	override string nodeValue() const {
		return source;
	}

	/// Appends "<", the source, and ">" to `where`; returns the part appended.
	override string writeToAppender(Appender!string where = appender!string()) const {
		immutable begin = where.data.length;
		where.put("<");
		where.put(source);
		where.put(">");
		return where.data[begin .. $];
	}

	/// The text written between the angle brackets.
	string source;
}
/// A comment node, tagged "#comment". `source` holds the comment text
/// without the surrounding delimiters.
class HtmlComment : SpecialElement {
	/// Stores `s` as the comment text.
	this(Document _parentDocument, string s) {
		super(_parentDocument);
		source = s;
		tagName = "#comment";
	}

	/// The stored comment text.
	override string nodeValue() const {
		return this.source;
	}

	/// Appends the comment, wrapped in its opening and closing delimiters,
	/// to `where`; returns the part appended.
	override string writeToAppender(Appender!string where = appender!string()) const {
		auto start = where.data.length;
		where.put("<!--");
		where.put(source);
		where.put("-->");
		return where.data[start .. $];
	}

	/// The comment text, delimiters not included.
	string source;
}
/// A run of text in the tree, tagged "#text". `contents` holds the decoded
/// text; it is entity-encoded again when the node is serialized.
class TextNode : Element {
  public:
	/// Creates a text node holding `e` as already-decoded text.
	this(Document _parentDocument, string e) {
		super(_parentDocument);
		contents = e;
		tagName = "#text";
	}

	string opDispatch(string name)(string v = null) if(0) { return null; } // text nodes don't have attributes

	/// Creates a text node from html source text, decoding its entities.
	/// Decoding is strict only when a document is given and it is not loose.
	static TextNode fromUndecodedString(Document _parentDocument, string html) {
		immutable strict = _parentDocument is null ? false : !_parentDocument.loose;
		auto node = new TextNode(_parentDocument, "");
		node.contents = htmlEntitiesDecode(html, strict);
		return node;
	}

	/// A new text node with the same document and contents.
	override @property Element cloned() {
		return new TextNode(parentDocument, contents);
	}

	/// The decoded text.
	override string nodeValue() const {
		return contents; //toString();
	}

	/// Always NodeType.Text.
	@property override int nodeType() const {
		return NodeType.Text;
	}

	/// Appends the entity-encoded contents to `where` and returns the encoded
	/// text; empty contents write nothing and return "".
	override string writeToAppender(Appender!string where = appender!string()) const {
		string encoded = contents.length ? htmlEntitiesEncode(contents, where) : "";
		assert(encoded !is null);
		return encoded;
	}

	/// Always fails: text nodes are leaves.
	override Element appendChild(Element e) {
		assert(0, "Cannot append to a text node");
	}

	/// The decoded text of this node.
	string contents;
	// alias contents content; // I just mistype this a lot,
}
/**
	There are subclasses of Element offering improved helper
	functions for the element in HTML.
*/
/// An < a > element with helpers for reading and editing the query string of its href.
class Link : Element {
	/// Creates an empty link belonging to the given document.
	this(Document _parentDocument) {
		super(_parentDocument);
		this.tagName = "a";
	}

	/// Creates a link with the given href and inner text.
	this(string href, string text) {
		super("a");
		setAttribute("href", href);
		innerText = text;
	}

/+
	/// Returns everything in the href EXCEPT the query string
	@property string targetSansQuery() {
	}
	///.
	@property string domainName() {
	}
	///.
	@property string path
+/

	/// This gets a variable from the URL's query string.
	/// Returns null when the variable is not present.
	string getValue(string name) {
		auto vars = variablesHash();
		if(auto found = name in vars)
			return *found;
		return null;
	}

	// Parses the query string of the href into a name => value table.
	// Names and values are URI-decoded; a name without "=" maps to "".
	// Returns null when there is no href attribute.
	private string[string] variablesHash() {
		string href = getAttribute("href");
		if(href is null)
			return null;

		// The fragment starts at the first '#' and runs to the end, so it is
		// cut off before looking for the query.
		auto fragment = href.indexOf("#");
		if(fragment != -1)
			href = href[0 .. fragment];

		string str = "";
		auto ques = href.indexOf("?");
		if(ques != -1)
			str = href[ques + 1 .. $];

		string[string] hash;
		foreach(var; str.split("&")) {
			if(var.length == 0)
				continue; // stray separator, e.g. "a=1&&b=2"

			auto index = var.indexOf("=");
			if(index == -1)
				// valueless names are encoded on output, so they have to be decoded here as well
				hash[decodeComponent(var)] = "";
			else
				hash[decodeComponent(var[0 .. index])] = decodeComponent(var[index + 1 .. $]);
		}
		return hash;
	}

	/// Rewrites the href so that its query string is exactly `vars`, URI
	/// encoding names and values. The part before the query and any fragment
	/// are kept; empty values are written as the bare name.
	/*private*/ void updateQueryString(string[string] vars) {
		string href = getAttribute("href");

		// Split the fragment off first: it comes after the query, so cutting
		// at the '?' first would throw it away.
		string frag = "";
		auto fragment = href.indexOf("#");
		if(fragment != -1) {
			frag = href[fragment .. $];
			href = href[0 .. fragment];
		}

		auto question = href.indexOf("?");
		if(question != -1)
			href = href[0 .. question];

		string query = "?";
		bool first = true;
		foreach(name, value; vars) {
			if(!first)
				query ~= "&";
			else
				first = false;
			query ~= encodeComponent(name);
			if(value.length)
				query ~= "=" ~ encodeComponent(value);
		}

		if(query != "?")
			href ~= query;
		href ~= frag;

		setAttribute("href", href);
	}

	/// Sets or adds the variable with the given name to the given value
	/// It automatically URI encodes the values and takes care of the ? and &.
	override void setValue(string name, string variable) {
		auto vars = variablesHash();
		vars[name] = variable;
		updateQueryString(vars);
	}

	/// Removes the given variable from the query string
	void removeValue(string name) {
		auto vars = variablesHash();
		vars.remove(name);
		updateQueryString(vars);
	}

	/*
	///.
	override string toString() {
	}
	///.
	override string getAttribute(string name) {
		if(name == "href") {
		} else
			return super.getAttribute(name);
	}
	*/
}
///.
class Form : Element {
/// Creates a form element: forwards `_parentDocument` to the Element
/// constructor and then sets the tag name to "form".
this(Document _parentDocument) {
super(_parentDocument);
tagName = "form";
}
// Adds a field of the given type. If the form has an element matching
// "fieldset div", the field is added to that element; otherwise the
// call falls through to Element.addField on the form itself.
override Element addField(string label, string name, string type = "text", FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto container = querySelector("fieldset div");
	return container is null
		? super.addField(label, name, type, fieldOptions)
		: container.addField(label, name, type, fieldOptions);
}
// Overload without an explicit type: behaves like the typed overload
// with the type fixed to "text".
override Element addField(string label, string name, FormFieldOptions fieldOptions) {
	enum string fieldType = "text";
	auto container = querySelector("fieldset div");
	if(container !is null)
		return container.addField(label, name, fieldType, fieldOptions);
	return super.addField(label, name, fieldType, fieldOptions);
}
// Overload taking a string[string] of options. As with the other
// overloads, the field goes into the "fieldset div" element when the
// form has one, and into the form itself (via Element.addField) otherwise.
override Element addField(string label, string name, string[string] options, FormFieldOptions fieldOptions = FormFieldOptions.none) {
	auto container = querySelector("fieldset div");
	if(container !is null)
		return container.addField(label, name, options, fieldOptions);
	return super.addField(label, name, options, fieldOptions);
}
// Two-argument form: forwards to setValue(field, value, makeNew) with
// makeNew = true, so a missing field is created instead of throwing.
override void setValue(string field, string value) {
setValue(field, value, true);
}
// FIXME: doesn't handle arrays; multiple fields can have the same name
/// Sets the form field's value. For input boxes, this sets the value attribute. For
/// textareas, it sets the innerText. For radio boxes and select boxes, it removes
/// the checked/selected attribute from all, and adds it to the one matching the value.
/// For checkboxes, if the value is non-null and not empty, it checks the box.
/// If you set a field that doesn't exist, it throws an exception if makeNew is false.
/// Otherwise, it makes a new input with type=hidden to keep the value.
void setValue(string field, string value, bool makeNew) {
	auto matches = getField(field);

	// Nothing carries this name yet: either create an input or complain.
	if(matches.length == 0) {
		if(!makeNew)
			throw new Exception("form field does not exist");
		addInput(field, value);
		return;
	}

	// Several elements share the name; assume radio boxes and check
	// exactly those whose value attribute equals the requested value.
	if(matches.length != 1) {
		foreach(radio; matches) {
			//if(radio.getAttribute("value") is null)
			//	throw new Exception("don't know what to do with radio boxes with null value");
			if(radio.getAttribute("value") == value)
				radio.setAttribute("checked", "checked");
			else
				radio.removeAttribute("checked");
		}
		return;
	}

	// Exactly one element: what happens depends on its tag.
	auto target = matches[0];
	switch(target.tagName) {
		case "textarea": {
			target.innerText = value;
			break;
		}
		case "input": {
			string kind = target.getAttribute("type");
			if(kind == "checkbox" || kind == "radio") {
				// a non-empty value ticks the box, an empty one clears it
				if(value.length)
					target.setAttribute("checked", "checked");
				else
					target.removeAttribute("checked");
			} else {
				// covers a missing type attribute as well
				target.value = value;
			}
			break;
		}
		case "select": {
			bool matched = false;
			foreach(option; target.tree) {
				if(option.tagName != "option")
					continue;
				// an option without a value attribute is compared by its text
				string optionValue = option.getAttribute("value");
				if(optionValue is null)
					optionValue = option.innerText;
				if(optionValue != value) {
					option.removeAttribute("selected");
					continue;
				}
				option.setAttribute("selected", "selected");
				matched = true;
			}
			// no option matched: add one and select it
			if(!matched) {
				target.addChild("option", value)
					.setAttribute("selected", "selected");
			}
			break;
		}
		default:
			assert(0);
	}
}
/// This takes an array of strings and adds hidden elements for each one of them. Unlike setValue,
/// it makes no attempt to find and modify existing elements in the form to the new values.
void addValueArray(string key, string[] arrayOfValues) {
	// one addChild("input", key, item) call per array entry, in order
	foreach(item; arrayOfValues) {
		addChild("input", key, item);
	}
}
/// Gets the value of the field; what would be given if it were submitted right now (so
/// it handles select boxes and radio buttons too). For checkboxes, if a value isn't
/// given, but it is checked, it returns "checked", since null and "" are indistinguishable.
///
/// Returns "" when the field does not exist, when a checkbox or radio button is
/// not checked, or when no option of a select is marked selected. A selected
/// option that has no value attribute yields its inner text, which is the same
/// rule setValue uses when matching options.
string getValue(string field) {
	auto eles = getField(field);
	if(eles.length == 0)
		return "";
	if(eles.length == 1) {
		auto e = eles[0];
		switch(e.tagName) {
			default: assert(0);
			case "input":
				if(e.type == "checkbox") {
					if(e.checked)
						return e.value.length ? e.value : "checked";
					return "";
				} else if(e.type == "radio") {
					// a lone radio button only contributes a value when it
					// is checked, exactly like one inside a larger group
					if(e.checked)
						return e.value;
					return "";
				} else
					return e.value;
			case "textarea":
				return e.innerText;
			case "select":
				foreach(child; e.tree) {
					if(child.tagName != "option")
						continue;
					if(child.selected) {
						// fall back to the option's text when it has no
						// value attribute, consistent with setValue
						string val = child.getAttribute("value");
						if(val is null)
							val = child.innerText;
						return val;
					}
				}
				break;
		}
	} else {
		// assuming radio
		foreach(radio; eles) {
			if(radio.checked)
				return radio.value;
		}
	}
	return "";
}
// FIXME: doesn't handle multiple elements with the same name (except radio buttons)
/// Builds a "name=value&name=value" string from the elements matching the
/// "[name]" selector. Both sides are passed through std.uri.encodeComponent,
/// the value comes from getValue, and each distinct name is written once.
string getPostableData() {
	string encoded;
	bool[string] seen;
	foreach(element; getElementsBySelector("[name]")) {
		auto fieldName = element.name;
		if(fieldName in seen)
			continue;
		seen[fieldName] = true;

		// every pair contains at least "=", so a non-empty result means
		// something was already written and a separator is needed
		if(encoded.length != 0)
			encoded ~= "&";
		encoded ~= std.uri.encodeComponent(fieldName);
		encoded ~= "=";
		encoded ~= std.uri.encodeComponent(getValue(fieldName));
	}
	return encoded;
}
/// Collects every element yielded by `tree` whose name equals `name`,
/// in iteration order. The result is empty when nothing matches.
Element[] getField(string name) {
	Element[] matches;
	foreach(candidate; tree)
		if(candidate.name == name)
			matches ~= candidate;
	return matches;
}
/// Grabs the