characterencodings.d is now optional again

2015-01-14 11:01:49 -05:00 · 2015-01-14 11:01:49 -05:00 · ba7b65a804
parent e2f3ab92ec
commit ba7b65a804
1 changed files with 57 additions and 11 deletions
--- a/dom.d
+++ b/dom.d
@@ -487,6 +487,27 @@ struct ElementCollection {
 		return ec;
 	}
 	/// Slicing with no arguments hands back the underlying array, which makes
 	/// it easy to forward the collection to range-expecting algorithms or to loop over it.
 	Element[] opSlice() {
 		return this.elements;
 	}
 	/// Input range primitives, so a foreach can walk over this collection.
 	void popFront() {
 		// drop the current head by re-slicing from index 1 to the end
 		elements = elements[1 .. elements.length];
 	}
 	/// ditto
 	Element front() {
 		// the head of the remaining slice is the current item
 		auto head = elements[0];
 		return head;
 	}
 	/// ditto
 	bool empty() {
 		// nothing left to visit once the slice has no elements
 		return elements.length == 0;
 	}
 	/// Forward method calls to each individual element of the collection
 	/// returns this so it can be chained.
 	ElementCollection opDispatch(string name, T...)(T t) {
@@ -707,8 +728,6 @@ string camelCase(string a) {
 // I need to maintain compatibility with the way it is now too.
 import arsd.characterencodings;
 import std.string;
 import std.exception;
 import std.uri;
@@ -1660,7 +1679,7 @@ class Element {
 		}
 		auto doc = new Document();
-		doc.parse("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document
+		doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>", strict, strict); // FIXME: this should preserve the strictness of the parent document
 		children = doc.root.children;
 		foreach(c; children) {
@@ -1695,7 +1714,7 @@ class Element {
 	*/
 	@property Element[] outerHTML(string html) {
 		auto doc = new Document();
-		doc.parse("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness
+		doc.parseUtf8("<innerhtml>" ~ html ~ "</innerhtml>"); // FIXME: needs to preserve the strictness
 		children = doc.root.children;
 		foreach(c; children) {
@@ -3344,7 +3363,7 @@ struct Html {
 class Document : FileResource {
 	/// Creates a document and immediately parses `data` into it.
 	///
 	/// Params:
 	///     data = the markup to parse
 	///     caseSensitive = forwarded unchanged to the parse call below
 	///     strict = forwarded unchanged to the parse call below
 	this(string data, bool caseSensitive = false, bool strict = false) {
-		parse(data, caseSensitive, strict);
+		parseUtf8(data, caseSensitive, strict);
 	}
 	/**
@@ -3451,17 +3470,30 @@ class Document : FileResource {
 	/// (Case-insensitive, non-strict; the character encoding is determined from the data itself.)
 	/// NOTE: this makes no attempt at added security.
-	void parseGarbage(string data) {
+	///
 	/// It is a template so that characterencodings is imported lazily, only when this is used.
 	void parseGarbage()(string data) {
 		// arguments are: caseSensitive = false, strict = false, dataEncoding = null
 		parse(data, false, false, null);
 	}
 	/// Parses well-formed UTF-8 as case-sensitive XML or XHTML.
 	/// Will throw exceptions on things like unclosed tags.
 	///
 	/// Both flags handed to the parser (case sensitivity and strictness) are true.
 	void parseStrict(string data) {
-		parse(data, true, true);
+		parseStream(toUtf8Stream(data), true, true);
 	}
-	Utf8Stream handleDataEncoding(in string rawdata, string dataEncoding, bool strict) {
+	/// Parses well-formed UTF-8 in loose mode (by default). Tries to correct
 	/// tag soup, but does NOT try to correct bad character encodings.
 	///
 	/// Bad character encodings will still throw an exception.
 	///
 	/// Params:
 	///     data = the text to parse; wrapped with toUtf8Stream as-is
 	///     caseSensitive = forwarded to parseStream
 	///     strict = forwarded to parseStream
 	void parseUtf8(string data, bool caseSensitive = false, bool strict = false) {
 		parseStream(toUtf8Stream(data), caseSensitive, strict);
 	}
 	// this is a template so we get lazy import behavior
 	Utf8Stream handleDataEncoding()(in string rawdata, string dataEncoding, bool strict) {
 		static assert(0);
 		import arsd.characterencodings;
 		// gotta determine the data encoding. If you know it, pass it in above to skip all this.
 		if(dataEncoding is null) {
 			dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
@@ -3552,6 +3584,12 @@ class Document : FileResource {
 		} else
 			data = rawdata;
 		return toUtf8Stream(data);
 	}
 	private
 	Utf8Stream toUtf8Stream(in string rawdata) {
 		string data = rawdata;
 		static if(is(Utf8Stream == string))
 			return data;
 		else
@@ -3590,8 +3628,16 @@ class Document : FileResource {
 		But, if you want the best behavior on wild data - figuring it out from the document
 		instead of assuming - you'll probably want to change that argument to null.
 		This is a template so it lazily imports arsd.characterencodings, which is required
 		to fix up data encodings.
 		If you are sure the encoding is good, try parseUtf8 or parseStrict to avoid the
 		dependency. If it is data from the Internet though, a random website, the encoding
 		is often a lie. This function, if dataEncoding == null, can correct for that, or
 		you can try parseGarbage. In those cases, arsd.characterencodings is required to
 		compile.
 	*/
-	void parse(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") {
+	void parse()(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") {
 		// Step 1: turn the raw bytes into a Utf8Stream according to dataEncoding
 		// (handleDataEncoding works out the encoding itself when dataEncoding is null).
 		auto data = handleDataEncoding(rawdata, dataEncoding, strict);
 		// Step 2: run the actual parser over the converted stream.
 		parseStream(data, caseSensitive, strict);
 	}
@@ -4294,7 +4340,7 @@ class Document : FileResource {
 			if(strict)
 				assert(0, "empty document should be impossible in strict mode");
 			else
-				parse(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do
+				parseUtf8(`<html><head></head><body></body></html>`); // fill in a dummy document in loose mode since that's what browsers do
 		}
 		if(paragraphHackfixRequired) {
@@ -4557,7 +4603,7 @@ class XmlDocument : Document {
 		contentType = "text/xml; charset=utf-8";
 		_prolog = `<?xml version="1.0" encoding="UTF-8"?>` ~ "\n";
-		parse(data, true, true);
+		parseStrict(data);
 	}
 }