mirror of https://github.com/adamdruppe/arsd.git
some streaming capability
This commit is contained in:
parent
2b3017f963
commit
5d63348946
186
dom.d
186
dom.d
|
@ -3153,8 +3153,8 @@ class TableCell : Element {
|
||||||
class MarkupException : Exception {
|
class MarkupException : Exception {
|
||||||
|
|
||||||
///.
|
///.
|
||||||
this(string message) {
|
this(string message, string file = __FILE__, size_t line = __LINE__) {
|
||||||
super(message);
|
super(message, file, line);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3249,43 +3249,7 @@ class Document : FileResource {
|
||||||
parse(data, false, false, null);
|
parse(data, false, false, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
Utf8Stream handleDataEncoding(in string rawdata, string dataEncoding, bool strict) {
|
||||||
Take XMLish data and try to make the DOM tree out of it.
|
|
||||||
|
|
||||||
The goal isn't to be perfect, but to just be good enough to
|
|
||||||
approximate Javascript's behavior.
|
|
||||||
|
|
||||||
If strict, it throws on something that doesn't make sense.
|
|
||||||
(Examples: mismatched tags. It doesn't validate!)
|
|
||||||
If not strict, it tries to recover anyway, and only throws
|
|
||||||
when something is REALLY unworkable.
|
|
||||||
|
|
||||||
If strict is false, it uses a magic list of tags that needn't
|
|
||||||
be closed. If you are writing a document specifically for this,
|
|
||||||
try to avoid such - use self closed tags at least. Easier to parse.
|
|
||||||
|
|
||||||
The dataEncoding argument can be used to pass a specific
|
|
||||||
charset encoding for automatic conversion. If null (which is NOT
|
|
||||||
the default!), it tries to determine from the data itself,
|
|
||||||
using the xml prolog or meta tags, and assumes UTF-8 if unsure.
|
|
||||||
|
|
||||||
If this assumption is wrong, it can throw on non-ascii
|
|
||||||
characters!
|
|
||||||
|
|
||||||
|
|
||||||
Note that it previously assumed the data was encoded as UTF-8, which
|
|
||||||
is why the dataEncoding argument defaults to that.
|
|
||||||
|
|
||||||
So it shouldn't break backward compatibility.
|
|
||||||
|
|
||||||
But, if you want the best behavior on wild data - figuring it out from the document
|
|
||||||
instead of assuming - you'll probably want to change that argument to null.
|
|
||||||
|
|
||||||
*/
|
|
||||||
void parse(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") {
|
|
||||||
// FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler
|
|
||||||
// of my big app.
|
|
||||||
|
|
||||||
// gotta determine the data encoding. If you know it, pass it in above to skip all this.
|
// gotta determine the data encoding. If you know it, pass it in above to skip all this.
|
||||||
if(dataEncoding is null) {
|
if(dataEncoding is null) {
|
||||||
dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
|
dataEncoding = tryToDetermineEncoding(cast(const(ubyte[])) rawdata);
|
||||||
|
@ -3375,6 +3339,53 @@ class Document : FileResource {
|
||||||
}
|
}
|
||||||
} else
|
} else
|
||||||
data = rawdata;
|
data = rawdata;
|
||||||
|
|
||||||
|
return new Utf8Stream(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Take XMLish data and try to make the DOM tree out of it.
|
||||||
|
|
||||||
|
The goal isn't to be perfect, but to just be good enough to
|
||||||
|
approximate Javascript's behavior.
|
||||||
|
|
||||||
|
If strict, it throws on something that doesn't make sense.
|
||||||
|
(Examples: mismatched tags. It doesn't validate!)
|
||||||
|
If not strict, it tries to recover anyway, and only throws
|
||||||
|
when something is REALLY unworkable.
|
||||||
|
|
||||||
|
If strict is false, it uses a magic list of tags that needn't
|
||||||
|
be closed. If you are writing a document specifically for this,
|
||||||
|
try to avoid such - use self closed tags at least. Easier to parse.
|
||||||
|
|
||||||
|
The dataEncoding argument can be used to pass a specific
|
||||||
|
charset encoding for automatic conversion. If null (which is NOT
|
||||||
|
the default!), it tries to determine from the data itself,
|
||||||
|
using the xml prolog or meta tags, and assumes UTF-8 if unsure.
|
||||||
|
|
||||||
|
If this assumption is wrong, it can throw on non-ascii
|
||||||
|
characters!
|
||||||
|
|
||||||
|
|
||||||
|
Note that it previously assumed the data was encoded as UTF-8, which
|
||||||
|
is why the dataEncoding argument defaults to that.
|
||||||
|
|
||||||
|
So it shouldn't break backward compatibility.
|
||||||
|
|
||||||
|
But, if you want the best behavior on wild data - figuring it out from the document
|
||||||
|
instead of assuming - you'll probably want to change that argument to null.
|
||||||
|
|
||||||
|
*/
|
||||||
|
void parse(in string rawdata, bool caseSensitive = false, bool strict = false, string dataEncoding = "UTF-8") {
|
||||||
|
auto data = handleDataEncoding(rawdata, dataEncoding, strict);
|
||||||
|
parseStream(data, caseSensitive, strict);
|
||||||
|
}
|
||||||
|
|
||||||
|
// note: this work best in strict mode, unless data is just a simple string wrapper
|
||||||
|
void parseStream(Utf8Stream data, bool caseSensitive = false, bool strict = false) {
|
||||||
|
// FIXME: this parser could be faster; it's in the top ten biggest tree times according to the profiler
|
||||||
|
// of my big app.
|
||||||
|
|
||||||
assert(data !is null);
|
assert(data !is null);
|
||||||
|
|
||||||
// go through character by character.
|
// go through character by character.
|
||||||
|
@ -5469,12 +5480,105 @@ struct FormFieldOptions {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// this needs to look just like a string, but can expand as needed
|
||||||
|
class Utf8Stream {
|
||||||
|
protected:
|
||||||
|
// these two should be overridden in subclasses to actually do the stream magic
|
||||||
|
string getMore() {
|
||||||
|
if(getMoreHelper !is null)
|
||||||
|
return getMoreHelper();
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool hasMore() {
|
||||||
|
if(hasMoreHelper !is null)
|
||||||
|
return hasMoreHelper();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// the rest should be ok
|
||||||
|
|
||||||
|
public:
|
||||||
|
this(string d) {
|
||||||
|
this.data = d;
|
||||||
|
}
|
||||||
|
|
||||||
|
this(string delegate() getMoreHelper, bool delegate() hasMoreHelper) {
|
||||||
|
this.getMoreHelper = getMoreHelper;
|
||||||
|
this.hasMoreHelper = hasMoreHelper;
|
||||||
|
|
||||||
|
if(hasMore())
|
||||||
|
this.data ~= getMore();
|
||||||
|
|
||||||
|
stdout.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
final size_t length() {
|
||||||
|
// the parser checks length primarily directly before accessing the next character
|
||||||
|
// so this is the place we'll hook to append more if possible and needed.
|
||||||
|
if(lastIdx + 1 >= data.length && hasMore()) {
|
||||||
|
data ~= getMore();
|
||||||
|
}
|
||||||
|
return data.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
final char opIndex(size_t idx) {
|
||||||
|
if(idx > lastIdx)
|
||||||
|
lastIdx = idx;
|
||||||
|
return data[idx];
|
||||||
|
}
|
||||||
|
|
||||||
|
final string opSlice(size_t start, size_t end) {
|
||||||
|
if(end > lastIdx)
|
||||||
|
lastIdx = end;
|
||||||
|
return data[start .. end];
|
||||||
|
}
|
||||||
|
|
||||||
|
final size_t opDollar() {
|
||||||
|
return length();
|
||||||
|
}
|
||||||
|
|
||||||
|
final Utf8Stream opBinary(string op : "~")(string s) {
|
||||||
|
this.data ~= s;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
final Utf8Stream opOpAssign(string op : "~")(string s) {
|
||||||
|
this.data ~= s;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
final Utf8Stream opAssign(string rhs) {
|
||||||
|
this.data = rhs;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
string data;
|
||||||
|
|
||||||
|
size_t lastIdx;
|
||||||
|
|
||||||
|
bool delegate() hasMoreHelper;
|
||||||
|
string delegate() getMoreHelper;
|
||||||
|
|
||||||
|
|
||||||
|
/+
|
||||||
|
// used to maybe clear some old stuff
|
||||||
|
// you might have to remove elements parsed with it too since they can hold slices into the
|
||||||
|
// old stuff, preventing gc
|
||||||
|
void dropFront(int bytes) {
|
||||||
|
posAdjustment += bytes;
|
||||||
|
data = data[bytes .. $];
|
||||||
|
}
|
||||||
|
|
||||||
|
int posAdjustment;
|
||||||
|
+/
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Copyright: Adam D. Ruppe, 2010 - 2012
|
Copyright: Adam D. Ruppe, 2010 - 2013
|
||||||
License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
|
License: <a href="http://www.boost.org/LICENSE_1_0.txt">Boost License 1.0</a>.
|
||||||
Authors: Adam D. Ruppe, with contributions by Nick Sabalausky and Trass3r
|
Authors: Adam D. Ruppe, with contributions by Nick Sabalausky and Trass3r
|
||||||
|
|
||||||
Copyright Adam D. Ruppe 2010-2012.
|
Copyright Adam D. Ruppe 2010-2013.
|
||||||
Distributed under the Boost Software License, Version 1.0.
|
Distributed under the Boost Software License, Version 1.0.
|
||||||
(See accompanying file LICENSE_1_0.txt or copy at
|
(See accompanying file LICENSE_1_0.txt or copy at
|
||||||
http://www.boost.org/LICENSE_1_0.txt)
|
http://www.boost.org/LICENSE_1_0.txt)
|
||||||
|
|
Loading…
Reference in New Issue