mirror of
https://github.com/dlang/phobos.git
synced 2025-04-26 21:22:20 +03:00
2974 lines
82 KiB
D
2974 lines
82 KiB
D
// Written in the D programming language.
|
|
|
|
/**
|
|
$(RED Warning: This module is considered out-dated and not up to Phobos'
|
|
current standards. It will remain until we have a suitable replacement,
|
|
but be aware that it will not remain long term.)
|
|
|
|
Classes and functions for creating and parsing XML
|
|
|
|
The basic architecture of this module is that there are standalone functions,
|
|
classes for constructing an XML document from scratch (Tag, Element and
|
|
Document), and also classes for parsing a pre-existing XML file (ElementParser
|
|
and DocumentParser). The parsing classes <i>may</i> be used to build a
|
|
Document, but that is not their primary purpose. The handling capabilities of
|
|
DocumentParser and ElementParser are sufficiently customizable that you can
|
|
make them do pretty much whatever you want.
|
|
|
|
Example: This example creates a DOM (Document Object Model) tree
|
|
from an XML file.
|
|
------------------------------------------------------------------------------
|
|
import std.xml;
|
|
import std.stdio;
|
|
import std.string;
|
|
import std.file;
|
|
|
|
// books.xml is used in various samples throughout the Microsoft XML Core
|
|
// Services (MSXML) SDK.
|
|
//
|
|
// See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
|
|
|
|
void main()
|
|
{
|
|
string s = cast(string)std.file.read("books.xml");
|
|
|
|
// Check for well-formedness
|
|
check(s);
|
|
|
|
// Make a DOM tree
|
|
auto doc = new Document(s);
|
|
|
|
// Plain-print it
|
|
writeln(doc);
|
|
}
|
|
------------------------------------------------------------------------------
|
|
|
|
Example: This example does much the same thing, except that the file is
|
|
deconstructed and reconstructed by hand. This is more work, but the
|
|
techniques involved offer vastly more power.
|
|
------------------------------------------------------------------------------
|
|
import std.xml;
|
|
import std.stdio;
|
|
import std.string;
|
|
|
|
struct Book
|
|
{
|
|
string id;
|
|
string author;
|
|
string title;
|
|
string genre;
|
|
string price;
|
|
string pubDate;
|
|
string description;
|
|
}
|
|
|
|
void main()
|
|
{
|
|
string s = cast(string)std.file.read("books.xml");
|
|
|
|
// Check for well-formedness
|
|
check(s);
|
|
|
|
// Take it apart
|
|
Book[] books;
|
|
|
|
auto xml = new DocumentParser(s);
|
|
xml.onStartTag["book"] = (ElementParser xml)
|
|
{
|
|
Book book;
|
|
book.id = xml.tag.attr["id"];
|
|
|
|
xml.onEndTag["author"] = (in Element e) { book.author = e.text(); };
|
|
xml.onEndTag["title"] = (in Element e) { book.title = e.text(); };
|
|
xml.onEndTag["genre"] = (in Element e) { book.genre = e.text(); };
|
|
xml.onEndTag["price"] = (in Element e) { book.price = e.text(); };
|
|
xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text(); };
|
|
xml.onEndTag["description"] = (in Element e) { book.description = e.text(); };
|
|
|
|
xml.parse();
|
|
|
|
books ~= book;
|
|
};
|
|
xml.parse();
|
|
|
|
// Put it back together again;
|
|
auto doc = new Document(new Tag("catalog"));
|
|
foreach(book;books)
|
|
{
|
|
auto element = new Element("book");
|
|
element.tag.attr["id"] = book.id;
|
|
|
|
element ~= new Element("author", book.author);
|
|
element ~= new Element("title", book.title);
|
|
element ~= new Element("genre", book.genre);
|
|
element ~= new Element("price", book.price);
|
|
element ~= new Element("publish-date",book.pubDate);
|
|
element ~= new Element("description", book.description);
|
|
|
|
doc ~= element;
|
|
}
|
|
|
|
// Pretty-print it
|
|
writefln(join(doc.pretty(3),"\n"));
|
|
}
|
|
-------------------------------------------------------------------------------
|
|
Macros:
|
|
WIKI=Phobos/StdXml
|
|
|
|
Copyright: Copyright Janice Caron 2008 - 2009.
|
|
License: $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
|
|
Authors: Janice Caron
|
|
Source: $(PHOBOSSRC std/_xml.d)
|
|
*/
|
|
/*
|
|
Copyright Janice Caron 2008 - 2009.
|
|
Distributed under the Boost Software License, Version 1.0.
|
|
(See accompanying file LICENSE_1_0.txt or copy at
|
|
http://www.boost.org/LICENSE_1_0.txt)
|
|
*/
|
|
module std.xml;
|
|
|
|
import std.algorithm : count, startsWith;
|
|
import std.array;
|
|
import std.ascii;
|
|
import std.string;
|
|
import std.encoding;
|
|
|
|
enum cdata = "<![CDATA[";
|
|
|
|
/**
|
|
* Returns true if the character is a character according to the XML standard
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
* Params:
|
|
* c = the character to be tested
|
|
*/
|
|
bool isChar(dchar c) // rule 2
|
|
{
|
|
if (c <= 0xD7FF)
|
|
{
|
|
if (c >= 0x20)
|
|
return true;
|
|
switch(c)
|
|
{
|
|
case 0xA:
|
|
case 0x9:
|
|
case 0xD:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
else if (0xE000 <= c && c <= 0x10FFFF)
|
|
{
|
|
if ((c & 0x1FFFFE) != 0xFFFE) // U+FFFE and U+FFFF
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
// const CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
|
|
// 0x10000,0x10FFFF];
|
|
assert(!isChar(cast(dchar)0x8));
|
|
assert( isChar(cast(dchar)0x9));
|
|
assert( isChar(cast(dchar)0xA));
|
|
assert(!isChar(cast(dchar)0xB));
|
|
assert(!isChar(cast(dchar)0xC));
|
|
assert( isChar(cast(dchar)0xD));
|
|
assert(!isChar(cast(dchar)0xE));
|
|
assert(!isChar(cast(dchar)0x1F));
|
|
assert( isChar(cast(dchar)0x20));
|
|
assert( isChar('J'));
|
|
assert( isChar(cast(dchar)0xD7FF));
|
|
assert(!isChar(cast(dchar)0xD800));
|
|
assert(!isChar(cast(dchar)0xDFFF));
|
|
assert( isChar(cast(dchar)0xE000));
|
|
assert( isChar(cast(dchar)0xFFFD));
|
|
assert(!isChar(cast(dchar)0xFFFE));
|
|
assert(!isChar(cast(dchar)0xFFFF));
|
|
assert( isChar(cast(dchar)0x10000));
|
|
assert( isChar(cast(dchar)0x10FFFF));
|
|
assert(!isChar(cast(dchar)0x110000));
|
|
|
|
debug (stdxml_TestHardcodedChecks)
|
|
{
|
|
foreach (c; 0 .. dchar.max + 1)
|
|
assert(isChar(c) == lookup(CharTable, c));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns true if the character is whitespace according to the XML standard
|
|
*
|
|
* Only the following characters are considered whitespace in XML - space, tab,
|
|
* carriage return and linefeed
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
* Params:
|
|
* c = the character to be tested
|
|
*/
|
|
bool isSpace(dchar c)
|
|
{
|
|
return c == '\u0020' || c == '\u0009' || c == '\u000A' || c == '\u000D';
|
|
}
|
|
|
|
/**
|
|
* Returns true if the character is a digit according to the XML standard
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
* Params:
|
|
* c = the character to be tested
|
|
*/
|
|
bool isDigit(dchar c)
|
|
{
|
|
if (c <= 0x0039 && c >= 0x0030)
|
|
return true;
|
|
else
|
|
return lookup(DigitTable,c);
|
|
}
|
|
|
|
unittest
|
|
{
|
|
debug (stdxml_TestHardcodedChecks)
|
|
{
|
|
foreach (c; 0 .. dchar.max + 1)
|
|
assert(isDigit(c) == lookup(DigitTable, c));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns true if the character is a letter according to the XML standard
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
* Params:
|
|
* c = the character to be tested
|
|
*/
|
|
bool isLetter(dchar c) // rule 84
|
|
{
|
|
return isIdeographic(c) || isBaseChar(c);
|
|
}
|
|
|
|
/**
|
|
* Returns true if the character is an ideographic character according to the
|
|
* XML standard
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
* Params:
|
|
* c = the character to be tested
|
|
*/
|
|
bool isIdeographic(dchar c)
|
|
{
|
|
if (c == 0x3007)
|
|
return true;
|
|
if (c <= 0x3029 && c >= 0x3021 )
|
|
return true;
|
|
if (c <= 0x9FA5 && c >= 0x4E00)
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
assert(isIdeographic('\u4E00'));
|
|
assert(isIdeographic('\u9FA5'));
|
|
assert(isIdeographic('\u3007'));
|
|
assert(isIdeographic('\u3021'));
|
|
assert(isIdeographic('\u3029'));
|
|
|
|
debug (stdxml_TestHardcodedChecks)
|
|
{
|
|
foreach (c; 0 .. dchar.max + 1)
|
|
assert(isIdeographic(c) == lookup(IdeographicTable, c));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns true if the character is a base character according to the XML
|
|
* standard
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
* Params:
|
|
* c = the character to be tested
|
|
*/
|
|
bool isBaseChar(dchar c)
|
|
{
|
|
return lookup(BaseCharTable,c);
|
|
}
|
|
|
|
/**
|
|
* Returns true if the character is a combining character according to the
|
|
* XML standard
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
* Params:
|
|
* c = the character to be tested
|
|
*/
|
|
bool isCombiningChar(dchar c)
|
|
{
|
|
return lookup(CombiningCharTable,c);
|
|
}
|
|
|
|
/**
|
|
* Returns true if the character is an extender according to the XML standard
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
* Params:
|
|
* c = the character to be tested
|
|
*/
|
|
bool isExtender(dchar c)
|
|
{
|
|
return lookup(ExtenderTable,c);
|
|
}
|
|
|
|
/**
|
|
* Encodes a string by replacing all characters which need to be escaped with
|
|
* appropriate predefined XML entities.
|
|
*
|
|
* encode() escapes certain characters (ampersand, quote, apostrophe, less-than
|
|
* and greater-than), and similarly, decode() unescapes them. These functions
|
|
* are provided for convenience only. You do not need to use them when using
|
|
* the std.xml classes, because then all the encoding and decoding will be done
|
|
* for you automatically.
|
|
*
|
|
* If the string is not modified, the original will be returned.
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
* Params:
|
|
* s = The string to be encoded
|
|
*
|
|
* Returns: The encoded string
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* writefln(encode("a > b")); // writes "a > b"
|
|
* --------------
|
|
*/
|
|
S encode(S)(S s)
|
|
{
|
|
string r;
|
|
size_t lastI;
|
|
auto result = appender!S();
|
|
|
|
foreach (i, c; s)
|
|
{
|
|
switch (c)
|
|
{
|
|
case '&': r = "&"; break;
|
|
case '"': r = """; break;
|
|
case '\'': r = "'"; break;
|
|
case '<': r = "<"; break;
|
|
case '>': r = ">"; break;
|
|
default: continue;
|
|
}
|
|
// Replace with r
|
|
result.put(s[lastI .. i]);
|
|
result.put(r);
|
|
lastI = i + 1;
|
|
}
|
|
|
|
if (!result.data.ptr) return s;
|
|
result.put(s[lastI .. $]);
|
|
return result.data;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
auto s = "hello";
|
|
assert(encode(s) is s);
|
|
assert(encode("a > b") == "a > b", encode("a > b"));
|
|
assert(encode("a < b") == "a < b");
|
|
assert(encode("don't") == "don't");
|
|
assert(encode("\"hi\"") == ""hi"", encode("\"hi\""));
|
|
assert(encode("cat & dog") == "cat & dog");
|
|
}
|
|
|
|
/**
|
|
* Mode to use for decoding.
|
|
*
|
|
* $(DDOC_ENUM_MEMBERS NONE) Do not decode
|
|
* $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
|
|
* $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
|
|
*/
|
|
enum DecodeMode
|
|
{
|
|
NONE, LOOSE, STRICT
|
|
}
|
|
|
|
/**
|
|
* Decodes a string by unescaping all predefined XML entities.
|
|
*
|
|
* encode() escapes certain characters (ampersand, quote, apostrophe, less-than
|
|
* and greater-than), and similarly, decode() unescapes them. These functions
|
|
* are provided for convenience only. You do not need to use them when using
|
|
* the std.xml classes, because then all the encoding and decoding will be done
|
|
* for you automatically.
|
|
*
|
|
* This function decodes the entities &amp;, &quot;, &apos;,
|
|
* &lt; and &gt,
|
|
* as well as decimal and hexadecimal entities such as &#x20AC;
|
|
*
|
|
* If the string does not contain an ampersand, the original will be returned.
|
|
*
|
|
* Note that the "mode" parameter can be one of DecodeMode.NONE (do not
|
|
* decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
|
|
* (decode, and throw a DecodeException in the event of an error).
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
* Params:
|
|
* s = The string to be decoded
|
|
* mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
|
|
*
|
|
* Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
|
|
*
|
|
* Returns: The decoded string
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* writefln(decode("a > b")); // writes "a > b"
|
|
* --------------
|
|
*/
|
|
string decode(string s, DecodeMode mode=DecodeMode.LOOSE)
|
|
{
|
|
import std.utf : encode;
|
|
|
|
if (mode == DecodeMode.NONE) return s;
|
|
|
|
char[] buffer;
|
|
foreach (ref i; 0 .. s.length)
|
|
{
|
|
char c = s[i];
|
|
if (c != '&')
|
|
{
|
|
if (buffer.length != 0) buffer ~= c;
|
|
}
|
|
else
|
|
{
|
|
if (buffer.length == 0)
|
|
{
|
|
buffer = s[0 .. i].dup;
|
|
}
|
|
if (startsWith(s[i..$],"&#"))
|
|
{
|
|
try
|
|
{
|
|
dchar d;
|
|
string t = s[i..$];
|
|
checkCharRef(t, d);
|
|
char[4] temp;
|
|
buffer ~= temp[0 .. std.utf.encode(temp, d)];
|
|
i = s.length - t.length - 1;
|
|
}
|
|
catch(Err e)
|
|
{
|
|
if (mode == DecodeMode.STRICT)
|
|
throw new DecodeException("Unescaped &");
|
|
buffer ~= '&';
|
|
}
|
|
}
|
|
else if (startsWith(s[i..$],"&" )) { buffer ~= '&'; i += 4; }
|
|
else if (startsWith(s[i..$],""")) { buffer ~= '"'; i += 5; }
|
|
else if (startsWith(s[i..$],"'")) { buffer ~= '\''; i += 5; }
|
|
else if (startsWith(s[i..$],"<" )) { buffer ~= '<'; i += 3; }
|
|
else if (startsWith(s[i..$],">" )) { buffer ~= '>'; i += 3; }
|
|
else
|
|
{
|
|
if (mode == DecodeMode.STRICT)
|
|
throw new DecodeException("Unescaped &");
|
|
buffer ~= '&';
|
|
}
|
|
}
|
|
}
|
|
return (buffer.length == 0) ? s : cast(string)buffer;
|
|
}
|
|
|
|
unittest
|
|
{
|
|
void assertNot(string s)
|
|
{
|
|
bool b = false;
|
|
try { decode(s,DecodeMode.STRICT); }
|
|
catch (DecodeException e) { b = true; }
|
|
assert(b,s);
|
|
}
|
|
|
|
// Assert that things that should work, do
|
|
auto s = "hello";
|
|
assert(decode(s, DecodeMode.STRICT) is s);
|
|
assert(decode("a > b", DecodeMode.STRICT) == "a > b");
|
|
assert(decode("a < b", DecodeMode.STRICT) == "a < b");
|
|
assert(decode("don't", DecodeMode.STRICT) == "don't");
|
|
assert(decode(""hi"", DecodeMode.STRICT) == "\"hi\"");
|
|
assert(decode("cat & dog", DecodeMode.STRICT) == "cat & dog");
|
|
assert(decode("*", DecodeMode.STRICT) == "*");
|
|
assert(decode("*", DecodeMode.STRICT) == "*");
|
|
assert(decode("cat & dog", DecodeMode.LOOSE) == "cat & dog");
|
|
assert(decode("a > b", DecodeMode.LOOSE) == "a > b");
|
|
assert(decode("&#;", DecodeMode.LOOSE) == "&#;");
|
|
assert(decode("&#x;", DecodeMode.LOOSE) == "&#x;");
|
|
assert(decode("G;", DecodeMode.LOOSE) == "G;");
|
|
assert(decode("G;", DecodeMode.LOOSE) == "G;");
|
|
|
|
// Assert that things that shouldn't work, don't
|
|
assertNot("cat & dog");
|
|
assertNot("a > b");
|
|
assertNot("&#;");
|
|
assertNot("&#x;");
|
|
assertNot("G;");
|
|
assertNot("G;");
|
|
}
|
|
|
|
/**
|
|
* Class representing an XML document.
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
*/
|
|
class Document : Element
|
|
{
|
|
/**
|
|
* Contains all text which occurs before the root element.
|
|
* Defaults to <?xml version="1.0"?>
|
|
*/
|
|
string prolog = "<?xml version=\"1.0\"?>";
|
|
/**
|
|
* Contains all text which occurs after the root element.
|
|
* Defaults to the empty string
|
|
*/
|
|
string epilog;
|
|
|
|
/**
|
|
* Constructs a Document by parsing XML text.
|
|
*
|
|
* This function creates a complete DOM (Document Object Model) tree.
|
|
*
|
|
* The input to this function MUST be valid XML.
|
|
* This is enforced by DocumentParser's in contract.
|
|
*
|
|
* Params:
|
|
* s = the complete XML text.
|
|
*/
|
|
this(string s)
|
|
in
|
|
{
|
|
assert(s.length != 0);
|
|
}
|
|
body
|
|
{
|
|
auto xml = new DocumentParser(s);
|
|
string tagString = xml.tag.tagString;
|
|
|
|
this(xml.tag);
|
|
prolog = s[0 .. tagString.ptr - s.ptr];
|
|
parse(xml);
|
|
epilog = *xml.s;
|
|
}
|
|
|
|
/**
|
|
* Constructs a Document from a Tag.
|
|
*
|
|
* Params:
|
|
* tag = the start tag of the document.
|
|
*/
|
|
this(const(Tag) tag)
|
|
{
|
|
super(tag);
|
|
}
|
|
|
|
const
|
|
{
|
|
/**
|
|
* Compares two Documents for equality
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Document d1,d2;
|
|
* if (d1 == d2) { }
|
|
* --------------
|
|
*/
|
|
override bool opEquals(Object o)
|
|
{
|
|
const doc = toType!(const Document)(o);
|
|
return
|
|
(prolog != doc.prolog ) ? false : (
|
|
(super != cast(const Element)doc) ? false : (
|
|
(epilog != doc.epilog ) ? false : (
|
|
true )));
|
|
}
|
|
|
|
/**
|
|
* Compares two Documents
|
|
*
|
|
* You should rarely need to call this function. It exists so that
|
|
* Documents can be used as associative array keys.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Document d1,d2;
|
|
* if (d1 < d2) { }
|
|
* --------------
|
|
*/
|
|
override int opCmp(Object o)
|
|
{
|
|
const doc = toType!(const Document)(o);
|
|
return
|
|
((prolog != doc.prolog )
|
|
? ( prolog < doc.prolog ? -1 : 1 ) :
|
|
((super != cast(const Element)doc)
|
|
? ( cast()super < cast()cast(const Element)doc ? -1 : 1 ) :
|
|
((epilog != doc.epilog )
|
|
? ( epilog < doc.epilog ? -1 : 1 ) :
|
|
0 )));
|
|
}
|
|
|
|
/**
|
|
* Returns the hash of a Document
|
|
*
|
|
* You should rarely need to call this function. It exists so that
|
|
* Documents can be used as associative array keys.
|
|
*/
|
|
override size_t toHash() @trusted
|
|
{
|
|
return hash(prolog, hash(epilog, (cast()super).toHash()));
|
|
}
|
|
|
|
/**
|
|
* Returns the string representation of a Document. (That is, the
|
|
* complete XML of a document).
|
|
*/
|
|
override string toString()
|
|
{
|
|
return prolog ~ super.toString() ~ epilog;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Class representing an XML element.
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*/
|
|
class Element : Item
|
|
{
|
|
Tag tag; /// The start tag of the element
|
|
Item[] items; /// The element's items
|
|
Text[] texts; /// The element's text items
|
|
CData[] cdatas; /// The element's CData items
|
|
Comment[] comments; /// The element's comments
|
|
ProcessingInstruction[] pis; /// The element's processing instructions
|
|
Element[] elements; /// The element's child elements
|
|
|
|
/**
|
|
* Constructs an Element given a name and a string to be used as a Text
|
|
* interior.
|
|
*
|
|
* Params:
|
|
* name = the name of the element.
|
|
* interior = (optional) the string interior.
|
|
*
|
|
* Examples:
|
|
* -------------------------------------------------------
|
|
* auto element = new Element("title","Serenity")
|
|
* // constructs the element <title>Serenity</title>
|
|
* -------------------------------------------------------
|
|
*/
|
|
this(string name, string interior=null)
|
|
{
|
|
this(new Tag(name));
|
|
if (interior.length != 0) opCatAssign(new Text(interior));
|
|
}
|
|
|
|
/**
|
|
* Constructs an Element from a Tag.
|
|
*
|
|
* Params:
|
|
* tag_ = the start or empty tag of the element.
|
|
*/
|
|
this(const(Tag) tag_)
|
|
{
|
|
this.tag = new Tag(tag_.name);
|
|
tag.type = TagType.EMPTY;
|
|
foreach(k,v;tag_.attr) tag.attr[k] = v;
|
|
tag.tagString = tag_.tagString;
|
|
}
|
|
|
|
/**
|
|
* Append a text item to the interior of this element
|
|
*
|
|
* Params:
|
|
* item = the item you wish to append.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Element element;
|
|
* element ~= new Text("hello");
|
|
* --------------
|
|
*/
|
|
void opCatAssign(Text item)
|
|
{
|
|
texts ~= item;
|
|
appendItem(item);
|
|
}
|
|
|
|
/**
|
|
* Append a CData item to the interior of this element
|
|
*
|
|
* Params:
|
|
* item = the item you wish to append.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Element element;
|
|
* element ~= new CData("hello");
|
|
* --------------
|
|
*/
|
|
void opCatAssign(CData item)
|
|
{
|
|
cdatas ~= item;
|
|
appendItem(item);
|
|
}
|
|
|
|
/**
|
|
* Append a comment to the interior of this element
|
|
*
|
|
* Params:
|
|
* item = the item you wish to append.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Element element;
|
|
* element ~= new Comment("hello");
|
|
* --------------
|
|
*/
|
|
void opCatAssign(Comment item)
|
|
{
|
|
comments ~= item;
|
|
appendItem(item);
|
|
}
|
|
|
|
/**
|
|
* Append a processing instruction to the interior of this element
|
|
*
|
|
* Params:
|
|
* item = the item you wish to append.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Element element;
|
|
* element ~= new ProcessingInstruction("hello");
|
|
* --------------
|
|
*/
|
|
void opCatAssign(ProcessingInstruction item)
|
|
{
|
|
pis ~= item;
|
|
appendItem(item);
|
|
}
|
|
|
|
/**
|
|
* Append a complete element to the interior of this element
|
|
*
|
|
* Params:
|
|
* item = the item you wish to append.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Element element;
|
|
* Element other = new Element("br");
|
|
* element ~= other;
|
|
* // appends element representing <br />
|
|
* --------------
|
|
*/
|
|
void opCatAssign(Element item)
|
|
{
|
|
elements ~= item;
|
|
appendItem(item);
|
|
}
|
|
|
|
private void appendItem(Item item)
|
|
{
|
|
items ~= item;
|
|
if (tag.type == TagType.EMPTY && !item.isEmptyXML)
|
|
tag.type = TagType.START;
|
|
}
|
|
|
|
private void parse(ElementParser xml)
|
|
{
|
|
xml.onText = (string s) { opCatAssign(new Text(s)); };
|
|
xml.onCData = (string s) { opCatAssign(new CData(s)); };
|
|
xml.onComment = (string s) { opCatAssign(new Comment(s)); };
|
|
xml.onPI = (string s) { opCatAssign(new ProcessingInstruction(s)); };
|
|
|
|
xml.onStartTag[null] = (ElementParser xml)
|
|
{
|
|
auto e = new Element(xml.tag);
|
|
e.parse(xml);
|
|
opCatAssign(e);
|
|
};
|
|
|
|
xml.parse();
|
|
}
|
|
|
|
/**
|
|
* Compares two Elements for equality
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Element e1,e2;
|
|
* if (e1 == e2) { }
|
|
* --------------
|
|
*/
|
|
override bool opEquals(Object o)
|
|
{
|
|
const element = toType!(const Element)(o);
|
|
auto len = items.length;
|
|
if (len != element.items.length) return false;
|
|
foreach (i; 0 .. len)
|
|
{
|
|
if (!items[i].opEquals(cast()element.items[i])) return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Compares two Elements
|
|
*
|
|
* You should rarely need to call this function. It exists so that Elements
|
|
* can be used as associative array keys.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Element e1,e2;
|
|
* if (e1 < e2) { }
|
|
* --------------
|
|
*/
|
|
override int opCmp(Object o)
|
|
{
|
|
const element = toType!(const Element)(o);
|
|
for (uint i=0; ; ++i)
|
|
{
|
|
if (i == items.length && i == element.items.length) return 0;
|
|
if (i == items.length) return -1;
|
|
if (i == element.items.length) return 1;
|
|
if (items[i] != element.items[i])
|
|
return items[i].opCmp(cast()element.items[i]);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns the hash of an Element
|
|
*
|
|
* You should rarely need to call this function. It exists so that Elements
|
|
* can be used as associative array keys.
|
|
*/
|
|
override size_t toHash() const
|
|
{
|
|
size_t hash = tag.toHash();
|
|
foreach(item;items) hash += item.toHash();
|
|
return hash;
|
|
}
|
|
|
|
const
|
|
{
|
|
/**
|
|
* Returns the decoded interior of an element.
|
|
*
|
|
* The element is assumed to contain text <i>only</i>. So, for
|
|
* example, given XML such as "<title>Good &amp;
|
|
* Bad</title>", will return "Good & Bad".
|
|
*
|
|
* Params:
|
|
* mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
|
|
*
|
|
* Throws: DecodeException if decode fails
|
|
*/
|
|
string text(DecodeMode mode=DecodeMode.LOOSE)
|
|
{
|
|
string buffer;
|
|
foreach(item;items)
|
|
{
|
|
Text t = cast(Text)item;
|
|
if (t is null) throw new DecodeException(item.toString());
|
|
buffer ~= decode(t.toString(),mode);
|
|
}
|
|
return buffer;
|
|
}
|
|
|
|
/**
|
|
* Returns an indented string representation of this item
|
|
*
|
|
* Params:
|
|
* indent = (optional) number of spaces by which to indent this
|
|
* element. Defaults to 2.
|
|
*/
|
|
override string[] pretty(uint indent=2)
|
|
{
|
|
|
|
if (isEmptyXML) return [ tag.toEmptyString() ];
|
|
|
|
if (items.length == 1)
|
|
{
|
|
Text t = cast(Text)(items[0]);
|
|
if (t !is null)
|
|
{
|
|
return [tag.toStartString() ~ t.toString() ~ tag.toEndString()];
|
|
}
|
|
}
|
|
|
|
string[] a = [ tag.toStartString() ];
|
|
foreach(item;items)
|
|
{
|
|
string[] b = item.pretty(indent);
|
|
foreach(s;b)
|
|
{
|
|
a ~= rightJustify(s,count(s) + indent);
|
|
}
|
|
}
|
|
a ~= tag.toEndString();
|
|
return a;
|
|
}
|
|
|
|
/**
|
|
* Returns the string representation of an Element
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* auto element = new Element("br");
|
|
* writefln(element.toString()); // writes "<br />"
|
|
* --------------
|
|
*/
|
|
override string toString()
|
|
{
|
|
if (isEmptyXML) return tag.toEmptyString();
|
|
|
|
string buffer = tag.toStartString();
|
|
foreach (item;items) { buffer ~= item.toString(); }
|
|
buffer ~= tag.toEndString();
|
|
return buffer;
|
|
}
|
|
|
|
override @property bool isEmptyXML() { return items.length == 0; }
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Tag types.
|
|
*
|
|
* $(DDOC_ENUM_MEMBERS START) Used for start tags
|
|
* $(DDOC_ENUM_MEMBERS END) Used for end tags
|
|
* $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
|
|
*
|
|
*/
|
|
enum TagType { START, END, EMPTY }
|
|
|
|
/**
|
|
* Class representing an XML tag.
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
* The class invariant guarantees
|
|
* <ul>
|
|
* <li> that $(B type) is a valid enum TagType value</li>
|
|
* <li> that $(B name) consists of valid characters</li>
|
|
* <li> that each attribute name consists of valid characters</li>
|
|
* </ul>
|
|
*/
|
|
class Tag
|
|
{
|
|
TagType type = TagType.START; /// Type of tag
|
|
string name; /// Tag name
|
|
string[string] attr; /// Associative array of attributes
|
|
private string tagString;
|
|
|
|
invariant()
|
|
{
|
|
string s;
|
|
string t;
|
|
|
|
assert(type == TagType.START
|
|
|| type == TagType.END
|
|
|| type == TagType.EMPTY);
|
|
|
|
s = name;
|
|
try { checkName(s,t); }
|
|
catch(Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }
|
|
|
|
foreach(k,v;attr)
|
|
{
|
|
s = k;
|
|
try { checkName(s,t); }
|
|
catch(Err e)
|
|
{ assert(false,"Invalid atrribute name:" ~ e.toString()); }
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Constructs an instance of Tag with a specified name and type
|
|
*
|
|
* The constructor does not initialize the attributes. To initialize the
|
|
* attributes, you access the $(B attr) member variable.
|
|
*
|
|
* Params:
|
|
* name = the Tag's name
|
|
* type = (optional) the Tag's type. If omitted, defaults to
|
|
* TagType.START.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* auto tag = new Tag("img",Tag.EMPTY);
|
|
* tag.attr["src"] = "http://example.com/example.jpg";
|
|
* --------------
|
|
*/
|
|
this(string name, TagType type=TagType.START)
|
|
{
|
|
this.name = name;
|
|
this.type = type;
|
|
}
|
|
|
|
/* Private constructor (so don't ddoc this!)
|
|
*
|
|
* Constructs a Tag by parsing the string representation, e.g. "<html>".
|
|
*
|
|
* The string is passed by reference, and is advanced over all characters
|
|
* consumed.
|
|
*
|
|
* The second parameter is a dummy parameter only, required solely to
|
|
* distinguish this constructor from the public one.
|
|
*/
|
|
private this(ref string s, bool dummy)
|
|
{
|
|
tagString = s;
|
|
try
|
|
{
|
|
reqc(s,'<');
|
|
if (optc(s,'/')) type = TagType.END;
|
|
name = munch(s,"^/>"~whitespace);
|
|
munch(s,whitespace);
|
|
while(s.length > 0 && s[0] != '>' && s[0] != '/')
|
|
{
|
|
string key = munch(s,"^="~whitespace);
|
|
munch(s,whitespace);
|
|
reqc(s,'=');
|
|
munch(s,whitespace);
|
|
reqc(s,'"');
|
|
string val = decode(munch(s,"^\""), DecodeMode.LOOSE);
|
|
reqc(s,'"');
|
|
munch(s,whitespace);
|
|
attr[key] = val;
|
|
}
|
|
if (optc(s,'/'))
|
|
{
|
|
if (type == TagType.END) throw new TagException("");
|
|
type = TagType.EMPTY;
|
|
}
|
|
reqc(s,'>');
|
|
tagString.length = (s.ptr - tagString.ptr);
|
|
}
|
|
catch(XMLException e)
|
|
{
|
|
tagString.length = (s.ptr - tagString.ptr);
|
|
throw new TagException(tagString);
|
|
}
|
|
}
|
|
|
|
const
|
|
{
|
|
/**
|
|
* Compares two Tags for equality
|
|
*
|
|
* You should rarely need to call this function. It exists so that Tags
|
|
* can be used as associative array keys.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Tag tag1,tag2
|
|
* if (tag1 == tag2) { }
|
|
* --------------
|
|
*/
|
|
override bool opEquals(Object o)
|
|
{
|
|
const tag = toType!(const Tag)(o);
|
|
return
|
|
(name != tag.name) ? false : (
|
|
(attr != tag.attr) ? false : (
|
|
(type != tag.type) ? false : (
|
|
true )));
|
|
}
|
|
|
|
/**
|
|
* Compares two Tags
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Tag tag1,tag2
|
|
* if (tag1 < tag2) { }
|
|
* --------------
|
|
*/
|
|
override int opCmp(Object o)
|
|
{
|
|
const tag = toType!(const Tag)(o);
|
|
// Note that attr is an AA, so the comparison is nonsensical (bug 10381)
|
|
return
|
|
((name != tag.name) ? ( name < tag.name ? -1 : 1 ) :
|
|
((attr != tag.attr) ? ( cast(void *)attr < cast(void*)tag.attr ? -1 : 1 ) :
|
|
((type != tag.type) ? ( type < tag.type ? -1 : 1 ) :
|
|
0 )));
|
|
}
|
|
|
|
/**
|
|
* Returns the hash of a Tag
|
|
*
|
|
* You should rarely need to call this function. It exists so that Tags
|
|
* can be used as associative array keys.
|
|
*/
|
|
override size_t toHash()
|
|
{
|
|
return typeid(name).getHash(&name);
|
|
}
|
|
|
|
/**
|
|
* Returns the string representation of a Tag
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* auto tag = new Tag("book",TagType.START);
|
|
* writefln(tag.toString()); // writes "<book>"
|
|
* --------------
|
|
*/
|
|
override string toString()
|
|
{
|
|
if (isEmpty) return toEmptyString();
|
|
return (isEnd) ? toEndString() : toStartString();
|
|
}
|
|
|
|
private
|
|
{
|
|
string toNonEndString()
|
|
{
|
|
string s = "<" ~ name;
|
|
foreach(key,val;attr)
|
|
s ~= format(" %s=\"%s\"",key,encode(val));
|
|
return s;
|
|
}
|
|
|
|
string toStartString() { return toNonEndString() ~ ">"; }
|
|
|
|
string toEndString() { return "</" ~ name ~ ">"; }
|
|
|
|
string toEmptyString() { return toNonEndString() ~ " />"; }
|
|
}
|
|
|
|
/**
|
|
* Returns true if the Tag is a start tag
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* if (tag.isStart) { }
|
|
* --------------
|
|
*/
|
|
@property bool isStart() { return type == TagType.START; }
|
|
|
|
/**
|
|
* Returns true if the Tag is an end tag
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* if (tag.isEnd) { }
|
|
* --------------
|
|
*/
|
|
@property bool isEnd() { return type == TagType.END; }
|
|
|
|
/**
|
|
* Returns true if the Tag is an empty tag
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* if (tag.isEmpty) { }
|
|
* --------------
|
|
*/
|
|
@property bool isEmpty() { return type == TagType.EMPTY; }
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Class representing a comment
|
|
*/
|
|
class Comment : Item
|
|
{
|
|
private string content;
|
|
|
|
/**
|
|
* Construct a comment
|
|
*
|
|
* Params:
|
|
* content = the body of the comment
|
|
*
|
|
* Throws: CommentException if the comment body is illegal (contains "--"
|
|
* or exactly equals "-")
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* auto item = new Comment("This is a comment");
|
|
* // constructs <!--This is a comment-->
|
|
* --------------
|
|
*/
|
|
this(string content)
|
|
{
|
|
if (content == "-" || content.indexOf("==") != -1)
|
|
throw new CommentException(content);
|
|
this.content = content;
|
|
}
|
|
|
|
/**
|
|
* Compares two comments for equality
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Comment item1,item2;
|
|
* if (item1 == item2) { }
|
|
* --------------
|
|
*/
|
|
override bool opEquals(Object o)
|
|
{
|
|
const item = toType!(const Item)(o);
|
|
const t = cast(Comment)item;
|
|
return t !is null && content == t.content;
|
|
}
|
|
|
|
/**
|
|
* Compares two comments
|
|
*
|
|
* You should rarely need to call this function. It exists so that Comments
|
|
* can be used as associative array keys.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Comment item1,item2;
|
|
* if (item1 < item2) { }
|
|
* --------------
|
|
*/
|
|
override int opCmp(Object o)
|
|
{
|
|
const item = toType!(const Item)(o);
|
|
const t = cast(Comment)item;
|
|
return t !is null && (content != t.content
|
|
? (content < t.content ? -1 : 1 ) : 0 );
|
|
}
|
|
|
|
/**
|
|
* Returns the hash of a Comment
|
|
*
|
|
* You should rarely need to call this function. It exists so that Comments
|
|
* can be used as associative array keys.
|
|
*/
|
|
override size_t toHash() const { return hash(content); }
|
|
|
|
/**
|
|
* Returns a string representation of this comment
|
|
*/
|
|
override string toString() const { return "<!--" ~ content ~ "-->"; }
|
|
|
|
override @property bool isEmptyXML() const { return false; } /// Returns false always
|
|
}
|
|
|
|
/**
|
|
* Class representing a Character Data section
|
|
*/
|
|
class CData : Item
|
|
{
|
|
private string content;
|
|
|
|
/**
|
|
* Construct a character data section
|
|
*
|
|
* Params:
|
|
* content = the body of the character data segment
|
|
*
|
|
* Throws: CDataException if the segment body is illegal (contains "]]>")
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* auto item = new CData("<b>hello</b>");
|
|
* // constructs <![CDATA[<b>hello</b>]]>
|
|
* --------------
|
|
*/
|
|
this(string content)
|
|
{
|
|
if (content.indexOf("]]>") != -1) throw new CDataException(content);
|
|
this.content = content;
|
|
}
|
|
|
|
/**
|
|
* Compares two CDatas for equality
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* CData item1,item2;
|
|
* if (item1 == item2) { }
|
|
* --------------
|
|
*/
|
|
override bool opEquals(Object o)
|
|
{
|
|
const item = toType!(const Item)(o);
|
|
const t = cast(CData)item;
|
|
return t !is null && content == t.content;
|
|
}
|
|
|
|
/**
|
|
* Compares two CDatas
|
|
*
|
|
* You should rarely need to call this function. It exists so that CDatas
|
|
* can be used as associative array keys.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* CData item1,item2;
|
|
* if (item1 < item2) { }
|
|
* --------------
|
|
*/
|
|
override int opCmp(Object o)
|
|
{
|
|
const item = toType!(const Item)(o);
|
|
const t = cast(CData)item;
|
|
return t !is null && (content != t.content
|
|
? (content < t.content ? -1 : 1 ) : 0 );
|
|
}
|
|
|
|
/**
|
|
* Returns the hash of a CData
|
|
*
|
|
* You should rarely need to call this function. It exists so that CDatas
|
|
* can be used as associative array keys.
|
|
*/
|
|
override size_t toHash() const { return hash(content); }
|
|
|
|
/**
|
|
* Returns a string representation of this CData section
|
|
*/
|
|
override string toString() const { return cdata ~ content ~ "]]>"; }
|
|
|
|
override @property bool isEmptyXML() const { return false; } /// Returns false always
|
|
}
|
|
|
|
/**
|
|
* Class representing a text (aka Parsed Character Data) section
|
|
*/
|
|
class Text : Item
|
|
{
|
|
private string content;
|
|
|
|
/**
|
|
* Construct a text (aka PCData) section
|
|
*
|
|
* Params:
|
|
* content = the text. This function encodes the text before
|
|
* insertion, so it is safe to insert any text
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* auto Text = new CData("a < b");
|
|
* // constructs a < b
|
|
* --------------
|
|
*/
|
|
this(string content)
|
|
{
|
|
this.content = encode(content);
|
|
}
|
|
|
|
/**
|
|
* Compares two text sections for equality
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Text item1,item2;
|
|
* if (item1 == item2) { }
|
|
* --------------
|
|
*/
|
|
override bool opEquals(Object o)
|
|
{
|
|
const item = toType!(const Item)(o);
|
|
const t = cast(Text)item;
|
|
return t !is null && content == t.content;
|
|
}
|
|
|
|
/**
|
|
* Compares two text sections
|
|
*
|
|
* You should rarely need to call this function. It exists so that Texts
|
|
* can be used as associative array keys.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* Text item1,item2;
|
|
* if (item1 < item2) { }
|
|
* --------------
|
|
*/
|
|
override int opCmp(Object o)
|
|
{
|
|
const item = toType!(const Item)(o);
|
|
const t = cast(Text)item;
|
|
return t !is null
|
|
&& (content != t.content ? (content < t.content ? -1 : 1 ) : 0 );
|
|
}
|
|
|
|
/**
|
|
* Returns the hash of a text section
|
|
*
|
|
* You should rarely need to call this function. It exists so that Texts
|
|
* can be used as associative array keys.
|
|
*/
|
|
override size_t toHash() const { return hash(content); }
|
|
|
|
/**
|
|
* Returns a string representation of this Text section
|
|
*/
|
|
override string toString() const { return content; }
|
|
|
|
/**
|
|
* Returns true if the content is the empty string
|
|
*/
|
|
override @property bool isEmptyXML() const { return content.length == 0; }
|
|
}
|
|
|
|
/**
|
|
* Class representing an XML Instruction section
|
|
*/
|
|
class XMLInstruction : Item
|
|
{
|
|
private string content;
|
|
|
|
/**
|
|
* Construct an XML Instruction section
|
|
*
|
|
* Params:
|
|
* content = the body of the instruction segment
|
|
*
|
|
* Throws: XIException if the segment body is illegal (contains ">")
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* auto item = new XMLInstruction("ATTLIST");
|
|
* // constructs <!ATTLIST>
|
|
* --------------
|
|
*/
|
|
this(string content)
|
|
{
|
|
if (content.indexOf(">") != -1) throw new XIException(content);
|
|
this.content = content;
|
|
}
|
|
|
|
/**
|
|
* Compares two XML instructions for equality
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* XMLInstruction item1,item2;
|
|
* if (item1 == item2) { }
|
|
* --------------
|
|
*/
|
|
override bool opEquals(Object o)
|
|
{
|
|
const item = toType!(const Item)(o);
|
|
const t = cast(XMLInstruction)item;
|
|
return t !is null && content == t.content;
|
|
}
|
|
|
|
/**
|
|
* Compares two XML instructions
|
|
*
|
|
* You should rarely need to call this function. It exists so that
|
|
* XmlInstructions can be used as associative array keys.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* XMLInstruction item1,item2;
|
|
* if (item1 < item2) { }
|
|
* --------------
|
|
*/
|
|
override int opCmp(Object o)
|
|
{
|
|
const item = toType!(const Item)(o);
|
|
const t = cast(XMLInstruction)item;
|
|
return t !is null
|
|
&& (content != t.content ? (content < t.content ? -1 : 1 ) : 0 );
|
|
}
|
|
|
|
/**
|
|
* Returns the hash of an XMLInstruction
|
|
*
|
|
* You should rarely need to call this function. It exists so that
|
|
* XmlInstructions can be used as associative array keys.
|
|
*/
|
|
override size_t toHash() const { return hash(content); }
|
|
|
|
/**
|
|
* Returns a string representation of this XmlInstruction
|
|
*/
|
|
override string toString() const { return "<!" ~ content ~ ">"; }
|
|
|
|
override @property bool isEmptyXML() const { return false; } /// Returns false always
|
|
}
|
|
|
|
/**
|
|
* Class representing a Processing Instruction section
|
|
*/
|
|
class ProcessingInstruction : Item
|
|
{
|
|
private string content;
|
|
|
|
/**
|
|
* Construct a Processing Instruction section
|
|
*
|
|
* Params:
|
|
* content = the body of the instruction segment
|
|
*
|
|
* Throws: PIException if the segment body is illegal (contains "?>")
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* auto item = new ProcessingInstruction("php");
|
|
* // constructs <?php?>
|
|
* --------------
|
|
*/
|
|
this(string content)
|
|
{
|
|
if (content.indexOf("?>") != -1) throw new PIException(content);
|
|
this.content = content;
|
|
}
|
|
|
|
/**
|
|
* Compares two processing instructions for equality
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* ProcessingInstruction item1,item2;
|
|
* if (item1 == item2) { }
|
|
* --------------
|
|
*/
|
|
override bool opEquals(Object o)
|
|
{
|
|
const item = toType!(const Item)(o);
|
|
const t = cast(ProcessingInstruction)item;
|
|
return t !is null && content == t.content;
|
|
}
|
|
|
|
/**
|
|
* Compares two processing instructions
|
|
*
|
|
* You should rarely need to call this function. It exists so that
|
|
* ProcessingInstructions can be used as associative array keys.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* ProcessingInstruction item1,item2;
|
|
* if (item1 < item2) { }
|
|
* --------------
|
|
*/
|
|
override int opCmp(Object o)
|
|
{
|
|
const item = toType!(const Item)(o);
|
|
const t = cast(ProcessingInstruction)item;
|
|
return t !is null
|
|
&& (content != t.content ? (content < t.content ? -1 : 1 ) : 0 );
|
|
}
|
|
|
|
/**
|
|
* Returns the hash of a ProcessingInstruction
|
|
*
|
|
* You should rarely need to call this function. It exists so that
|
|
* ProcessingInstructions can be used as associative array keys.
|
|
*/
|
|
override size_t toHash() const { return hash(content); }
|
|
|
|
/**
|
|
* Returns a string representation of this ProcessingInstruction
|
|
*/
|
|
override string toString() const { return "<?" ~ content ~ "?>"; }
|
|
|
|
override @property bool isEmptyXML() const { return false; } /// Returns false always
|
|
}
|
|
|
|
/**
|
|
* Abstract base class for XML items
|
|
*/
|
|
abstract class Item
|
|
{
|
|
/// Compares with another Item of same type for equality
|
|
abstract override bool opEquals(Object o);
|
|
|
|
/// Compares with another Item of same type
|
|
abstract override int opCmp(Object o);
|
|
|
|
/// Returns the hash of this item
|
|
abstract override size_t toHash() const;
|
|
|
|
/// Returns a string representation of this item
|
|
abstract override string toString() const;
|
|
|
|
/**
|
|
* Returns an indented string representation of this item
|
|
*
|
|
* Params:
|
|
* indent = number of spaces by which to indent child elements
|
|
*/
|
|
string[] pretty(uint indent) const
|
|
{
|
|
string s = strip(toString());
|
|
return s.length == 0 ? [] : [ s ];
|
|
}
|
|
|
|
/// Returns true if the item represents empty XML text
|
|
abstract @property bool isEmptyXML() const;
|
|
}
|
|
|
|
/**
|
|
* Class for parsing an XML Document.
|
|
*
|
|
* This is a subclass of ElementParser. Most of the useful functions are
|
|
* documented there.
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
* Bugs:
|
|
* Currently only supports UTF documents.
|
|
*
|
|
* If there is an encoding attribute in the prolog, it is ignored.
|
|
*
|
|
*/
|
|
class DocumentParser : ElementParser
|
|
{
|
|
string xmlText;
|
|
|
|
/**
|
|
* Constructs a DocumentParser.
|
|
*
|
|
* The input to this function MUST be valid XML.
|
|
* This is enforced by the function's in contract.
|
|
*
|
|
* Params:
|
|
* xmlText_ = the entire XML document as text
|
|
*
|
|
*/
|
|
this(string xmlText_)
|
|
in
|
|
{
|
|
assert(xmlText_.length != 0);
|
|
try
|
|
{
|
|
// Confirm that the input is valid XML
|
|
check(xmlText_);
|
|
}
|
|
catch (CheckException e)
|
|
{
|
|
// And if it's not, tell the user why not
|
|
assert(false, "\n" ~ e.toString());
|
|
}
|
|
}
|
|
body
|
|
{
|
|
xmlText = xmlText_;
|
|
s = &xmlText;
|
|
super(); // Initialize everything
|
|
parse(); // Parse through the root tag (but not beyond)
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Class for parsing an XML element.
|
|
*
|
|
* Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
|
|
*
|
|
* Note that you cannot construct instances of this class directly. You can
|
|
* construct a DocumentParser (which is a subclass of ElementParser), but
|
|
* otherwise, Instances of ElementParser will be created for you by the
|
|
* library, and passed your way via onStartTag handlers.
|
|
*
|
|
*/
|
|
class ElementParser
|
|
{
|
|
alias Handler = void delegate(string);
|
|
alias ElementHandler = void delegate(in Element element);
|
|
alias ParserHandler = void delegate(ElementParser parser);
|
|
|
|
private
|
|
{
|
|
Tag tag_;
|
|
string elementStart;
|
|
string* s;
|
|
|
|
Handler commentHandler = null;
|
|
Handler cdataHandler = null;
|
|
Handler xiHandler = null;
|
|
Handler piHandler = null;
|
|
Handler rawTextHandler = null;
|
|
Handler textHandler = null;
|
|
|
|
// Private constructor for start tags
|
|
this(ElementParser parent)
|
|
{
|
|
s = parent.s;
|
|
this();
|
|
tag_ = parent.tag_;
|
|
}
|
|
|
|
// Private constructor for empty tags
|
|
this(Tag tag, string* t)
|
|
{
|
|
s = t;
|
|
this();
|
|
tag_ = tag;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* The Tag at the start of the element being parsed. You can read this to
|
|
* determine the tag's name and attributes.
|
|
*/
|
|
@property const(Tag) tag() const { return tag_; }
|
|
|
|
/**
|
|
* Register a handler which will be called whenever a start tag is
|
|
* encountered which matches the specified name. You can also pass null as
|
|
* the name, in which case the handler will be called for any unmatched
|
|
* start tag.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* // Call this function whenever a <podcast> start tag is encountered
|
|
* onStartTag["podcast"] = (ElementParser xml)
|
|
* {
|
|
* // Your code here
|
|
* //
|
|
* // This is a a closure, so code here may reference
|
|
* // variables which are outside of this scope
|
|
* };
|
|
*
|
|
* // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
|
|
* // start tag is encountered
|
|
* onStartTag["episode"] = &myEpisodeStartHandler;
|
|
*
|
|
* // call delegate dg for all other start tags
|
|
* onStartTag[null] = dg;
|
|
* --------------
|
|
*
|
|
* This library will supply your function with a new instance of
|
|
* ElementHandler, which may be used to parse inside the element whose
|
|
* start tag was just found, or to identify the tag attributes of the
|
|
* element, etc.
|
|
*
|
|
* Note that your function will be called for both start tags and empty
|
|
* tags. That is, we make no distinction between <br></br>
|
|
* and <br/>.
|
|
*/
|
|
ParserHandler[string] onStartTag;
|
|
|
|
/**
|
|
* Register a handler which will be called whenever an end tag is
|
|
* encountered which matches the specified name. You can also pass null as
|
|
* the name, in which case the handler will be called for any unmatched
|
|
* end tag.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* // Call this function whenever a </podcast> end tag is encountered
|
|
* onEndTag["podcast"] = (in Element e)
|
|
* {
|
|
* // Your code here
|
|
* //
|
|
* // This is a a closure, so code here may reference
|
|
* // variables which are outside of this scope
|
|
* };
|
|
*
|
|
* // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
|
|
* // end tag is encountered
|
|
* onEndTag["episode"] = &myEpisodeEndHandler;
|
|
*
|
|
* // call delegate dg for all other end tags
|
|
* onEndTag[null] = dg;
|
|
* --------------
|
|
*
|
|
* Note that your function will be called for both start tags and empty
|
|
* tags. That is, we make no distinction between <br></br>
|
|
* and <br/>.
|
|
*/
|
|
ElementHandler[string] onEndTag;
|
|
|
|
protected this()
|
|
{
|
|
elementStart = *s;
|
|
}
|
|
|
|
/**
|
|
* Register a handler which will be called whenever text is encountered.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* // Call this function whenever text is encountered
|
|
* onText = (string s)
|
|
* {
|
|
* // Your code here
|
|
*
|
|
* // The passed parameter s will have been decoded by the time you see
|
|
* // it, and so may contain any character.
|
|
* //
|
|
* // This is a a closure, so code here may reference
|
|
* // variables which are outside of this scope
|
|
* };
|
|
* --------------
|
|
*/
|
|
@property void onText(Handler handler) { textHandler = handler; }
|
|
|
|
/**
|
|
* Register an alternative handler which will be called whenever text
|
|
* is encountered. This differs from onText in that onText will decode
|
|
* the text, whereas onTextRaw will not. This allows you to make design
|
|
* choices, since onText will be more accurate, but slower, while
|
|
* onTextRaw will be faster, but less accurate. Of course, you can
|
|
* still call decode() within your handler, if you want, but you'd
|
|
* probably want to use onTextRaw only in circumstances where you
|
|
* know that decoding is unnecessary.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* // Call this function whenever text is encountered
|
|
* onText = (string s)
|
|
* {
|
|
* // Your code here
|
|
*
|
|
* // The passed parameter s will NOT have been decoded.
|
|
* //
|
|
* // This is a a closure, so code here may reference
|
|
* // variables which are outside of this scope
|
|
* };
|
|
* --------------
|
|
*/
|
|
void onTextRaw(Handler handler) { rawTextHandler = handler; }
|
|
|
|
/**
|
|
* Register a handler which will be called whenever a character data
|
|
* segment is encountered.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* // Call this function whenever a CData section is encountered
|
|
* onCData = (string s)
|
|
* {
|
|
* // Your code here
|
|
*
|
|
* // The passed parameter s does not include the opening <![CDATA[
|
|
* // nor closing ]]>
|
|
* //
|
|
* // This is a a closure, so code here may reference
|
|
* // variables which are outside of this scope
|
|
* };
|
|
* --------------
|
|
*/
|
|
@property void onCData(Handler handler) { cdataHandler = handler; }
|
|
|
|
/**
|
|
* Register a handler which will be called whenever a comment is
|
|
* encountered.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* // Call this function whenever a comment is encountered
|
|
* onComment = (string s)
|
|
* {
|
|
* // Your code here
|
|
*
|
|
* // The passed parameter s does not include the opening <!-- nor
|
|
* // closing -->
|
|
* //
|
|
* // This is a a closure, so code here may reference
|
|
* // variables which are outside of this scope
|
|
* };
|
|
* --------------
|
|
*/
|
|
@property void onComment(Handler handler) { commentHandler = handler; }
|
|
|
|
/**
|
|
* Register a handler which will be called whenever a processing
|
|
* instruction is encountered.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* // Call this function whenever a processing instruction is encountered
|
|
* onPI = (string s)
|
|
* {
|
|
* // Your code here
|
|
*
|
|
* // The passed parameter s does not include the opening <? nor
|
|
* // closing ?>
|
|
* //
|
|
* // This is a a closure, so code here may reference
|
|
* // variables which are outside of this scope
|
|
* };
|
|
* --------------
|
|
*/
|
|
@property void onPI(Handler handler) { piHandler = handler; }
|
|
|
|
/**
|
|
* Register a handler which will be called whenever an XML instruction is
|
|
* encountered.
|
|
*
|
|
* Examples:
|
|
* --------------
|
|
* // Call this function whenever an XML instruction is encountered
|
|
* // (Note: XML instructions may only occur preceding the root tag of a
|
|
* // document).
|
|
* onPI = (string s)
|
|
* {
|
|
* // Your code here
|
|
*
|
|
* // The passed parameter s does not include the opening <! nor
|
|
* // closing >
|
|
* //
|
|
* // This is a a closure, so code here may reference
|
|
* // variables which are outside of this scope
|
|
* };
|
|
* --------------
|
|
*/
|
|
@property void onXI(Handler handler) { xiHandler = handler; }
|
|
|
|
/**
|
|
* Parse an XML element.
|
|
*
|
|
* Parsing will continue until the end of the current element. Any items
|
|
* encountered for which a handler has been registered will invoke that
|
|
* handler.
|
|
*
|
|
* Throws: various kinds of XMLException
|
|
*/
|
|
void parse()
|
|
{
|
|
string t;
|
|
Tag root = tag_;
|
|
Tag[string] startTags;
|
|
if (tag_ !is null) startTags[tag_.name] = tag_;
|
|
|
|
while(s.length != 0)
|
|
{
|
|
if (startsWith(*s,"<!--"))
|
|
{
|
|
chop(*s,4);
|
|
t = chop(*s,indexOf(*s,"-->"));
|
|
if (commentHandler.funcptr !is null) commentHandler(t);
|
|
chop(*s,3);
|
|
}
|
|
else if (startsWith(*s,"<![CDATA["))
|
|
{
|
|
chop(*s,9);
|
|
t = chop(*s,indexOf(*s,"]]>"));
|
|
if (cdataHandler.funcptr !is null) cdataHandler(t);
|
|
chop(*s,3);
|
|
}
|
|
else if (startsWith(*s,"<!"))
|
|
{
|
|
chop(*s,2);
|
|
t = chop(*s,indexOf(*s,">"));
|
|
if (xiHandler.funcptr !is null) xiHandler(t);
|
|
chop(*s,1);
|
|
}
|
|
else if (startsWith(*s,"<?"))
|
|
{
|
|
chop(*s,2);
|
|
t = chop(*s,indexOf(*s,"?>"));
|
|
if (piHandler.funcptr !is null) piHandler(t);
|
|
chop(*s,2);
|
|
}
|
|
else if (startsWith(*s,"<"))
|
|
{
|
|
tag_ = new Tag(*s,true);
|
|
if (root is null)
|
|
return; // Return to constructor of derived class
|
|
|
|
if (tag_.isStart)
|
|
{
|
|
startTags[tag_.name] = tag_;
|
|
|
|
auto parser = new ElementParser(this);
|
|
|
|
auto handler = tag_.name in onStartTag;
|
|
if (handler !is null) (*handler)(parser);
|
|
else
|
|
{
|
|
handler = null in onStartTag;
|
|
if (handler !is null) (*handler)(parser);
|
|
}
|
|
}
|
|
else if (tag_.isEnd)
|
|
{
|
|
auto startTag = startTags[tag_.name];
|
|
string text;
|
|
|
|
immutable(char)* p = startTag.tagString.ptr
|
|
+ startTag.tagString.length;
|
|
immutable(char)* q = tag_.tagString.ptr;
|
|
text = decode(p[0..(q-p)], DecodeMode.LOOSE);
|
|
|
|
auto element = new Element(startTag);
|
|
if (text.length != 0) element ~= new Text(text);
|
|
|
|
auto handler = tag_.name in onEndTag;
|
|
if (handler !is null) (*handler)(element);
|
|
else
|
|
{
|
|
handler = null in onEndTag;
|
|
if (handler !is null) (*handler)(element);
|
|
}
|
|
|
|
if (tag_.name == root.name) return;
|
|
}
|
|
else if (tag_.isEmpty)
|
|
{
|
|
Tag startTag = new Tag(tag_.name);
|
|
|
|
// FIX by hed010gy, for bug 2979
|
|
// http://d.puremagic.com/issues/show_bug.cgi?id=2979
|
|
if (tag_.attr.length > 0)
|
|
foreach(tn,tv; tag_.attr) startTag.attr[tn]=tv;
|
|
// END FIX
|
|
|
|
// Handle the pretend start tag
|
|
string s2;
|
|
auto parser = new ElementParser(startTag,&s2);
|
|
auto handler1 = startTag.name in onStartTag;
|
|
if (handler1 !is null) (*handler1)(parser);
|
|
else
|
|
{
|
|
handler1 = null in onStartTag;
|
|
if (handler1 !is null) (*handler1)(parser);
|
|
}
|
|
|
|
// Handle the pretend end tag
|
|
auto element = new Element(startTag);
|
|
auto handler2 = tag_.name in onEndTag;
|
|
if (handler2 !is null) (*handler2)(element);
|
|
else
|
|
{
|
|
handler2 = null in onEndTag;
|
|
if (handler2 !is null) (*handler2)(element);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
t = chop(*s,indexOf(*s,"<"));
|
|
if (rawTextHandler.funcptr !is null)
|
|
rawTextHandler(t);
|
|
else if (textHandler.funcptr !is null)
|
|
textHandler(decode(t,DecodeMode.LOOSE));
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Returns that part of the element which has already been parsed
|
|
*/
|
|
override string toString() const
|
|
{
|
|
assert(elementStart.length >= s.length);
|
|
return elementStart[0 .. elementStart.length - s.length];
|
|
}
|
|
|
|
}
|
|
|
|
private
|
|
{
|
|
template Check(string msg)
|
|
{
|
|
string old = s;
|
|
|
|
void fail()
|
|
{
|
|
s = old;
|
|
throw new Err(s,msg);
|
|
}
|
|
|
|
void fail(Err e)
|
|
{
|
|
s = old;
|
|
throw new Err(s,msg,e);
|
|
}
|
|
|
|
void fail(string msg2)
|
|
{
|
|
fail(new Err(s,msg2));
|
|
}
|
|
}
|
|
|
|
void checkMisc(ref string s) // rule 27
|
|
{
|
|
mixin Check!("Misc");
|
|
|
|
try
|
|
{
|
|
if (s.startsWith("<!--")) { checkComment(s); }
|
|
else if (s.startsWith("<?")) { checkPI(s); }
|
|
else { checkSpace(s); }
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkDocument(ref string s) // rule 1
|
|
{
|
|
mixin Check!("Document");
|
|
try
|
|
{
|
|
checkProlog(s);
|
|
checkElement(s);
|
|
star!(checkMisc)(s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkChars(ref string s) // rule 2
|
|
{
|
|
// TO DO - Fix std.utf stride and decode functions, then use those
|
|
// instead
|
|
|
|
mixin Check!("Chars");
|
|
|
|
dchar c;
|
|
int n = -1;
|
|
foreach(int i,dchar d; s)
|
|
{
|
|
if (!isChar(d))
|
|
{
|
|
c = d;
|
|
n = i;
|
|
break;
|
|
}
|
|
}
|
|
if (n != -1)
|
|
{
|
|
s = s[n..$];
|
|
fail(format("invalid character: U+%04X",c));
|
|
}
|
|
}
|
|
|
|
void checkSpace(ref string s) // rule 3
|
|
{
|
|
mixin Check!("Whitespace");
|
|
munch(s,"\u0020\u0009\u000A\u000D");
|
|
if (s is old) fail();
|
|
}
|
|
|
|
void checkName(ref string s, out string name) // rule 5
|
|
{
|
|
mixin Check!("Name");
|
|
|
|
if (s.length == 0) fail();
|
|
int n;
|
|
foreach(int i,dchar c;s)
|
|
{
|
|
if (c == '_' || c == ':' || isLetter(c)) continue;
|
|
if (i == 0) fail();
|
|
if (c == '-' || c == '.' || isDigit(c)
|
|
|| isCombiningChar(c) || isExtender(c)) continue;
|
|
n = i;
|
|
break;
|
|
}
|
|
name = s[0..n];
|
|
s = s[n..$];
|
|
}
|
|
|
|
void checkAttValue(ref string s) // rule 10
|
|
{
|
|
mixin Check!("AttValue");
|
|
|
|
if (s.length == 0) fail();
|
|
char c = s[0];
|
|
if (c != '\u0022' && c != '\u0027')
|
|
fail("attribute value requires quotes");
|
|
s = s[1..$];
|
|
for(;;)
|
|
{
|
|
munch(s,"^<&"~c);
|
|
if (s.length == 0) fail("unterminated attribute value");
|
|
if (s[0] == '<') fail("< found in attribute value");
|
|
if (s[0] == c) break;
|
|
try { checkReference(s); } catch(Err e) { fail(e); }
|
|
}
|
|
s = s[1..$];
|
|
}
|
|
|
|
void checkCharData(ref string s) // rule 14
|
|
{
|
|
mixin Check!("CharData");
|
|
|
|
while (s.length != 0)
|
|
{
|
|
if (s.startsWith("&")) break;
|
|
if (s.startsWith("<")) break;
|
|
if (s.startsWith("]]>")) fail("]]> found within char data");
|
|
s = s[1..$];
|
|
}
|
|
}
|
|
|
|
void checkComment(ref string s) // rule 15
|
|
{
|
|
mixin Check!("Comment");
|
|
|
|
try { checkLiteral("<!--",s); } catch(Err e) { fail(e); }
|
|
ptrdiff_t n = s.indexOf("--");
|
|
if (n == -1) fail("unterminated comment");
|
|
s = s[n..$];
|
|
try { checkLiteral("-->",s); } catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkPI(ref string s) // rule 16
|
|
{
|
|
mixin Check!("PI");
|
|
|
|
try
|
|
{
|
|
checkLiteral("<?",s);
|
|
checkEnd("?>",s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkCDSect(ref string s) // rule 18
|
|
{
|
|
mixin Check!("CDSect");
|
|
|
|
try
|
|
{
|
|
checkLiteral(cdata,s);
|
|
checkEnd("]]>",s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkProlog(ref string s) // rule 22
|
|
{
|
|
mixin Check!("Prolog");
|
|
|
|
try
|
|
{
|
|
/* The XML declaration is optional
|
|
* http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
|
|
*/
|
|
opt!(checkXMLDecl)(s);
|
|
|
|
star!(checkMisc)(s);
|
|
opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkXMLDecl(ref string s) // rule 23
|
|
{
|
|
mixin Check!("XMLDecl");
|
|
|
|
try
|
|
{
|
|
checkLiteral("<?xml",s);
|
|
checkVersionInfo(s);
|
|
opt!(checkEncodingDecl)(s);
|
|
opt!(checkSDDecl)(s);
|
|
opt!(checkSpace)(s);
|
|
checkLiteral("?>",s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkVersionInfo(ref string s) // rule 24
|
|
{
|
|
mixin Check!("VersionInfo");
|
|
|
|
try
|
|
{
|
|
checkSpace(s);
|
|
checkLiteral("version",s);
|
|
checkEq(s);
|
|
quoted!(checkVersionNum)(s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkEq(ref string s) // rule 25
|
|
{
|
|
mixin Check!("Eq");
|
|
|
|
try
|
|
{
|
|
opt!(checkSpace)(s);
|
|
checkLiteral("=",s);
|
|
opt!(checkSpace)(s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkVersionNum(ref string s) // rule 26
|
|
{
|
|
mixin Check!("VersionNum");
|
|
|
|
munch(s,"a-zA-Z0-9_.:-");
|
|
if (s is old) fail();
|
|
}
|
|
|
|
void checkDocTypeDecl(ref string s) // rule 28
|
|
{
|
|
mixin Check!("DocTypeDecl");
|
|
|
|
try
|
|
{
|
|
checkLiteral("<!DOCTYPE",s);
|
|
//
|
|
// TO DO -- ensure DOCTYPE is well formed
|
|
// (But not yet. That's one of our "future directions")
|
|
//
|
|
checkEnd(">",s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkSDDecl(ref string s) // rule 32
|
|
{
|
|
mixin Check!("SDDecl");
|
|
|
|
try
|
|
{
|
|
checkSpace(s);
|
|
checkLiteral("standalone",s);
|
|
checkEq(s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
|
|
int n = 0;
|
|
if (s.startsWith("'yes'") || s.startsWith("\"yes\"")) n = 5;
|
|
else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" )) n = 4;
|
|
else fail("standalone attribute value must be 'yes', \"yes\","~
|
|
" 'no' or \"no\"");
|
|
s = s[n..$];
|
|
}
|
|
|
|
void checkElement(ref string s) // rule 39
|
|
{
|
|
mixin Check!("Element");
|
|
|
|
string sname,ename,t;
|
|
try { checkTag(s,t,sname); } catch(Err e) { fail(e); }
|
|
|
|
if (t == "STag")
|
|
{
|
|
try
|
|
{
|
|
checkContent(s);
|
|
t = s;
|
|
checkETag(s,ename);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
|
|
if (sname != ename)
|
|
{
|
|
s = t;
|
|
fail("end tag name \"" ~ ename
|
|
~ "\" differs from start tag name \""~sname~"\"");
|
|
}
|
|
}
|
|
}
|
|
|
|
// rules 40 and 44
|
|
void checkTag(ref string s, out string type, out string name)
|
|
{
|
|
mixin Check!("Tag");
|
|
|
|
try
|
|
{
|
|
type = "STag";
|
|
checkLiteral("<",s);
|
|
checkName(s,name);
|
|
star!(seq!(checkSpace,checkAttribute))(s);
|
|
opt!(checkSpace)(s);
|
|
if (s.length != 0 && s[0] == '/')
|
|
{
|
|
s = s[1..$];
|
|
type = "ETag";
|
|
}
|
|
checkLiteral(">",s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkAttribute(ref string s) // rule 41
|
|
{
|
|
mixin Check!("Attribute");
|
|
|
|
try
|
|
{
|
|
string name;
|
|
checkName(s,name);
|
|
checkEq(s);
|
|
checkAttValue(s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkETag(ref string s, out string name) // rule 42
|
|
{
|
|
mixin Check!("ETag");
|
|
|
|
try
|
|
{
|
|
checkLiteral("</",s);
|
|
checkName(s,name);
|
|
opt!(checkSpace)(s);
|
|
checkLiteral(">",s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkContent(ref string s) // rule 43
|
|
{
|
|
mixin Check!("Content");
|
|
|
|
try
|
|
{
|
|
while (s.length != 0)
|
|
{
|
|
old = s;
|
|
if (s.startsWith("&")) { checkReference(s); }
|
|
else if (s.startsWith("<!--")) { checkComment(s); }
|
|
else if (s.startsWith("<?")) { checkPI(s); }
|
|
else if (s.startsWith(cdata)) { checkCDSect(s); }
|
|
else if (s.startsWith("</")) { break; }
|
|
else if (s.startsWith("<")) { checkElement(s); }
|
|
else { checkCharData(s); }
|
|
}
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkCharRef(ref string s, out dchar c) // rule 66
|
|
{
|
|
mixin Check!("CharRef");
|
|
|
|
c = 0;
|
|
try { checkLiteral("&#",s); } catch(Err e) { fail(e); }
|
|
int radix = 10;
|
|
if (s.length != 0 && s[0] == 'x')
|
|
{
|
|
s = s[1..$];
|
|
radix = 16;
|
|
}
|
|
if (s.length == 0) fail("unterminated character reference");
|
|
if (s[0] == ';')
|
|
fail("character reference must have at least one digit");
|
|
while (s.length != 0)
|
|
{
|
|
char d = s[0];
|
|
int n = 0;
|
|
switch(d)
|
|
{
|
|
case 'F','f': ++n; goto case;
|
|
case 'E','e': ++n; goto case;
|
|
case 'D','d': ++n; goto case;
|
|
case 'C','c': ++n; goto case;
|
|
case 'B','b': ++n; goto case;
|
|
case 'A','a': ++n; goto case;
|
|
case '9': ++n; goto case;
|
|
case '8': ++n; goto case;
|
|
case '7': ++n; goto case;
|
|
case '6': ++n; goto case;
|
|
case '5': ++n; goto case;
|
|
case '4': ++n; goto case;
|
|
case '3': ++n; goto case;
|
|
case '2': ++n; goto case;
|
|
case '1': ++n; goto case;
|
|
case '0': break;
|
|
default: n = 100; break;
|
|
}
|
|
if (n >= radix) break;
|
|
c *= radix;
|
|
c += n;
|
|
s = s[1..$];
|
|
}
|
|
if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
|
|
if (s.length == 0 || s[0] != ';') fail("expected ;");
|
|
else s = s[1..$];
|
|
}
|
|
|
|
void checkReference(ref string s) // rule 67
|
|
{
|
|
mixin Check!("Reference");
|
|
|
|
try
|
|
{
|
|
dchar c;
|
|
if (s.startsWith("&#")) checkCharRef(s,c);
|
|
else checkEntityRef(s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkEntityRef(ref string s) // rule 68
|
|
{
|
|
mixin Check!("EntityRef");
|
|
|
|
try
|
|
{
|
|
string name;
|
|
checkLiteral("&",s);
|
|
checkName(s,name);
|
|
checkLiteral(";",s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
void checkEncName(ref string s) // rule 81
|
|
{
|
|
mixin Check!("EncName");
|
|
|
|
munch(s,"a-zA-Z");
|
|
if (s is old) fail();
|
|
munch(s,"a-zA-Z0-9_.-");
|
|
}
|
|
|
|
void checkEncodingDecl(ref string s) // rule 80
|
|
{
|
|
mixin Check!("EncodingDecl");
|
|
|
|
try
|
|
{
|
|
checkSpace(s);
|
|
checkLiteral("encoding",s);
|
|
checkEq(s);
|
|
quoted!(checkEncName)(s);
|
|
}
|
|
catch(Err e) { fail(e); }
|
|
}
|
|
|
|
// Helper functions
|
|
|
|
void checkLiteral(string literal,ref string s)
|
|
{
|
|
mixin Check!("Literal");
|
|
|
|
if (!s.startsWith(literal)) fail("Expected literal \""~literal~"\"");
|
|
s = s[literal.length..$];
|
|
}
|
|
|
|
void checkEnd(string end,ref string s)
|
|
{
|
|
// Deliberately no mixin Check here.
|
|
|
|
auto n = s.indexOf(end);
|
|
if (n == -1) throw new Err(s,"Unable to find terminating \""~end~"\"");
|
|
s = s[n..$];
|
|
checkLiteral(end,s);
|
|
}
|
|
|
|
// Metafunctions -- none of these use mixin Check
|
|
|
|
void opt(alias f)(ref string s)
|
|
{
|
|
try { f(s); } catch(Err e) {}
|
|
}
|
|
|
|
void plus(alias f)(ref string s)
|
|
{
|
|
f(s);
|
|
star!(f)(s);
|
|
}
|
|
|
|
void star(alias f)(ref string s)
|
|
{
|
|
while (s.length != 0)
|
|
{
|
|
try { f(s); }
|
|
catch(Err e) { return; }
|
|
}
|
|
}
|
|
|
|
void quoted(alias f)(ref string s)
|
|
{
|
|
if (s.startsWith("'"))
|
|
{
|
|
checkLiteral("'",s);
|
|
f(s);
|
|
checkLiteral("'",s);
|
|
}
|
|
else
|
|
{
|
|
checkLiteral("\"",s);
|
|
f(s);
|
|
checkLiteral("\"",s);
|
|
}
|
|
}
|
|
|
|
void seq(alias f,alias g)(ref string s)
|
|
{
|
|
f(s);
|
|
g(s);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Check an entire XML document for well-formedness
|
|
*
|
|
* Params:
|
|
* s = the document to be checked, passed as a string
|
|
*
|
|
* Throws: CheckException if the document is not well formed
|
|
*
|
|
* CheckException's toString() method will yield the complete hierarchy of
|
|
* parse failure (the XML equivalent of a stack trace), giving the line and
|
|
* column number of every failure at every level.
|
|
*/
|
|
void check(string s)
|
|
{
|
|
try
|
|
{
|
|
checkChars(s);
|
|
checkDocument(s);
|
|
if (s.length != 0) throw new Err(s,"Junk found after document");
|
|
}
|
|
catch(Err e)
|
|
{
|
|
e.complete(s);
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
unittest
|
|
{
|
|
version (none) // WHY ARE WE NOT RUNNING THIS UNIT TEST?
|
|
{
|
|
try
|
|
{
|
|
check(q"[<?xml version="1.0"?>
|
|
<catalog>
|
|
<book id="bk101">
|
|
<author>Gambardella, Matthew</author>
|
|
<title>XML Developer's Guide</title>
|
|
<genre>Computer</genre>
|
|
<price>44.95</price>
|
|
<publish_date>2000-10-01</publish_date>
|
|
<description>An in-depth look at creating applications
|
|
with XML.</description>
|
|
</book>
|
|
<book id="bk102">
|
|
<author>Ralls, Kim</author>
|
|
<title>Midnight Rain</title>
|
|
<genre>Fantasy</genres>
|
|
<price>5.95</price>
|
|
<publish_date>2000-12-16</publish_date>
|
|
<description>A former architect battles corporate zombies,
|
|
an evil sorceress, and her own childhood to become queen
|
|
of the world.</description>
|
|
</book>
|
|
<book id="bk103">
|
|
<author>Corets, Eva</author>
|
|
<title>Maeve Ascendant</title>
|
|
<genre>Fantasy</genre>
|
|
<price>5.95</price>
|
|
<publish_date>2000-11-17</publish_date>
|
|
<description>After the collapse of a nanotechnology
|
|
society in England, the young survivors lay the
|
|
foundation for a new society.</description>
|
|
</book>
|
|
</catalog>
|
|
]");
|
|
assert(false);
|
|
}
|
|
catch(CheckException e)
|
|
{
|
|
int n = e.toString().indexOf("end tag name \"genres\" differs"~
|
|
" from start tag name \"genre\"");
|
|
assert(n != -1);
|
|
}
|
|
}
|
|
}
|
|
|
|
unittest
|
|
{
|
|
string s = q"EOS
|
|
<?xml version="1.0"?>
|
|
<set>
|
|
<one>A</one>
|
|
<!-- comment -->
|
|
<two>B</two>
|
|
</set>
|
|
EOS";
|
|
try
|
|
{
|
|
check(s);
|
|
}
|
|
catch (CheckException e)
|
|
{
|
|
assert(0, e.toString());
|
|
}
|
|
}
|
|
|
|
unittest
|
|
{
|
|
string s = q"EOS
|
|
<?xml version="1.0" encoding="utf-8"?> <Tests>
|
|
<Test thing="What & Up">What & Up Second</Test>
|
|
</Tests>
|
|
EOS";
|
|
auto xml = new DocumentParser(s);
|
|
|
|
xml.onStartTag["Test"] = (ElementParser xml) {
|
|
assert(xml.tag.attr["thing"] == "What & Up");
|
|
};
|
|
|
|
xml.onEndTag["Test"] = (in Element e) {
|
|
assert(e.text() == "What & Up Second");
|
|
};
|
|
xml.parse();
|
|
}
|
|
|
|
unittest
|
|
{
|
|
string s = `<tag attr=""value>" />`;
|
|
auto doc = new Document(s);
|
|
assert(doc.toString() == s);
|
|
}
|
|
|
|
/** The base class for exceptions thrown by this module */
|
|
class XMLException : Exception { this(string msg) { super(msg); } }
|
|
|
|
// Other exceptions
|
|
|
|
/// Thrown during Comment constructor
|
|
class CommentException : XMLException
|
|
{ private this(string msg) { super(msg); } }
|
|
|
|
/// Thrown during CData constructor
|
|
class CDataException : XMLException
|
|
{ private this(string msg) { super(msg); } }
|
|
|
|
/// Thrown during XMLInstruction constructor
|
|
class XIException : XMLException
|
|
{ private this(string msg) { super(msg); } }
|
|
|
|
/// Thrown during ProcessingInstruction constructor
|
|
class PIException : XMLException
|
|
{ private this(string msg) { super(msg); } }
|
|
|
|
/// Thrown during Text constructor
|
|
class TextException : XMLException
|
|
{ private this(string msg) { super(msg); } }
|
|
|
|
/// Thrown during decode()
|
|
class DecodeException : XMLException
|
|
{ private this(string msg) { super(msg); } }
|
|
|
|
/// Thrown if comparing with wrong type
|
|
class InvalidTypeException : XMLException
|
|
{ private this(string msg) { super(msg); } }
|
|
|
|
/// Thrown when parsing for Tags
|
|
class TagException : XMLException
|
|
{ private this(string msg) { super(msg); } }
|
|
|
|
/**
|
|
* Thrown during check()
|
|
*/
|
|
class CheckException : XMLException
|
|
{
|
|
CheckException err; /// Parent in hierarchy
|
|
private string tail;
|
|
/**
|
|
* Name of production rule which failed to parse,
|
|
* or specific error message
|
|
*/
|
|
string msg;
|
|
size_t line = 0; /// Line number at which parse failure occurred
|
|
size_t column = 0; /// Column number at which parse failure occurred
|
|
|
|
private this(string tail,string msg,Err err=null)
|
|
{
|
|
super(null);
|
|
this.tail = tail;
|
|
this.msg = msg;
|
|
this.err = err;
|
|
}
|
|
|
|
private void complete(string entire)
|
|
{
|
|
string head = entire[0..$-tail.length];
|
|
ptrdiff_t n = head.lastIndexOf('\n') + 1;
|
|
line = head.count("\n") + 1;
|
|
dstring t;
|
|
transcode(head[n..$],t);
|
|
column = t.length + 1;
|
|
if (err !is null) err.complete(entire);
|
|
}
|
|
|
|
override string toString() const
|
|
{
|
|
string s;
|
|
if (line != 0) s = format("Line %d, column %d: ",line,column);
|
|
s ~= msg;
|
|
s ~= '\n';
|
|
if (err !is null) s = err.toString() ~ s;
|
|
return s;
|
|
}
|
|
}
|
|
|
|
private alias Err = CheckException;
|
|
|
|
// Private helper functions
|
|
|
|
private
|
|
{
|
|
T toType(T)(Object o)
|
|
{
|
|
T t = cast(T)(o);
|
|
if (t is null)
|
|
{
|
|
throw new InvalidTypeException("Attempt to compare a "
|
|
~ T.stringof ~ " with an instance of another type");
|
|
}
|
|
return t;
|
|
}
|
|
|
|
string chop(ref string s, size_t n)
|
|
{
|
|
if (n == -1) n = s.length;
|
|
string t = s[0..n];
|
|
s = s[n..$];
|
|
return t;
|
|
}
|
|
|
|
bool optc(ref string s, char c)
|
|
{
|
|
bool b = s.length != 0 && s[0] == c;
|
|
if (b) s = s[1..$];
|
|
return b;
|
|
}
|
|
|
|
void reqc(ref string s, char c)
|
|
{
|
|
if (s.length == 0 || s[0] != c) throw new TagException("");
|
|
s = s[1..$];
|
|
}
|
|
|
|
size_t hash(string s,size_t h=0) @trusted nothrow
|
|
{
|
|
return typeid(s).getHash(&s) + h;
|
|
}
|
|
|
|
// Definitions from the XML specification
|
|
immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
|
|
0x10000,0x10FFFF];
|
|
immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
|
|
0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
|
|
0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
|
|
0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
|
|
0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
|
|
0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
|
|
0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
|
|
0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
|
|
0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
|
|
0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
|
|
0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
|
|
0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
|
|
0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
|
|
0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
|
|
0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
|
|
0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
|
|
0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
|
|
0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
|
|
0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
|
|
0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
|
|
0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
|
|
0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
|
|
0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
|
|
0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
|
|
0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
|
|
0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
|
|
0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
|
|
0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
|
|
0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
|
|
0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
|
|
0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
|
|
0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
|
|
0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
|
|
0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
|
|
0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
|
|
0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
|
|
0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
|
|
0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
|
|
0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
|
|
0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
|
|
0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
|
|
immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
|
|
immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
|
|
0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
|
|
0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
|
|
0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
|
|
0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
|
|
0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
|
|
0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
|
|
0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
|
|
0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
|
|
0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
|
|
0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
|
|
0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
|
|
0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
|
|
0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
|
|
0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
|
|
0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
|
|
0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
|
|
0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
|
|
0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
|
|
0x3099,0x3099,0x309A,0x309A];
|
|
immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
|
|
0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
|
|
0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
|
|
0x0ED9,0x0F20,0x0F29];
|
|
immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
|
|
0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
|
|
0x3035,0x309D,0x309E,0x30FC,0x30FE];
|
|
|
|
bool lookup(const(int)[] table, int c)
|
|
{
|
|
while (table.length != 0)
|
|
{
|
|
auto m = (table.length >> 1) & ~1;
|
|
if (c < table[m])
|
|
{
|
|
table = table[0..m];
|
|
}
|
|
else if (c > table[m+1])
|
|
{
|
|
table = table[m+2..$];
|
|
}
|
|
else return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
string startOf(string s)
|
|
{
|
|
string r;
|
|
foreach(char c;s)
|
|
{
|
|
r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
|
|
if (r.length >= 40) { r ~= "___"; break; }
|
|
}
|
|
return r;
|
|
}
|
|
|
|
void exit(string s=null)
|
|
{
|
|
throw new XMLException(s);
|
|
}
|
|
}
|
|
|