mirror of https://github.com/adamdruppe/arsd.git
67 lines
1.2 KiB
D
67 lines
1.2 KiB
D
/++
|
|
Bare minimum support for reading Microsoft Word files.
|
|
|
|
History:
|
|
Added February 19, 2025
|
|
+/
|
|
module arsd.docx;
|
|
|
|
import arsd.core;
|
|
import arsd.zip;
|
|
import arsd.dom;
|
|
import arsd.color;
|
|
|
|
/++
	A Microsoft Word document opened for reading.

	The file is handed to a [ZipFile], and the `word/document.xml`
	entry inside it is parsed into an [XmlDocument] as soon as the
	object is constructed.
+/
class DocxFile {
	// the zip container the entries are read out of
	private ZipFile zipFile;
	// parsed contents of word/document.xml, assigned by load()
	private XmlDocument document;

	/++
		Opens the document, either from a file path or from bytes
		already held in memory, and immediately loads its main
		document part.

		The argument is passed straight through to the [ZipFile]
		constructor.
	+/
	this(FilePath file) {
		zipFile = new ZipFile(file);
		load();
	}

	/// ditto
	this(immutable(ubyte)[] rawData) {
		zipFile = new ZipFile(rawData);
		load();
	}

	/++
		Renders the document as a plain text string that gives you
		the gist of its contents and can be viewed in a plain editor.

		The `innerText` of each `w:p` element is appended in the order
		the query returns them, with a blank line (`"\n\n"`) between
		them. Most formatting is stripped out.

		The separator is only written once some text has already been
		accumulated, so empty paragraphs at the very start of the
		document do not produce leading blank lines.

		Returns:
			the accumulated text; empty if no paragraph contributed any.
	+/
	string toPlainText() {
		string text;

		foreach(para; document.querySelectorAll("w\\:p")) {
			if(text.length != 0)
				text ~= "\n\n";

			text ~= para.innerText;
		}

		return text;
	}

	// FIXME: to RTF, markdown, html, and terminal sequences might also be useful.

	// Parses the main document part and stores it in `document`.
	private void load() {
		loadXml("word/document.xml", (XmlDocument parsed) {
			document = parsed;
		});
	}

	// Reads `filename` out of the zip, parses its content as XML, and
	// passes the resulting document to `handler`.
	private void loadXml(string filename, scope void delegate(XmlDocument document) handler) {
		handler(new XmlDocument(cast(string) zipFile.getContent(filename)));
	}
}
|