mirror of https://github.com/adamdruppe/arsd.git
291 lines
6.0 KiB
D
291 lines
6.0 KiB
D
/++
|
|
Some support for the Microsoft Excel Spreadsheet file format.
|
|
|
|
Don't expect much from it.
|
|
|
|
Some code is borrowed from the xlsxreader package.
|
|
|
|
History:
|
|
Added February 13, 2025
|
|
|
|
See_Also:
|
|
https://github.com/symmetryinvestments/xlsxd which supports writing xlsx files. I might add write support here too someday but I kinda doubt it.
|
|
+/
|
|
module arsd.xlsx;
|
|
|
|
// See also Robert's impl: https://github.com/symmetryinvestments/xlsxreader/blob/master/source/xlsxreader.d
|
|
|
|
import arsd.core;
|
|
import arsd.zip;
|
|
import arsd.dom;
|
|
import arsd.color;
|
|
|
|
import std.conv;
|
|
|
|
/+
|
|
struct XlsxCell {
|
|
string type;
|
|
string formula;
|
|
string value;
|
|
}
|
|
+/
|
|
|
|
/++
	A spreadsheet cell address in "A1" notation: column letters followed by a row number.

	Columns are exposed as zero-based indexes (`A` is 0, `Z` is 25, `AA` is 26).
	Rows are exposed as the number written in the reference, so "A1" has row index 1.
+/
struct CellReference {
	/// The reference text, e.g. "B12".
	string name;

	/++
		Builds a reference from a zero-based column index and the row number as
		it should appear in the text.

		Params:
			column = zero-based column index; must not be negative
			row = row number to print; must not be negative
	+/
	static CellReference fromInts(int column, int row) {
		assert(column >= 0, "column index must not be negative");
		assert(row >= 0, "row number must not be negative");

		string ret;

		// Column letters form a bijective base-26 number: there is no zero
		// digit, so "Z" is followed by "AA" rather than "BA". Working on the
		// one-based value and subtracting one before each digit is taken
		// accounts for that.
		string piece;
		int remaining = column + 1;
		while(remaining > 0) {
			remaining--;
			piece ~= cast(char)('A' + remaining % 26);
			remaining /= 26;
		}

		// digits were produced least significant first
		foreach_reverse(ch; piece)
			ret ~= ch;
		piece = null;

		do {
			piece ~= cast(char)('0' + row % 10);
			row /= 10;
		} while(row);

		foreach_reverse(ch; piece)
			ret ~= ch;

		return CellReference(ret);
	}

	/++
		Returns the zero-based column index encoded by the leading letters of
		[name]. A `$` marker is skipped. A name with no column letters gives 0.
	+/
	int toColumnIndex() {
		int oneBased;
		foreach(ch; name) {
			if(ch == '$')
				continue;
			if(ch < 'A' || ch > 'Z')
				break;
			oneBased = oneBased * 26 + (ch - 'A' + 1);
		}
		return oneBased > 0 ? oneBased - 1 : 0;
	}

	/++
		Returns the row number written in [name] (so "C7" gives 7). Characters
		that are not decimal digits are ignored. A name with no digits gives 0.
	+/
	int toRowIndex() {
		int accumulator;
		foreach(ch; name) {
			if(ch < '0' || ch > '9')
				continue;
			accumulator = accumulator * 10 + (ch - '0');
		}
		return accumulator;
	}
}
|
|
|
|
/++
	One worksheet of an [XlsxFile]. Instances are created by the file object;
	use [XlsxFile.getSheet] to obtain one.
+/
class XlsxSheet {
	private string name_;
	private XlsxFile file;
	private XmlDocument document;

	// Reads the sheet's `dimension` element and derives the grid bounds from it.
	// `requireSelector` is used, so a worksheet without that element is rejected.
	private this(XlsxFile file, string name, XmlDocument document) {
		this.file = file;
		this.name_ = name;
		this.document = document;

		this.dimension = document.requireSelector("worksheet > dimension").getAttribute("ref");
		// there's also sheetView with selection, activeCell, etc
		// and cols with widths and such

		auto ul = this.upperLeft;
		this.minRow = ul.toRowIndex;
		this.minColumn = ul.toColumnIndex;

		// the max values are exclusive bounds, hence the + 1
		auto lr = this.lowerRight;
		this.maxRow = lr.toRowIndex + 1;
		this.maxColumn = lr.toColumnIndex + 1;
	}

	// the raw `ref` attribute, e.g. "A1:C10" or just "A1"
	private string dimension;

	private int minRow;
	private int minColumn;
	private int maxRow;
	private int maxColumn;

	/++
		Number of columns (width) and rows (height) covered by the sheet's
		declared dimension.
	+/
	Size size() {
		return Size(maxColumn - minColumn, maxRow - minRow);
	}

	// First corner of the dimension. A dimension without a colon names a
	// single cell, which is then both corners.
	private CellReference upperLeft() {
		foreach(idx, ch; dimension)
			if(ch == ':')
				return CellReference(dimension[0 .. idx]);
		return CellReference(dimension);
	}

	// Second corner of the dimension; see upperLeft for the single-cell case.
	private CellReference lowerRight() {
		foreach(idx, ch; dimension)
			if(ch == ':')
				return CellReference(dimension[idx + 1 .. $]);
		return CellReference(dimension);
	}

	// opIndex could be like sheet["A1:B4"] and sheet["A1", "B4"] and stuff maybe.

	/++
		The sheet's name as listed in the workbook.
	+/
	string name() {
		return name_;
	}

	/++
		Returns the cell values as a `[row][column]` grid of strings covering
		[size]. Cells with no stored value are left as `null`. Shared-string
		cells are resolved through the workbook's string table; every other
		cell yields the raw text of its value element.

		Suitable for passing to [arsd.csv.toCsv]

		Throws:
			Exception if a shared-string cell refers to an index outside the
			workbook's string table.
	+/
	string[][] toStringGrid() {
		// FIXME: this crashes on opend dmd!
		// string[][] ret = new string[][](size.height, size.width);

		auto dims = size();
		if(dims.width <= 0 || dims.height <= 0)
			return null; // malformed dimension; nothing sensible to allocate

		string[][] ret;
		ret.length = dims.height;
		foreach(ref row; ret)
			row.length = dims.width;

		//alloc done

		// One pass over the cell elements instead of one document-wide query
		// per grid position.
		foreach(element; document.querySelectorAll("c")) {
			auto reference = CellReference(element.getAttribute("r"));
			immutable rowIdx = reference.toRowIndex - minRow;
			immutable columnIdx = reference.toColumnIndex - minColumn;

			if(rowIdx < 0 || rowIdx >= dims.height || columnIdx < 0 || columnIdx >= dims.width)
				continue;

			// Only accept references that are exactly the canonical name of
			// the slot they map to, so oddly written references cannot land
			// in the wrong cell.
			if(CellReference.fromInts(columnIdx + minColumn, rowIdx + minRow).name != reference.name)
				continue;

			string v;
			auto valueElement = element.querySelector("v");
			if(valueElement !is null) {
				v = valueElement.textContent;
				if(element.attrs.t == "s") {
					// the value is an index into the shared string table
					immutable stringIndex = v.to!int();
					if(stringIndex < 0 || stringIndex >= file.sharedStrings.length)
						throw new Exception("shared string index out of range in cell " ~ reference.name);
					v = file.sharedStrings[stringIndex];
				}
			} else if(element.attrs.t == "inlineStr") {
				// inline strings carry their text under <is> rather than <v>
				foreach(textElement; element.querySelectorAll("is t"))
					v ~= textElement.textContent;
			} else {
				// e.g. a cell that only carries a style; leave it empty
				continue;
			}

			ret[rowIdx][columnIdx] = v;
		}

		return ret;
	}
}
|
|
|
|
/++
	Read-only access to the sheets of an xlsx workbook.

	The workbook's content types, relationships, shared strings and sheet list
	are read when the object is constructed. The XML of each sheet is loaded
	and cached the first time that sheet is requested.
+/
class XlsxFile {
	private ZipFile zipFile;

	/++
		Opens the workbook from a file path or from the raw bytes of the file.
	+/
	this(FilePath file) {
		this.zipFile = new ZipFile(file);

		load();
	}

	/// ditto
	this(immutable(ubyte)[] rawData) {
		this.zipFile = new ZipFile(rawData);

		load();
	}

	/++
		Number of sheets listed in the workbook.
	+/
	int sheetCount() {
		return cast(int) sheetsInternal.length;
	}

	/++
		Names of the sheets, in the order the workbook lists them.
	+/
	string[] sheetNames() {
		string[] ret;
		foreach(sheet; sheetsInternal)
			ret ~= sheet.name;
		return ret;
	}

	/++
		Looks a sheet up by name or by zero-based position.

		Returns:
			the sheet, or `null` if no sheet has that name or the index is
			out of range.
	+/
	XlsxSheet getSheet(string name) {
		foreach(ref sheet; sheetsInternal)
			if(sheet.name == name)
				return getSheetParsed(sheet);
		return null;
	}

	/// ditto
	XlsxSheet getSheet(int indexZeroBased) {
		// mirror the by-name overload: an unknown sheet is null, not a range error
		if(indexZeroBased < 0 || cast(size_t) indexZeroBased >= sheetsInternal.length)
			return null;
		return getSheetParsed(sheetsInternal[indexZeroBased]);
	}

	// docProps/core.xml has creator, last modified, etc.

	// PartName -> ContentType, from the Override elements of [Content_Types].xml
	private string[string] contentTypes;

	private struct Relationship {
		string id;
		string type;
		string target;
	}
	// keyed by relationship id, from xl/_rels/workbook.xml.rels
	private Relationship[string] relationships;

	// one entry per <si> element of the shared string table, in file order
	private string[] sharedStrings;

	private struct SheetInternal {
		string name;
		string id;
		string rel; // relationship id pointing at the sheet's part

		XmlDocument cached;
		XlsxSheet parsed;
	}
	private SheetInternal[] sheetsInternal;

	// Turns a relationship target from the workbook's .rels file into a path
	// inside the zip. A target starting with '/' is taken as relative to the
	// package root; anything else is relative to the "xl/" folder.
	private static string partPath(string target) {
		if(target.length && target[0] == '/')
			return target[1 .. $];
		return "xl/" ~ target;
	}

	// Loads the sheet's XML on first use and keeps it for later calls.
	private XmlDocument getSheetXml(ref SheetInternal sheet) {
		if(sheet.cached is null) {
			auto relationship = sheet.rel in relationships;
			if(relationship is null)
				throw new Exception("sheet " ~ sheet.name ~ " refers to unknown relationship " ~ sheet.rel);
			loadXml(partPath(relationship.target), (document) { sheet.cached = document; });
		}

		return sheet.cached;
	}

	// Creates the XlsxSheet wrapper on first use and keeps it for later calls.
	private XlsxSheet getSheetParsed(ref SheetInternal sheet) {
		if(sheet.parsed is null)
			sheet.parsed = new XlsxSheet(this, sheet.name, getSheetXml(sheet));

		return sheet.parsed;
	}

	// Reads the workbook-level metadata. The relationships must be read before
	// the shared strings, because they say where that part lives.
	private void load() {
		loadXml("[Content_Types].xml", (document) {
			foreach(element; document.querySelectorAll("Override"))
				contentTypes[element.attrs.PartName] = element.attrs.ContentType;
		});

		loadXml("xl/_rels/workbook.xml.rels", (document) {
			foreach(element; document.querySelectorAll("Relationship"))
				relationships[element.attrs.Id] = Relationship(element.attrs.Id, element.attrs.Type, element.attrs.Target);
		});

		// The shared string part is optional (a workbook without text cells
		// need not have one), so find it through the relationships instead of
		// assuming a fixed path.
		enum sharedStringsSuffix = "/sharedStrings";
		foreach(relationship; relationships) {
			if(relationship.type.length < sharedStringsSuffix.length)
				continue;
			if(relationship.type[$ - sharedStringsSuffix.length .. $] != sharedStringsSuffix)
				continue;

			loadXml(partPath(relationship.target), (document) {
				// Cells refer to strings by the position of their <si> item.
				// An item may hold several <t> runs (rich text), so the runs
				// are joined into one entry to keep the indexes aligned.
				foreach(item; document.querySelectorAll("si")) {
					string text;
					foreach(run; item.querySelectorAll("t")) {
						// <rPh> holds phonetic hints, not cell text
						if(run.parentNode !is null && run.parentNode.tagName == "rPh")
							continue;
						text ~= run.textContent;
					}
					sharedStrings ~= text;
				}
			});
			break;
		}

		loadXml("xl/workbook.xml", (document) {
			foreach(element; document.querySelectorAll("sheets > sheet")) {
				sheetsInternal ~= SheetInternal(element.attrs.name, element.attrs.sheetId, element.getAttribute("r:id"));
			}
		});
	}

	// Parses one member of the zip as XML and hands the document to `handler`.
	private void loadXml(string filename, scope void delegate(XmlDocument document) handler) {
		auto document = new XmlDocument(cast(string) zipFile.getContent(filename));
		handler(document);
	}
}
|