arsd/xlsx.d

1439 lines
36 KiB
D

/++
Some support for the Microsoft Excel Spreadsheet file format.
Don't expect much from it, not even API stability.
Some code is borrowed from the xlsxreader package.
History:
Added February 13, 2025
See_Also:
https://github.com/symmetryinvestments/xlsxd which supports writing xlsx files. I might add write support here too someday but I kinda doubt it.
+/
module arsd.xlsx;
/+
./csv-viewer ~/Downloads/UI_comparison.xlsx
arsd.dom.ElementNotFoundException@/home/me/program/lib/arsd/xlsx.d(823): Element of type 'Element' matching {worksheet > dimension} not found.
+/
/+
sheet at double[]:
nan payloads for blank, errors, then strings as indexes into a table.
+/
// FIXME: does excel save errors like DIV0 to content in the file?
// See also Robert's impl: https://github.com/symmetryinvestments/xlsxreader/blob/master/source/xlsxreader.d
import arsd.core;
import arsd.zip;
import arsd.dom;
import arsd.color;
import std.conv;
private struct ExcelFormatStringLexeme {
string lexeme;
bool isLiteral;
}
class ExcelFormatStringException : Exception {
this(string msg, string file = __FILE__, size_t line = __LINE__) {
super(msg, file, line);
}
}
// FIXME: out contract that asserts s_io.length has indeed been reduced
private ExcelFormatStringLexeme extractExcelFormatStringLexeme(ref string s_io) {
assert(s_io.length);
string s = s_io;
switch(s[0]) {
case '[':
// condition or color
// or elapsed time thing.
// or a locale setting thing for dates (and more?)
int count = 0;
int size = 0;
while(s[0]) {
if(s[0] == '[')
count++;
if(s[0] == ']')
count--;
s = s[1 .. $];
size++;
if(count == 0)
break;
if(s.length == 0)
throw new ExcelFormatStringException("unclosed [");
}
string ret = s_io[0 .. size];
s_io = s_io[size .. $];
return ExcelFormatStringLexeme(ret, false);
case '"':
// quoted thing watching for backslash
bool escaped;
int size;
size++;
s = s[1 .. $]; // skip the first "
string ret;
while(escaped || s[0] != '"') {
if(!escaped) {
if(s[0] == '"') {
break;
}
if(s[0] == '\\')
escaped = true;
else
ret ~= s[0];
} else {
ret ~= s[0];
escaped = false;
}
s = s[1 .. $];
size++;
}
if(s.length == 0)
throw new ExcelFormatStringException("unclosed \"");
size++;
s_io = s_io[size .. $];
return ExcelFormatStringLexeme(ret, true);
case '\\':
// escaped character
s = s[1 .. $]; // skip the \
s_io = s_io[1 .. $];
// FIXME: need real stride
auto stride = 1;
s_io = s_io[stride .. $];
return ExcelFormatStringLexeme(s[0 .. stride], true);
case '$', '+', '(', ':', '^', '\'', '{', '<', '=', '-', ')', '!', '&', '~', '}', '>', ' ': // they say slash but that seems to be fraction instead
// character literals w/o needing to be quoted
s_io = s_io[1 .. $];
return ExcelFormatStringLexeme(s[0 .. 1], true);
case 'A', 'a', 'P', 'p':
// am/pm
int size = 0;
while(
s[0] == 'a' || s[0] == 'A' ||
s[0] == 'p' || s[0] == 'P' ||
s[0] == 'm' || s[0] == 'M' ||
s[0] == '/'
) {
size++;
s = s[1 .. $];
if(s.length == 0)
break;
}
// also switches hour to 12 hour format when it happens
string ret = s_io[0 .. size];
s_io = s_io[size .. $];
return ExcelFormatStringLexeme(ret, false);
// the single char directives
case '@': // text placeholder
case ';': // clause separator
s_io = s_io[1 .. $];
return ExcelFormatStringLexeme(s[0 .. 1], false);
case '_': // padding char - this adds a space with the same width as the char that follows it, for column alignment.
case '*': // fill char
// the padding or fill is the next lexeme, not the next char!
s_io = s_io[1 .. $];
return ExcelFormatStringLexeme(s[0 .. 1], false);
case 'e', 'E': // scientific notation request
case '%': // percent indicator
case ',': // thousands separator
case '.': // decimal separator
case '/': // fraction or date separator
s_io = s_io[1 .. $];
return ExcelFormatStringLexeme(s[0 .. 1], false);
case /*'m',*/ 'd', 'y': // date parts
case 'h', 'm', 's': // time parts
/+
Note: The m or mm code must appear immediately after the h or hh code or immediately before the ss code; otherwise, Excel displays the month instead of minutes.
it can be either a date/time OR a number/fraction, not both.
+/
auto thing = s[0];
int size;
while(s.length && s[0] == thing) {
s = s[1 .. $];
size++;
}
auto keep = s_io[0 .. size];
s_io = s_io[size .. $];
return ExcelFormatStringLexeme(keep, false);
case '1': .. case '9': // fraction denominators or just literal numbers
int size;
while(s.length && s[0] >= '1' && s[0] <= '9') {
s = s[1 .. $];
size++;
}
auto keep = s_io[0 .. size];
s_io = s_io[size .. $];
return ExcelFormatStringLexeme(keep, false);
case '0', '#', '?': // digit placeholder
int size;
while(s[0] == '0' || s[0] == '#' || s[0] == '?') {
s = s[1 .. $];
size++;
if(s.length == 0)
break;
}
auto keep = s_io[0 .. size];
s_io = s_io[size .. $];
return ExcelFormatStringLexeme(keep, false);
default:
// idk
throw new ExcelFormatStringException("unknown char " ~ s);
}
assert(0);
}
unittest {
string thing = `[>50][Red]"foo"`;
ExcelFormatStringLexeme lexeme;
lexeme = extractExcelFormatStringLexeme(thing);
assert(thing == `[Red]"foo"`);
lexeme = extractExcelFormatStringLexeme(thing);
assert(thing == `"foo"`);
lexeme = extractExcelFormatStringLexeme(thing);
assert(thing == "");
assert(lexeme.lexeme == "foo");
thing = `"\""`;
lexeme = extractExcelFormatStringLexeme(thing);
assert(thing == "");
assert(lexeme.lexeme == `"`);
thing = `\,`;
lexeme = extractExcelFormatStringLexeme(thing);
assert(thing == "");
assert(lexeme.lexeme == `,`);
/*
thing = `"A\""`;
lexeme = extractExcelFormatStringLexeme(thing);
assert(thing == "");
assert(lexeme.lexeme == `"`);
*/
/+
thing = "mm-yyyy";
lexeme = extractExcelFormatStringLexeme(thing);
import std.stdio; writeln(thing); writeln(lexeme);
+/
}
struct XlsxFormat {
string originalFormatString;
Color foregroundColor;
Color backgroundColor;
int alignment; // 0 = left, 1 = right, 2 = center
enum Type {
/++
+/
String,
/++
+/
Number,
/++
A Date is a special kind of number in Excel.
+/
Date,
/++
things like # ?/4
+/
Fraction,
Percent
}
Type type;
/++
+/
static struct Result {
string content;
string color;
int alignment;
}
/++
+/
Result applyTo(string s) const {
if(this.type == Type.String || originalFormatString == "@" || originalFormatString.length == 0)
return Result(s, null, alignment);
int alignment = this.alignment;
// need to check for a text thing and if conversion fails, we use that
double value;
try {
value = to!double(s);
} catch(Exception e) {
value = double.nan;
}
DateTime date_;
bool dateCalculated;
DateTime getDate() {
// make sure value is not nan before here or it will throw "negative overflow"!
if(!dateCalculated) {
date_ = doubleToDateTime(value);
dateCalculated = true;
}
return date_;
}
// parse out the original format string
// the ordering by default is positive;negative;zero;text
//
// these can also be like [Color][Condition]fmt;generic
// color is allowed anywhere
// but condition can only have two things following: `[Color][Condition];` repeated any number of times then `;generic-number;text`. no more negative/zero stuff.
// once we see a condition, it switches modes - following things MUST have condition or else are treated as just generic catch all for number and then text.
//
// it matches linearly.
/+
so it goes:
implicit match >0
implicit match <0
implicit match =0
text
but if at any point one of them has a condition, the following ones must be either more conditions (immediately!) or unconditional:
fallthrough for number
text
and if i dont support a format thing i can always fall back to the original text.
+/
try {
string fmt = originalFormatString;
int state = 0; // 0 == positive, 1 == negative or custom, 2 == other, 3 == text
bool matchesCurrentCondition = value > 0;
bool hasMultipleClauses = false;
{
string fmt2 = fmt;
while(fmt2.length) {
auto next = extractExcelFormatStringLexeme(fmt2);
if(!next.isLiteral && next.lexeme == ";")
hasMultipleClauses = true;
break;
}
}
if(hasMultipleClauses == false)
matchesCurrentCondition = true; // only one thing means we must always match it
int numericState;
bool inDenominator;
bool inAmPm;
bool inDecimal;
bool justSawHours;
// these are populated below once we match a clause
bool hasAmPm;
bool hasFraction;
bool hasScientificNotation;
bool hasPercent;
bool first = true;
string color;
string ret;
while(fmt.length) {
auto lexeme = extractExcelFormatStringLexeme(fmt);
ExcelFormatStringLexeme peekLexeme(bool returnLiteral = false) {
string fmt2 = fmt;
skip:
if(fmt2.length == 0)
return ExcelFormatStringLexeme.init;
auto next = extractExcelFormatStringLexeme(fmt2);
if(next.isLiteral && !returnLiteral)
goto skip;
return next;
}
if(!lexeme.isLiteral && lexeme.lexeme[0] == ';') {
// we finished the format of the match, so no need to continue
if(matchesCurrentCondition)
break;
// otherwise, we go to the next thing
state++;
if(state == 1) {
matchesCurrentCondition = value < 0;
} else if(state == 2) {
// this is going to be either the catch-all fallback or another custom one
// for now, assume it is a catch-all
import std.math;
matchesCurrentCondition = !isNaN(value) ? true : false; // only numbers, so not text, matches the catch-all
} else if(state == 3) {
matchesCurrentCondition = true; // this needs to match, we're at the end, so this is the text display
} else {
throw new ExcelFormatStringException("too many ; pieces");
}
continue;
}
if(!matchesCurrentCondition)
continue;
// scan ahead to see if we're doing some special cases: fractions, 12 hour clock, percentages, and sci notation
if(first) {
string fmt2 = fmt;
while(fmt2.length) {
auto next = extractExcelFormatStringLexeme(fmt2);
if(!next.isLiteral) {
// don't proceed into the next clause
if(next.lexeme == ";")
break;
char c = next.lexeme[0] | 0x20;
if(next.lexeme == "/")
hasFraction = true;
else if(next.lexeme == "%") {
hasPercent = true;
value *= 100.0;
} else if(c == 'e')
hasScientificNotation = true;
else if(c == 'a' || c == 'p')
hasAmPm = true;
}
}
first = false;
}
if(hasScientificNotation)
return Result(s, "unsupported feature: scientific notation"); // FIXME
if(hasFraction)
return Result(s, "unsupported feature: fractions"); // FIXME
if(!lexeme.isLiteral && lexeme.lexeme[0] == '[') {
// look for color, condition, or locale
char nc = lexeme.lexeme[1];
if(nc == '$')
continue; // locale i think, skip it
if(nc == '<' || nc == '>' || nc == '=') {
// condition
if(state == 1 || state == 2) {
state = 1;
// read the condition, see if we match it
auto condition = lexeme.lexeme[1 .. $-1];
string operator;
string num;
if(condition[1] == '=') {
operator = condition[0 .. 2];
num = condition[2 .. $];
} else {
operator = condition[0 .. 1];
num = condition[1 .. $];
}
double compareTo;
try {
compareTo = to!double(num);
} catch(Exception e) {
throw new ExcelFormatStringException("not a number: " ~ num);
}
switch(operator) {
case "<":
matchesCurrentCondition = value < compareTo;
break;
case "<=":
matchesCurrentCondition = value <= compareTo;
break;
case ">":
matchesCurrentCondition = value > compareTo;
break;
case ">=":
matchesCurrentCondition = value >= compareTo;
break;
case "=":
// FIXME: approxEqual?
matchesCurrentCondition = value == compareTo;
break;
default:
throw new ExcelFormatStringException("not a supported comparison operator " ~ operator);
}
continue;
} else {
throw new ExcelFormatStringException("inappropriately placed custom condition");
}
} else {
// color, we hope. FIXME can also be [s], [m], or [h] or maybe [ss], [mm], [hh]
// colors are capitalized...
color = lexeme.lexeme[1 .. $-1];
continue;
}
}
// if we're here, it should actually match and need some processing.
if(lexeme.isLiteral) {
// literals are easy...
ret ~= lexeme.lexeme;
} else {
// but the rest of these are formatting commands
switch(lexeme.lexeme[0]) {
case ',':
// thousands separator requested,
// handled below in the decimal placeholder thing
break;
case '_', '*':
auto lexemeToPadWith = extractExcelFormatStringLexeme(fmt);
if(lexeme.lexeme[0] == '_')
ret ~= " "; // FIXME supposed to match width of the char
else if(lexeme.lexeme[0] == '*')
ret ~= lexemeToPadWith.lexeme; // FIXME: supposed to repeat to fill the column width
break;
case '@': // the original text
ret ~= s;
break;
case '%':
ret ~= lexeme.lexeme;
break;
case '.':
inDecimal = true;
ret ~= lexeme.lexeme;
break;
case '/':
if(!inAmPm) {
inDenominator = true;
ret ~= lexeme.lexeme;
}
break;
case '#', '0', '?':
// decimal group
// # = digit
// 0 = digit, pad with 0 if not significant
// ? = digit, pad with space (same sized as digit) if not significant
if(value is double.nan)
return Result(s, "NaN");
alignment = 1; // if we are printing numbers let's assume right align FIXME
/+
if(s.length == 0 && value is double.nan) // and if we printing numbers, treat empty cell as 0
value = 0.0;
+/
bool appendNumber(double v, bool includeThousandsSeparator) {
if(v < 0)
v = -v;
string f = to!string(cast(int) v);
if(f.length < lexeme.lexeme.length)
foreach(l; lexeme.lexeme[0 .. $ - f.length]) {
if(l == '0')
ret ~= '0';
else if(l == '?')
ret ~= ' ';
}
if(f.length) {
if(includeThousandsSeparator) {
// 14532
// 1234
// 123
auto offset = cast(int) f.length % 3;
while(f.length > 3) {
ret ~= f[offset .. offset + 3];
ret ~= ",";
f = f[3 .. $];
}
ret ~= f;
} else {
ret ~= f;
}
return true;
}
return false;
}
if(peekLexeme().lexeme == ",") {
// thousands separator requested...
auto v = cast(int) value / 1000;
if(v == 0)
continue; // FIXME? maybe we want some leading 0 padding?
auto hadOutput = appendNumber(v, true);
value = value - v * 1000; // take the remainder for the next iteration of the loop
if(hadOutput)
ret ~= ","; // append the comma before the final thousands digits in the next iteration
continue;
}
if(inDecimal) {
// FIXME: no more std.format
import std.format;
string f = format("%."~to!string(lexeme.lexeme.length)~"f", value - cast(int) value)[2..$]; // slice off the "0."
ret ~= f;
} else {
appendNumber(value, false);
}
inDenominator = false;
break;
case '1': .. case '9':
// number, if in denominator position
// otherwise treat as string
if(inDenominator)
inDenominator = false; // the rest is handled elsewhere
else
ret ~= lexeme.lexeme;
break;
case 'y':
if(value is double.nan)
return Result(s, "NaN date");
justSawHours = false;
auto y = getDate().year;
char[16] buffer;
switch(lexeme.lexeme.length) {
case 2:
ret ~= intToString(y % 100, buffer[], IntToStringArgs().withPadding(2));
break;
case 4:
ret ~= intToString(y, buffer[], IntToStringArgs().withPadding(4));
break;
default:
throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
}
break;
case 'm':
if(value is double.nan)
return Result(s, "NaN date");
auto peek = peekLexeme(false);
bool precedesSeconds =
(peek.lexeme.length && peek.lexeme[0] == 's')
||
(peek.lexeme.length > 1 && peek.lexeme[1] == 's')
;
if(justSawHours || precedesSeconds) {
// minutes
auto m = getDate().timeOfDay.minute;
char[16] buffer;
switch(lexeme.lexeme.length) {
case 1:
ret ~= intToString(m, buffer[]);
break;
case 2:
ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2));
break;
default:
throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
}
} else {
// month
auto m = cast(int) getDate().month;
char[16] buffer;
import arsd.calendar;
switch(lexeme.lexeme.length) {
case 1:
ret ~= intToString(m, buffer[]);
break;
case 2:
ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2));
break;
case 3: // abbreviation
ret ~= monthNames[m][0 .. 3];
break;
case 4: // full name
ret ~= monthNames[m];
break;
case 5: // single letter
ret ~= monthNames[m][0 .. 1]; // FIXME?
break;
default:
throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
}
}
justSawHours = false;
break;
case 'd':
if(value is double.nan)
return Result(s, "NaN date");
justSawHours = false;
char[16] buffer;
import arsd.calendar;
auto d = getDate().day;
auto dow = cast(int) getDate().dayOfWeek;
switch(lexeme.lexeme.length) {
case 1:
ret ~= intToString(d, buffer[]);
break;
case 2:
ret ~= intToString(d, buffer[], IntToStringArgs().withPadding(2));
break;
case 3:
// abbreviation
ret ~= daysOfWeekNames[dow][0 .. 3];
break;
case 4:
// full name
ret ~= daysOfWeekNames[dow];
break;
default:
throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
}
break;
case 'h':
if(value is double.nan)
return Result(s, "NaN date");
justSawHours = true;
auto m = getDate().timeOfDay.hour;
char[16] buffer;
if(hasAmPm && m > 12)
m -= 12;
if(hasAmPm && m == 0)
m = 12;
switch(lexeme.lexeme.length) {
case 1:
ret ~= intToString(m, buffer[]);
break;
case 2:
ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2));
break;
default:
throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
}
break;
case 'a', 'A':
if(value is double.nan)
return Result(s, "NaN date");
inAmPm = true;
auto m = getDate().timeOfDay.hour;
if(m >= 12)
ret ~= lexeme.lexeme[0] == 'a' ? "pm" : "PM";
else
ret ~= lexeme.lexeme[0] == 'a' ? "am" : "AM";
break;
case 'p', 'P':
inAmPm = false;
break;
case 's':
if(value is double.nan)
return Result(s, "NaN date");
auto m = getDate().timeOfDay.second;
char[16] buffer;
switch(lexeme.lexeme.length) {
case 1:
ret ~= intToString(m, buffer[]);
break;
case 2:
ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2));
break;
default:
throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
}
break;
case 'e', 'E':
// FIXME: scientific notation
break;
default:
assert(0, "unsupported formatting command: " ~ lexeme.lexeme);
}
}
}
return Result(ret, color, alignment);
} catch(ExcelFormatStringException e) {
// we'll fall back to just displaying the original input text
return Result(s, e.msg /* FIXME should be null */, alignment);
}
}
/+
positive;negative;zero;text
can include formats and dates and tons of stuff.
https://support.microsoft.com/en-us/office/review-guidelines-for-customizing-a-number-format-c0a1d1fa-d3f4-4018-96b7-9c9354dd99f5
+/
private this(XlsxFile file, XlsxFile.StyleInternal.xf formatting) {
if(formatting.applyNumberFormat) {
// dates too depending on format
//import std.stdio; writeln(formatting.numFmtId); writeln(file.styleInternal.numFmts);
this.originalFormatString = file.styleInternal.numFmts[formatting.numFmtId];
this.type = Type.Number;
} else {
this.type = Type.String;
}
/+
xf also has:
int xfId;
int numFmtId;
int fontId;
int fillId;
int borderId;
+/
}
private this(string f) {
this.originalFormatString = f;
this.type = Type.Number;
}
}
unittest {
assert(XlsxFormat(`;;;"foo"`).applyTo("anything") == XlsxFormat.Result("foo", null));
assert(XlsxFormat(`#.#;;;"foo"`).applyTo("2.0") == XlsxFormat.Result("2.0", null, 1));
assert(XlsxFormat(`0#.##;;;"foo"`).applyTo("24.25") == XlsxFormat.Result("24.25", null, 1));
assert(XlsxFormat(`0#.##;;;"foo"`).applyTo("2.25") == XlsxFormat.Result("02.25", null, 1));
assert(XlsxFormat(`#,#.##`).applyTo("2.25") == XlsxFormat.Result("2.25", null, 1));
assert(XlsxFormat(`#,#.##`).applyTo("123.25") == XlsxFormat.Result("123.25", null, 1));
assert(XlsxFormat(`#,#.##`).applyTo("1234.25") == XlsxFormat.Result("1,234.25", null, 1));
assert(XlsxFormat(`#,#.##`).applyTo("123456.25") == XlsxFormat.Result("123,456.25", null, 1));
}
struct XlsxCell {
string formula;
string content;
XlsxFormat formatting;
XlsxFormat.Result displayableResult() {
return formatting.applyTo(content);
}
string toString() {
return displayableResult().content;
}
}
struct CellReference {
string name;
static CellReference fromInts(int column, int row) {
string ret;
string piece;
int adjustment = 0;
do {
piece ~= cast(char)(column % 26 + 'A' - adjustment);
if(adjustment == 0)
adjustment = 1;
column /= 26;
} while(column);
foreach_reverse(ch; piece)
ret ~= ch;
piece = null;
do {
piece ~= cast(char)(row % 10 + '0');
row /= 10;
} while(row);
foreach_reverse(ch; piece)
ret ~= ch;
piece = null;
return CellReference(ret);
}
int toColumnIndex() {
size_t endSlice = name.length;
foreach(idx, ch; name) {
if(ch < 'A' || ch > 'Z') {
endSlice = idx;
break;
}
}
int accumulator;
foreach(idx, ch; name[0 .. endSlice]) {
int value;
if(idx + 1 == endSlice) {
// an A in the last "digit" is a 0, elsewhere it is a 1
value = ch - 'A';
} else {
value = ch - 'A' + 1;
}
accumulator *= 26;
accumulator += value;
}
return accumulator;
}
int toRowIndex() {
int accumulator;
foreach(ch; name) {
if(ch >= 'A' && ch <= 'Z')
continue;
accumulator *= 10;
accumulator += ch - '0';
}
return accumulator;
}
}
unittest {
auto cr = CellReference("AE434");
assert(cr.toColumnIndex == 30);
cr = CellReference("E434");
assert(cr.toColumnIndex == 4); // zero-based
// zero-based column, 1-based row. wtf?
assert(CellReference("AE434") == CellReference.fromInts(30, 434));
assert(CellReference("Z1") == CellReference.fromInts(25, 1));
}
/++
+/
class XlsxSheet {
private string name_;
private XlsxFile file;
private XmlDocument document;
private this(XlsxFile file, string name, XmlDocument document) {
this.file = file;
this.name_ = name;
this.document = document;
this.dimension = document.requireSelector("worksheet > dimension").getAttribute("ref");
// there's also sheetView with selection, activeCell, etc
// and cols with widths and such
auto ul = this.upperLeft;
this.minRow = ul.toRowIndex;
this.minColumn = ul.toColumnIndex;
auto lr = this.lowerRight;
this.maxRow = lr.toRowIndex + 1;
this.maxColumn = lr.toColumnIndex + 1;
}
private string dimension;
private int minRow;
private int minColumn;
private int maxRow;
private int maxColumn;
/++
+/
Size size() {
return Size(maxColumn - minColumn, maxRow - minRow);
}
private CellReference upperLeft() {
foreach(idx, ch; dimension)
if(ch == ':')
return CellReference(dimension[0 .. idx]);
//assert(0); // it has no lower right...
return CellReference(dimension);
}
private CellReference lowerRight() {
foreach(idx, ch; dimension)
if(ch == ':')
return CellReference(dimension[idx + 1 .. $]);
assert(0);
}
// opIndex could be like sheet["A1:B4"] and sheet["A1", "B4"] and stuff maybe.
/++
+/
string name() {
return name_;
}
/++
Suitable for passing to [arsd.csv.toCsv]
+/
string[][] toStringGrid() {
auto grid = this.toGrid();
string[][] ret;
ret.length = size.height;
foreach(i, ref row; ret) {
row.length = size.width;
foreach(k, ref cell; row)
cell = grid[i][k].toString();
}
return ret;
}
/++
+/
XlsxCell[][] toGrid() {
// FIXME: this crashes on opend dmd!
// string[][] ret = new string[][](size.height, size.width);
/+
// almost everything we allocate in here is to keep, so
// turning off the GC while working prevents unnecessary
// collection attempts that won't find any garbage anyway.
// but meh no significant difference in perf anyway.
import core.memory;
GC.disable();
scope(exit)
GC.enable();
+/
XlsxCell[][] ret;
ret.length = size.height;
foreach(ref row; ret)
row.length = size.width;
//alloc done
auto sheetData = document.requireSelector("sheetData");
Element[] rowElements = sheetData.childNodes;
Element[] nextRow(int expected) {
if(rowElements.length == 0)
throw new Exception("ran out of row elements...");
Element rowElement;
Element[] before = rowElements;
do {
rowElement = rowElements[0];
rowElements = rowElements[1 .. $];
} while(rowElement.tagName != "row");
if(rowElement.attrs.r.to!int != expected) {
// a row was skipped in the file, so we'll
// return an empty placeholder too
rowElements = before;
return null;
}
return rowElement.childNodes;
}
foreach(int rowIdx, row; ret) {
auto cellElements = nextRow(rowIdx + 1);
foreach(int cellIdx, ref cell; row) {
string cellReference = CellReference.fromInts(cellIdx + minColumn, rowIdx + minRow).name;
Element element = null;
foreach(idx, thing; cellElements) {
if(thing.attrs.r == cellReference) {
element = thing;
cellElements = cellElements[idx + 1 .. $];
break;
}
}
if(element is null)
continue;
string v = element.optionSelector("v").textContent;
if(element.attrs.t == "s")
v = file.sharedStrings[v.to!int()];
auto sString = element.attrs.s;
auto sId = sString.length ? to!int(sString) : 0;
string f = element.optionSelector("f").textContent;
cell = XlsxCell(f, v, XlsxFormat(file, file.styleInternal.xfs[sId]));
}
}
return ret;
}
}
/++
+/
class XlsxFile {
private ZipFile zipFile;
/++
+/
this(FilePath file) {
this.zipFile = new ZipFile(file);
load();
}
/// ditto
this(immutable(ubyte)[] rawData) {
this.zipFile = new ZipFile(rawData);
load();
}
/++
+/
int sheetCount() {
return cast(int) sheetsInternal.length;
}
/++
+/
string[] sheetNames() {
string[] ret;
foreach(sheet; sheetsInternal)
ret ~= sheet.name;
return ret;
}
/++
+/
XlsxSheet getSheet(string name) {
foreach(ref sheet; sheetsInternal)
if(sheet.name == name)
return getSheetParsed(sheet);
return null;
}
/// ditto
XlsxSheet getSheet(int indexZeroBased) {
// FIXME: if it is out of range do what?
return getSheetParsed(sheetsInternal[indexZeroBased]);
}
// docProps/core.xml has creator, last modified, etc.
private string[string] contentTypes;
private struct Relationship {
string id;
string type;
string target;
}
private Relationship[string] relationships;
private string[] sharedStrings;
private struct SheetInternal {
string name;
string id;
string rel;
XmlDocument cached;
XlsxSheet parsed;
}
private SheetInternal[] sheetsInternal;
// https://stackoverflow.com/questions/3154646/what-does-the-s-attribute-signify-in-a-cell-tag-in-xlsx
private struct StyleInternal {
string[int] numFmts;
// fonts
// font references color theme from xl/themes
// fills
// borders
// cellStyleXfs
// cellXfs
struct xf {
int xfId;
int numFmtId;
int fontId;
int fillId;
int borderId;
bool applyNumberFormat; // if yes, you get default right alignment
}
xf[] xfs;
// cellStyles
// dxfs
// tableStyles
}
private StyleInternal styleInternal;
private XmlDocument getSheetXml(ref SheetInternal sheet) {
if(sheet.cached is null)
loadXml("xl/" ~ relationships[sheet.rel].target, (document) { sheet.cached = document; });
return sheet.cached;
}
private XlsxSheet getSheetParsed(ref SheetInternal sheet) {
if(sheet.parsed is null)
sheet.parsed = new XlsxSheet(this, sheet.name, getSheetXml(sheet));
return sheet.parsed;
}
private void load() {
loadXml("[Content_Types].xml", (document) {
foreach(element; document.querySelectorAll("Override"))
contentTypes[element.attrs.PartName] = element.attrs.ContentType;
});
loadXml("xl/_rels/workbook.xml.rels", (document) {
foreach(element; document.querySelectorAll("Relationship"))
relationships[element.attrs.Id] = Relationship(element.attrs.Id, element.attrs.Type, element.attrs.Target);
});
loadXml("xl/sharedStrings.xml", (document) {
foreach(element; document.querySelectorAll("si t"))
sharedStrings ~= element.textContent;
});
loadXml("xl/styles.xml", (document) {
// need to keep the generic hardcoded formats first
styleInternal.numFmts = [
0: "@",
1: "0",
2: "0.00",
3: "#,##0",
4: "#,##0.00",
5: "$#,##0_);($#,##0)",
6: "$#,##0_);[Red]($#,##0)",
7: "$#,##0.00_);($#,##0.00)",
8: "$#,##0.00_);[Red]($#,##0.00)",
9: "0%",
10: "0.00%",
11: "0.00E+00",
12: "# ?/?",
13: "# ??/??",
14: "m/d/yyyy", // ive heard this one does different things in different locales
15: "d-mmm-yy",
16: "d-mmm",
17: "mmm-yy",
18: "h:mm AM/PM",
19: "h:mm:ss AM/PM",
20: "h:mm",
21: "h:mm:ss",
22: "m/d/yyyy h:mm",
37: "#,##0_);(#,##0)",
38: "#,##0_);[Red](#,##0)",
39: "#,##0.00_);(#,##0.00)",
40: "#,##0.00_);[Red](#,##0.00)",
45: "mm:ss",
46: "[h]:mm:ss",
47: "mm:ss.0",
48: "##0.0E+0",
49: "@",
];
foreach(element; document.querySelectorAll("numFmts > numFmt")) {
styleInternal.numFmts[to!int(element.attrs.numFmtId)] = element.attrs.formatCode;
}
foreach(element; document.querySelectorAll("cellXfs > xf")) {
StyleInternal.xf xf;
xf.xfId = element.attrs.xfId.to!int;
xf.fontId = element.attrs.fontId.to!int;
xf.fillId = element.attrs.fillId.to!int;
xf.borderId = element.attrs.borderId.to!int;
xf.numFmtId = element.attrs.numFmtId.to!int;
if(element.attrs.applyNumberFormat == "1")
xf.applyNumberFormat = true;
styleInternal.xfs ~= xf;
}
});
loadXml("xl/workbook.xml", (document) {
foreach(element; document.querySelectorAll("sheets > sheet")) {
sheetsInternal ~= SheetInternal(element.attrs.name, element.attrs.sheetId, element.getAttribute("r:id"));
}
});
}
private void loadXml(string filename, scope void delegate(XmlDocument document) handler) {
auto document = new XmlDocument(cast(string) zipFile.getContent(filename));
handler(document);
}
}
// from Robert Schadek's code {
import std.datetime;
version(unittest) import std.format;
Date longToDate(long d) @safe {
// modifed from https://www.codeproject.com/Articles/2750/
// Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa
// Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a
// leap year, but Excel/Lotus 123 think it is...
if(d == 60) {
return Date(1900, 2, 29);
} else if(d < 60) {
// Because of the 29-02-1900 bug, any serial date
// under 60 is one off... Compensate.
++d;
}
// Modified Julian to DMY calculation with an addition of 2415019
int l = cast(int)d + 68569 + 2415019;
int n = int(( 4 * l ) / 146097);
l = l - int(( 146097 * n + 3 ) / 4);
int i = int(( 4000 * ( l + 1 ) ) / 1461001);
l = l - int(( 1461 * i ) / 4) + 31;
int j = int(( 80 * l ) / 2447);
int nDay = l - int(( 2447 * j ) / 80);
l = int(j / 11);
int nMonth = j + 2 - ( 12 * l );
int nYear = 100 * ( n - 49 ) + i + l;
return Date(nYear, nMonth, nDay);
}
long dateToLong(Date d) @safe {
// modifed from https://www.codeproject.com/Articles/2750/
// Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa
// Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a
// leap year, but Excel/Lotus 123 think it is...
if(d.day == 29 && d.month == 2 && d.year == 1900) {
return 60;
}
// DMY to Modified Julian calculated with an extra subtraction of 2415019.
long nSerialDate =
int(( 1461 * ( d.year + 4800 + int(( d.month - 14 ) / 12) ) ) / 4) +
int(( 367 * ( d.month - 2 - 12 *
( ( d.month - 14 ) / 12 ) ) ) / 12) -
int(( 3 * ( int(( d.year + 4900
+ int(( d.month - 14 ) / 12) ) / 100) ) ) / 4) +
d.day - 2415019 - 32075;
if(nSerialDate < 60) {
// Because of the 29-02-1900 bug, any serial date
// under 60 is one off... Compensate.
nSerialDate--;
}
return nSerialDate;
}
@safe unittest {
auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ];
foreach(const d; ds) {
long l = dateToLong(d);
Date r = longToDate(l);
assert(r == d, format("%s %s", r, d));
}
}
TimeOfDay doubleToTimeOfDay(double s) @safe {
import core.stdc.math : lround;
double secs = (24.0 * 60.0 * 60.0) * s;
// TODO not one-hundred my lround is needed
int secI = to!int(lround(secs));
return TimeOfDay(secI / 3600, (secI / 60) % 60, secI % 60);
}
double timeOfDayToDouble(TimeOfDay tod) @safe {
long h = tod.hour * 60 * 60;
long m = tod.minute * 60;
long s = tod.second;
return (h + m + s) / (24.0 * 60.0 * 60.0);
}
@safe unittest {
auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11),
TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0),
TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)];
foreach(const tod; tods) {
double d = timeOfDayToDouble(tod);
assert(d <= 1.0, format("%s", d));
TimeOfDay r = doubleToTimeOfDay(d);
assert(r == tod, format("%s %s", r, tod));
}
}
double datetimeToDouble(DateTime dt) @safe {
double d = dateToLong(dt.date);
double t = timeOfDayToDouble(dt.timeOfDay);
return d + t;
}
DateTime doubleToDateTime(double d) @safe {
long l = cast(long)d;
Date dt = longToDate(l);
TimeOfDay t = doubleToTimeOfDay(d - l);
return DateTime(dt, t);
}
@safe unittest {
auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ];
auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11),
TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0),
TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)];
foreach(const d; ds) {
foreach(const tod; tods) {
DateTime dt = DateTime(d, tod);
double dou = datetimeToDouble(dt);
Date rd = longToDate(cast(long)dou);
assert(rd == d, format("%s %s", rd, d));
double rest = dou - cast(long)dou;
TimeOfDay rt = doubleToTimeOfDay(dou - cast(long)dou);
assert(rt == tod, format("%s %s", rt, tod));
DateTime r = doubleToDateTime(dou);
assert(r == dt, format("%s %s", r, dt));
}
}
}
// end from burner's code }