diff --git a/htmltotext.d b/htmltotext.d
index 272dadc..dc877a1 100644
--- a/htmltotext.d
+++ b/htmltotext.d
@@ -28,7 +28,7 @@ class HtmlConverter {
// The table stuff is removed right now because while it looks
// ok for test tables, it isn't working well for the emails I have
// - it handles data ok but not really nested layouts.
- case "trfixme":
+ case "trlol":
auto children = element.childElements;
auto tdWidth = (width - cast(int)(children.length)*3) / cast(int)(children.length);
@@ -91,6 +91,16 @@ class HtmlConverter {
s ~= "\n";
}
break;
+ case "tr":
+ startBlock(2);
+ sinkChildren();
+ endBlock();
+ break;
+ case "td":
+ startBlock(0);
+ sinkChildren();
+ endBlock();
+ break;
case "a":
sinkChildren();
if(element.href != element.innerText) {
@@ -116,6 +126,8 @@ class HtmlConverter {
if(csc.length)
s ~= "\033[39m";
*/
+
+ sinkChildren();
break;
case "p":
startBlock();
@@ -139,20 +151,28 @@ class HtmlConverter {
break;
case "ul":
ulDepth++;
+ startBlock(2);
sinkChildren();
+ endBlock();
ulDepth--;
break;
case "ol":
olDepth++;
+ startBlock(2);
sinkChildren();
+ endBlock();
olDepth--;
break;
case "li":
startBlock();
//sink('\t', true);
- sink(' ', true);
- sink(' ', true);
+ /*
+ foreach(cnt; 0 .. olDepth + ulDepth) {
+ sink(' ', true);
+ sink(' ', true);
+ }
+ */
if(olDepth)
sink('*', false);
if(ulDepth)
@@ -164,15 +184,33 @@ class HtmlConverter {
endBlock();
break;
- case "h1", "h2":
+ case "dl":
+ case "dt":
+ case "dd":
+ startBlock(element.tagName == "dd" ? 2 : 0);
+ sinkChildren();
+ endBlock();
+ break;
+
+ case "h1":
+ startBlock();
+ sink('#', true);
+ sink('#', true);
+ sink(' ', true);
+ sinkChildren();
+ sink(' ', true);
+ sink('#', true);
+ sink('#', true);
+ endBlock();
+ break;
+ case "h2", "h3":
startBlock();
sinkChildren();
sink('\n', true);
foreach(dchar ch; element.innerText)
- sink(element.tagName == "h1" ? '=' : '-', false);
+ sink(element.tagName == "h2" ? '=' : '-', false);
endBlock();
break;
-
case "hr":
startBlock();
foreach(i; 0 .. width / 4)
@@ -185,7 +223,6 @@ class HtmlConverter {
case "br":
sink('\n', true);
break;
- case "tr":
case "div":
startBlock();
@@ -207,7 +244,7 @@ class HtmlConverter {
endBlock();
break;
case "pre":
- startBlock();
+ startBlock(4);
foreach(child; element.childNodes)
htmlToText(child, true, width);
endBlock();
@@ -237,6 +274,10 @@ class HtmlConverter {
//auto stylesheet = new StyleSheet(readText("/var/www/dpldocs.info/experimental-docs/style.css"));
//stylesheet.apply(document);
+ return convert(start, wantWordWrap, wrapAmount);
+ }
+
+ string convert(Element start, bool wantWordWrap = true, int wrapAmount = 74) {
htmlToText(start, false, wrapAmount);
return s;
}
@@ -255,6 +296,12 @@ class HtmlConverter {
int lineLength;
void sink(dchar item, bool preformatted, int lineWidthOverride = int.min) {
+
+ if(needsIndent && item != '\n') {
+ lineLength += doIndent();
+ needsIndent = false;
+ }
+
int width = lineWidthOverride == int.min ? this.width : lineWidthOverride;
if(!preformatted && isWhite(item)) {
if(!justOutputWhitespace) {
@@ -282,8 +329,9 @@ class HtmlConverter {
auto os = s;
s = os[0 .. idx];
s ~= '\n';
- s ~= os[idx + 1 .. $];
lineLength = cast(int)(os[idx+1..$].length);
+ lineLength += doIndent();
+ s ~= os[idx + 1 .. $];
broken = true;
break;
}
@@ -295,15 +343,17 @@ class HtmlConverter {
if(!broken) {
s ~= '\n';
lineLength = 0;
+ needsIndent = true;
justOutputWhitespace = true;
}
}
- if(item == '\n')
+ if(item == '\n') {
lineLength = 0;
- else
+ needsIndent = true;
+ } else
lineLength ++;
@@ -312,22 +362,45 @@ class HtmlConverter {
justOutputMargin = false;
}
}
- void startBlock() {
+
+ int doIndent() {
+ int cnt = 0;
+ foreach(i; indentStack)
+ foreach(lol; 0 .. i) {
+ s ~= ' ';
+ cnt++;
+ }
+ return cnt;
+ }
+
+ int[] indentStack;
+ bool needsIndent = false;
+
+ void startBlock(int indent = 0) {
+
+ indentStack ~= indent;
+
if(!justOutputBlock) {
s ~= "\n";
lineLength = 0;
+ needsIndent = true;
justOutputBlock = true;
}
if(!justOutputMargin) {
s ~= "\n";
lineLength = 0;
+ needsIndent = true;
justOutputMargin = true;
}
}
void endBlock() {
+ if(indentStack.length)
+ indentStack = indentStack[0 .. $ - 1];
+
if(!justOutputMargin) {
s ~= "\n";
lineLength = 0;
+ needsIndent = true;
justOutputMargin = true;
}
}
@@ -403,13 +476,11 @@ void penis() {
ele.innerText = "^" ~ ele.innerText;
ele.stripOut();
break;
- /*
case "img":
string alt = ele.getAttribute("alt");
if(alt)
result ~= ele.alt;
break;
- */
default:
ele.stripOut();
goto again;