fix compile

This commit is contained in:
Adam D. Ruppe 2019-01-12 08:57:05 -05:00
parent d8720892c8
commit 8c67623cf6
5 changed files with 271 additions and 161 deletions

View file

@ -7,10 +7,47 @@ import std.string;
import std.uni : isWhite;
///
class HtmlConverter {
int width;
/++
Will enable color output using VT codes. Determines color through dom.d's css support, which means you need to apply a stylesheet first.
---
import arsd.dom;
auto document = new Document(source_code_for_html);
auto stylesheet = new Stylesheet(source_code_for_css);
stylesheet.apply(document);
---
+/
bool enableVtOutput;
string color;
string backgroundColor;
///
void htmlToText(Element element, bool preformatted, int width) {
string color, backgroundColor;
if(enableVtOutput) {
color = element.computedStyle.getValue("color");
backgroundColor = element.computedStyle.getValue("background-color");
}
string originalColor = this.color, originalBackgroundColor = this.backgroundColor;
this.color = color.length ? color : this.color;
this.backgroundColor = backgroundColor.length ? backgroundColor : this.backgroundColor;
scope(exit) {
// the idea is as we pop working back up the tree, it restores what it was here
this.color = originalColor;
this.backgroundColor = originalBackgroundColor;
}
this.width = width;
if(auto tn = cast(TextNode) element) {
foreach(dchar ch; tn.nodeValue) {
@ -115,19 +152,27 @@ class HtmlConverter {
}
break;
case "span":
/*
auto csc = element.computedStyle.getValue("color");
if(csc.length) {
auto c = Color.fromString(csc);
s ~= format("\033[38;2;%d;%d;%dm", c.r, c.g, c.b);
if(enableVtOutput) {
auto csc = color; // element.computedStyle.getValue("color");
if(csc.length) {
auto c = Color.fromString(csc);
s ~= format("\033[38;2;%d;%d;%dm", c.r, c.g, c.b);
}
bool bold = element.computedStyle.getValue("font-weight") == "bold";
if(bold)
s ~= "\033[1m";
sinkChildren();
if(bold)
s ~= "\033[0m";
if(csc.length)
s ~= "\033[39m";
} else {
sinkChildren();
}
sinkChildren();
if(csc.length)
s ~= "\033[39m";
*/
sinkChildren();
break;
case "p":
startBlock();
@ -138,9 +183,15 @@ class HtmlConverter {
case "em", "i":
if(element.innerText.length == 0)
break;
sink('*', false);
sinkChildren();
sink('*', false);
if(enableVtOutput) {
s ~= "\033[1m";
sinkChildren();
s ~= "\033[0m";
} else {
sink('*', false);
sinkChildren();
sink('*', false);
}
break;
case "u":
if(element.innerText.length == 0)
@ -258,6 +309,7 @@ class HtmlConverter {
int olDepth;
int ulDepth;
///
string convert(string html, bool wantWordWrap = true, int wrapAmount = 74) {
Document document = new Document;
@ -277,11 +329,13 @@ class HtmlConverter {
return convert(start, wantWordWrap, wrapAmount);
}
///
string convert(Element start, bool wantWordWrap = true, int wrapAmount = 74) {
htmlToText(start, false, wrapAmount);
return s;
}
///
void reset() {
s = null;
justOutputWhitespace = true;
@ -289,6 +343,7 @@ class HtmlConverter {
justOutputMargin = true;
}
///
string s;
bool justOutputWhitespace = true;
bool justOutputBlock = true;
@ -406,140 +461,9 @@ class HtmlConverter {
}
}
///
string htmlToText(string html, bool wantWordWrap = true, int wrapAmount = 74) {
auto converter = new HtmlConverter();
return converter.convert(html, true, wrapAmount);
}
string repeat(string s, ulong num) {
string ret;
foreach(i; 0 .. num)
ret ~= s;
return ret;
}
import std.stdio;
version(none)
void penis() {
again:
string result = "";
foreach(ele; start.tree) {
if(ele is start) continue;
if(ele.nodeType != 1) continue;
switch(ele.tagName) {
goto again;
case "h1":
ele.innerText = "\r" ~ ele.innerText ~ "\n" ~ repeat("=", ele.innerText.length) ~ "\r";
ele.stripOut();
goto again;
case "h2":
ele.innerText = "\r" ~ ele.innerText ~ "\n" ~ repeat("-", ele.innerText.length) ~ "\r";
ele.stripOut();
goto again;
case "h3":
ele.innerText = "\r" ~ ele.innerText.toUpper ~ "\r";
ele.stripOut();
goto again;
case "td":
case "p":
/*
if(ele.innerHTML.length > 1)
ele.innerHTML = "\r" ~ wrap(ele.innerHTML) ~ "\r";
ele.stripOut();
goto again;
*/
break;
case "a":
string href = ele.getAttribute("href");
if(href && !ele.hasClass("no-brackets")) {
if(ele.hasClass("href-text"))
ele.innerText = href;
else {
if(ele.innerText != href)
ele.innerText = ele.innerText ~ " <" ~ href ~ "> ";
}
}
ele.stripOut();
goto again;
case "ol":
case "ul":
ele.innerHTML = "\r" ~ ele.innerHTML ~ "\r";
break;
case "li":
if(!ele.innerHTML.startsWith("* "))
ele.innerHTML = "* " ~ ele.innerHTML ~ "\r";
// ele.stripOut();
break;
case "sup":
ele.innerText = "^" ~ ele.innerText;
ele.stripOut();
break;
case "img":
string alt = ele.getAttribute("alt");
if(alt)
result ~= ele.alt;
break;
default:
ele.stripOut();
goto again;
}
}
again2:
//start.innerHTML = start.innerHTML().replace("\u0001", "\n");
foreach(ele; start.tree) {
if(ele.tagName == "td") {
if(ele.directText().strip().length) {
ele.prependText("\r");
ele.appendText("\r");
}
ele.stripOut();
goto again2;
} else if(ele.tagName == "p") {
if(strip(ele.innerText()).length > 1) {
string res = "";
string all = ele.innerText().replace("\n \n", "\n\n");
foreach(part; all.split("\n\n"))
res ~= "\r" ~ strip( wantWordWrap ? wrap(part, /*74*/ wrapAmount) : part ) ~ "\r";
ele.innerText = res;
} else
ele.innerText = strip(ele.innerText);
ele.stripOut();
goto again2;
} else if(ele.tagName == "li") {
auto part = ele.innerText;
part = strip( wantWordWrap ? wrap(part, wrapAmount - 2) : part );
part = " " ~ part.replace("\n", "\n\v") ~ "\r";
ele.innerText = part;
ele.stripOut();
goto again2;
}
}
result = start.innerText();
result = squeeze(result, " ");
result = result.replace("\r ", "\r");
result = result.replace(" \r", "\r");
//result = result.replace("\u00a0", " ");
result = squeeze(result, "\r");
result = result.replace("\r", "\n\n");
result = result.replace("\v", " ");
result = result.replace("&#33303;", "'"); // HACK: this shouldn't be needed, but apparently is in practice surely due to a bug elsewhere
result = result.replace("&quot;", "\""); // HACK: this shouldn't be needed, but apparently is in practice surely due to a bug elsewhere
//result = htmlEntitiesDecode(result); // for special chars mainly
result = result.replace("\u0001 ", "\n");
result = result.replace("\u0001", "\n");
//a = std.regex.replace(a, std.regex.regex("(\n\t)+", "g"), "\n"); //\t");
return result.strip;
}