helpers: Fix TrimShortHTML used by markdownify and RenderString

Closes #11698
This commit is contained in:
Joe Mooring 2023-11-11 21:27:44 -08:00 committed by Bjørn Erik Pedersen
parent ac7cffa7e2
commit 0bde6931ac
3 changed files with 78 additions and 11 deletions

View file

@ -251,18 +251,15 @@ func (c *ContentSpec) TruncateWordsToWholeSentence(s string) (string, bool) {
// where said tags are the only <p> tags in the input and enclose the content
// of the input (whitespace excluded).
func (c *ContentSpec) TrimShortHTML(input []byte) []byte {
firstOpeningP := bytes.Index(input, paragraphIndicator)
lastOpeningP := bytes.LastIndex(input, paragraphIndicator)
lastClosingP := bytes.LastIndex(input, closingPTag)
lastClosing := bytes.LastIndex(input, closingIndicator)
if firstOpeningP == lastOpeningP && lastClosingP == lastClosing {
input = bytes.TrimSpace(input)
input = bytes.TrimPrefix(input, openingPTag)
input = bytes.TrimSuffix(input, closingPTag)
if bytes.Count(input, openingPTag) == 1 {
input = bytes.TrimSpace(input)
if bytes.HasPrefix(input, openingPTag) && bytes.HasSuffix(input, closingPTag) {
input = bytes.TrimPrefix(input, openingPTag)
input = bytes.TrimSuffix(input, closingPTag)
input = bytes.TrimSpace(input)
}
}
return input
}

View file

@ -32,12 +32,15 @@ func TestTrimShortHTML(t *testing.T) {
}{
{[]byte(""), []byte("")},
{[]byte("Plain text"), []byte("Plain text")},
{[]byte(" \t\n Whitespace text\n\n"), []byte("Whitespace text")},
// This seems wrong. Why touch the input if it doesn't have a p tag?
// {[]byte(" \t\n Whitespace text\n\n"), []byte("Whitespace text")},
{[]byte("<p>Simple paragraph</p>"), []byte("Simple paragraph")},
{[]byte("\n \n \t <p> \t Whitespace\nHTML \n\t </p>\n\t"), []byte("Whitespace\nHTML")},
{[]byte("<p>Multiple</p><p>paragraphs</p>"), []byte("<p>Multiple</p><p>paragraphs</p>")},
{[]byte("<p>Nested<p>paragraphs</p></p>"), []byte("<p>Nested<p>paragraphs</p></p>")},
{[]byte("<p>Hello</p>\n<ul>\n<li>list1</li>\n<li>list2</li>\n</ul>"), []byte("<p>Hello</p>\n<ul>\n<li>list1</li>\n<li>list2</li>\n</ul>")},
// Issue #11698
{[]byte("<h2 id=`a`>b</h2>\n\n<p>c</p>"), []byte("<h2 id=`a`>b</h2>\n\n<p>c</p>")},
}
c := newTestContentSpec(nil)