Make HTML behave exactly like other content formats (note)

Fixes #11999
This commit is contained in:
Bjørn Erik Pedersen 2024-02-07 10:30:32 +01:00
parent 068ccde4c7
commit 676e6875da
9 changed files with 107 additions and 175 deletions

View file

@ -43,8 +43,6 @@ type pageLexer struct {
summaryDivider []byte
// Set when we have parsed any summary divider
summaryDividerChecked bool
// Whether we're in a HTML comment.
isInHTMLComment bool
lexerShortcodeState
@ -102,8 +100,6 @@ var (
delimTOML = []byte("+++")
delimYAML = []byte("---")
delimOrg = []byte("#+")
htmlCommentStart = []byte("<!--")
htmlCommentEnd = []byte("-->")
)
func (l *pageLexer) next() rune {
@ -232,13 +228,6 @@ func (l *pageLexer) errorf(format string, args ...any) stateFunc {
return nil
}
// documentError can be used to signal a fatal error in the lexing process.
// nil terminates the parser
func (l *pageLexer) documentError(err error) stateFunc {
l.err = err
return nil
}
func (l *pageLexer) consumeCRLF() bool {
var consumed bool
for _, r := range crLf {
@ -251,15 +240,6 @@ func (l *pageLexer) consumeCRLF() bool {
return consumed
}
func (l *pageLexer) consumeToNextLine() {
for {
r := l.next()
if r == eof || isEndOfLine(r) {
return
}
}
}
func (l *pageLexer) consumeToSpace() {
for {
r := l.next()
@ -441,10 +421,6 @@ func lexMainSection(l *pageLexer) stateFunc {
return lexDone
}
if l.isInHTMLComment {
return lexEndFrontMatterHTMLComment
}
// Fast forward as far as possible.
skip := l.sectionHandlers.skip()

View file

@ -13,10 +13,6 @@
package pageparser
import "errors"
var ErrPlainHTMLDocumentsNotSupported = errors.New("plain HTML documents not supported")
func lexIntroSection(l *pageLexer) stateFunc {
l.summaryDivider = summaryDivider
@ -39,19 +35,6 @@ LOOP:
case r == byteOrderMark:
l.emit(TypeIgnore)
case !isSpace(r) && !isEndOfLine(r):
if r == '<' {
l.backup()
if l.hasPrefix(htmlCommentStart) {
// This may be commented out front matter, which should
// still be read.
l.consumeToNextLine()
l.isInHTMLComment = true
l.emit(TypeIgnore)
continue LOOP
} else {
return l.documentError(ErrPlainHTMLDocumentsNotSupported)
}
}
break LOOP
}
}
@ -60,19 +43,6 @@ LOOP:
return lexMainSection
}
func lexEndFrontMatterHTMLComment(l *pageLexer) stateFunc {
l.isInHTMLComment = false
right := l.index(htmlCommentEnd)
if right == -1 {
return l.errorf("starting HTML comment with no end")
}
l.pos += right + len(htmlCommentEnd)
l.emit(TypeIgnore)
// Now move on to the shortcodes.
return lexMainSection
}
func lexFrontMatterJSON(l *pageLexer) stateFunc {
// Include the left delimiter
l.backup()

View file

@ -61,13 +61,9 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
var frontMatterTests = []lexerTest{
{"empty", "", []typeText{tstEOF}, nil},
{"Byte order mark", "\ufeff\nSome text.\n", []typeText{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}, nil},
{"HTML Document", ` <html> `, nil, ErrPlainHTMLDocumentsNotSupported},
{"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, nil, ErrPlainHTMLDocumentsNotSupported},
{"No front matter", "\nSome text.\n", []typeText{tstSomeText, tstEOF}, nil},
{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []typeText{tstFrontMatterYAML, tstSomeText, tstEOF}, nil},
{"YAML empty front matter", "---\n---\n\nSome text.\n", []typeText{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}, nil},
{"YAML commented out front matter", "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(TypeIgnore, "-->"), tstSomeText, tstEOF}, nil},
{"YAML commented out front matter, no end", "<!--\n---\nfoo: \"bar\"\n---\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(tError, "starting HTML comment with no end")}, nil},
// Note that we keep all bytes as they are, but we need to handle CRLF
{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []typeText{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}, nil},
{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []typeText{tstFrontMatterTOML, tstSomeText, tstEOF}, nil},