Make HTML behave exactly like other content formats (note)

Fixes #11999
2025-04-26 13:40:38 +03:00 · 2024-02-07 10:30:32 +01:00 · 2024-02-07 10:30:32 +01:00 · 676e6875da
commit 676e6875da
parent 068ccde4c7
9 changed files with 107 additions and 175 deletions
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@@ -43,8 +43,6 @@ type pageLexer struct {
 	summaryDivider []byte
 	// Set when we have parsed any summary divider
 	summaryDividerChecked bool
-	// Whether we're in a HTML comment.
-	isInHTMLComment bool

 	lexerShortcodeState

@@ -102,8 +100,6 @@ var (
 	delimTOML         = []byte("+++")
 	delimYAML         = []byte("---")
 	delimOrg          = []byte("#+")
-	htmlCommentStart  = []byte("<!--")
-	htmlCommentEnd    = []byte("-->")
 )

 func (l *pageLexer) next() rune {
@@ -232,13 +228,6 @@ func (l *pageLexer) errorf(format string, args ...any) stateFunc {
 	return nil
 }

-// documentError can be used to signal a fatal error in the lexing process.
-// nil terminates the parser
-func (l *pageLexer) documentError(err error) stateFunc {
-	l.err = err
-	return nil
-}
-
 func (l *pageLexer) consumeCRLF() bool {
 	var consumed bool
 	for _, r := range crLf {
@@ -251,15 +240,6 @@ func (l *pageLexer) consumeCRLF() bool {
 	return consumed
 }

-func (l *pageLexer) consumeToNextLine() {
-	for {
-		r := l.next()
-		if r == eof || isEndOfLine(r) {
-			return
-		}
-	}
-}
-
 func (l *pageLexer) consumeToSpace() {
 	for {
 		r := l.next()
@@ -441,10 +421,6 @@ func lexMainSection(l *pageLexer) stateFunc {
 		return lexDone
 	}

-	if l.isInHTMLComment {
-		return lexEndFrontMatterHTMLComment
-	}
-
 	// Fast forward as far as possible.
 	skip := l.sectionHandlers.skip()

--- a/parser/pageparser/pagelexer_intro.go
+++ b/parser/pageparser/pagelexer_intro.go
@@ -13,10 +13,6 @@

 package pageparser

-import "errors"
-
-var ErrPlainHTMLDocumentsNotSupported = errors.New("plain HTML documents not supported")
-
 func lexIntroSection(l *pageLexer) stateFunc {
 	l.summaryDivider = summaryDivider

@@ -39,19 +35,6 @@ LOOP:
 		case r == byteOrderMark:
 			l.emit(TypeIgnore)
 		case !isSpace(r) && !isEndOfLine(r):
-			if r == '<' {
-				l.backup()
-				if l.hasPrefix(htmlCommentStart) {
-					// This may be commented out front matter, which should
-					// still be read.
-					l.consumeToNextLine()
-					l.isInHTMLComment = true
-					l.emit(TypeIgnore)
-					continue LOOP
-				} else {
-					return l.documentError(ErrPlainHTMLDocumentsNotSupported)
-				}
-			}
 			break LOOP
 		}
 	}
@@ -60,19 +43,6 @@ LOOP:
 	return lexMainSection
 }

-func lexEndFrontMatterHTMLComment(l *pageLexer) stateFunc {
-	l.isInHTMLComment = false
-	right := l.index(htmlCommentEnd)
-	if right == -1 {
-		return l.errorf("starting HTML comment with no end")
-	}
-	l.pos += right + len(htmlCommentEnd)
-	l.emit(TypeIgnore)
-
-	// Now move on to the shortcodes.
-	return lexMainSection
-}
-
 func lexFrontMatterJSON(l *pageLexer) stateFunc {
 	// Include the left delimiter
 	l.backup()
--- a/parser/pageparser/pageparser_intro_test.go
+++ b/parser/pageparser/pageparser_intro_test.go
@@ -61,13 +61,9 @@ var crLfReplacer = strings.NewReplacer("\r", "#", "\n", "$")
 var frontMatterTests = []lexerTest{
 	{"empty", "", []typeText{tstEOF}, nil},
 	{"Byte order mark", "\ufeff\nSome text.\n", []typeText{nti(TypeIgnore, "\ufeff"), tstSomeText, tstEOF}, nil},
-	{"HTML Document", `  <html>  `, nil, ErrPlainHTMLDocumentsNotSupported},
-	{"HTML Document with shortcode", `<html>{{< sc1 >}}</html>`, nil, ErrPlainHTMLDocumentsNotSupported},
 	{"No front matter", "\nSome text.\n", []typeText{tstSomeText, tstEOF}, nil},
 	{"YAML front matter", "---\nfoo: \"bar\"\n---\n\nSome text.\n", []typeText{tstFrontMatterYAML, tstSomeText, tstEOF}, nil},
 	{"YAML empty front matter", "---\n---\n\nSome text.\n", []typeText{nti(TypeFrontMatterYAML, ""), tstSomeText, tstEOF}, nil},
-	{"YAML commented out front matter", "<!--\n---\nfoo: \"bar\"\n---\n-->\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(TypeIgnore, "-->"), tstSomeText, tstEOF}, nil},
-	{"YAML commented out front matter, no end", "<!--\n---\nfoo: \"bar\"\n---\nSome text.\n", []typeText{nti(TypeIgnore, "<!--\n"), tstFrontMatterYAML, nti(tError, "starting HTML comment with no end")}, nil},
 	// Note that we keep all bytes as they are, but we need to handle CRLF
 	{"YAML front matter CRLF", "---\r\nfoo: \"bar\"\r\n---\n\nSome text.\n", []typeText{tstFrontMatterYAMLCRLF, tstSomeText, tstEOF}, nil},
 	{"TOML front matter", "+++\nfoo = \"bar\"\n+++\n\nSome text.\n", []typeText{tstFrontMatterTOML, tstSomeText, tstEOF}, nil},