markdown: Pass emoji codes to yuin/goldmark-emoji

Removes emoji code conversion from the page and shortcode parsers. Emoji codes in Markdown are now passed to Goldmark, where the goldmark-emoji extension converts them to decimal numeric character references. This disables emoji rendering for the alternate content formats: html, asciidoc, org, pandoc, and rst. Fixes #7332. Fixes #11587. Closes #11598.
2025-04-26 05:30:54 +03:00 · 2023-10-24 03:04:13 -07:00 · 2023-10-24 03:04:13 -07:00 · 272484f8bf
commit 272484f8bf
parent de4e466036
16 changed files with 151 additions and 312 deletions
--- a/parser/pageparser/pagelexer.go
+++ b/parser/pageparser/pagelexer.go
@ -61,9 +61,7 @@ func (l *pageLexer) Input() []byte {
 	return l.input
 }

-type Config struct {
-	EnableEmoji bool
-}
+type Config struct{}

 // note: the input position here is normally 0 (start), but
 // can be set if position of first shortcode is known
@ -103,8 +101,6 @@ var (
 	delimOrg          = []byte("#+")
 	htmlCommentStart  = []byte("<!--")
 	htmlCommentEnd    = []byte("-->")
-
-	emojiDelim = byte(':')
 )

 func (l *pageLexer) next() rune {
@ -276,34 +272,6 @@ func (l *pageLexer) consumeSpace() {
 	}
 }

-// lex a string starting at ":"
-func lexEmoji(l *pageLexer) stateFunc {
-	pos := l.pos + 1
-	valid := false
-
-	for i := pos; i < len(l.input); i++ {
-		if i > pos && l.input[i] == emojiDelim {
-			pos = i + 1
-			valid = true
-			break
-		}
-		r, _ := utf8.DecodeRune(l.input[i:])
-		if !(isAlphaNumericOrHyphen(r) || r == '+') {
-			break
-		}
-	}
-
-	if valid {
-		l.pos = pos
-		l.emit(TypeEmoji)
-	} else {
-		l.pos++
-		l.emit(tText)
-	}
-
-	return lexMainSection
-}
-
 type sectionHandlers struct {
 	l *pageLexer

@ -399,20 +367,6 @@ func createSectionHandlers(l *pageLexer) *sectionHandlers {

 	handlers := []*sectionHandler{shortCodeHandler, summaryDividerHandler}

-	if l.cfg.EnableEmoji {
-		emojiHandler := &sectionHandler{
-			l: l,
-			skipFunc: func(l *pageLexer) int {
-				return l.indexByte(emojiDelim)
-			},
-			lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {
-				return lexEmoji, true
-			},
-		}
-
-		handlers = append(handlers, emojiHandler)
-	}
-
 	return &sectionHandlers{
 		l:           l,
 		handlers:    handlers,
--- a/parser/pageparser/pageparser_main_test.go
+++ b/parser/pageparser/pageparser_main_test.go
@ -1,42 +0,0 @@
-// Copyright 2018 The Hugo Authors. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package pageparser
-
-import (
-	"testing"
-
-	qt "github.com/frankban/quicktest"
-)
-
-func TestMain(t *testing.T) {
-	t.Parallel()
-	c := qt.New(t)
-
-	mainTests := []lexerTest{
-		{"emoji #1", "Some text with :emoji:", []typeText{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), tstEOF}},
-		{"emoji #2", "Some text with :emoji: and some text.", []typeText{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), nti(tText, " and some text."), tstEOF}},
-		{"looks like an emoji #1", "Some text and then :emoji", []typeText{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, "emoji"), tstEOF}},
-		{"looks like an emoji #2", "Some text and then ::", []typeText{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, ":"), tstEOF}},
-		{"looks like an emoji #3", ":Some :text", []typeText{nti(tText, ":"), nti(tText, "Some "), nti(tText, ":"), nti(tText, "text"), tstEOF}},
-	}
-
-	for i, test := range mainTests {
-		items := collectWithConfig([]byte(test.input), false, lexMainSection, Config{EnableEmoji: true})
-		if !equal(test.input, items, test.items) {
-			got := itemsToString(items, []byte(test.input))
-			expected := testItemsToString(test.items)
-			c.Assert(got, qt.Equals, expected, qt.Commentf("Test %d: %s", i, test.name))
-		}
-	}
-}
--- a/parser/pageparser/pageparser_test.go
+++ b/parser/pageparser/pageparser_test.go
@ -24,7 +24,7 @@ import (

 func BenchmarkParse(b *testing.B) {
 	start := `
-	
+

 ---
 title: "Front Matters"
@ -38,33 +38,7 @@ This is some summary. This is some summary. This is some summary. This is some s

 `
 	input := []byte(start + strings.Repeat(strings.Repeat("this is text", 30)+"{{< myshortcode >}}This is some inner content.{{< /myshortcode >}}", 10))
-	cfg := Config{EnableEmoji: false}
-
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		if _, err := parseBytes(input, cfg, lexIntroSection); err != nil {
-			b.Fatal(err)
-		}
-	}
-}
-
-func BenchmarkParseWithEmoji(b *testing.B) {
-	start := `
-	
-
---
-title: "Front Matters"
-description: "It really does"
---
-
-This is some summary. This is some summary. This is some summary. This is some summary.
-
- <!--more-->
-
-
-`
-	input := []byte(start + strings.Repeat("this is not emoji: ", 50) + strings.Repeat("some text ", 70) + strings.Repeat("this is not: ", 50) + strings.Repeat("but this is a :smile: ", 3) + strings.Repeat("some text ", 70))
-	cfg := Config{EnableEmoji: true}
+	cfg := Config{}

 	b.ResetTimer()
 	for i := 0; i < b.N; i++ {