markdown: Pass emoji codes to yuin/goldmark-emoji

Removes emoji code conversion from the page and shortcode parsers. Emoji
codes in markdown are now passed to Goldmark, where the goldmark-emoji
extension converts them to decimal numeric character references.

This disables emoji rendering for the alternate content formats: html,
asciidoc, org, pandoc, and rst.

Fixes #7332
Fixes #11587
Closes #11598
This commit is contained in:
Joe Mooring 2023-10-24 03:04:13 -07:00 committed by GitHub
parent de4e466036
commit 272484f8bf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 151 additions and 312 deletions

View file

@ -61,9 +61,7 @@ func (l *pageLexer) Input() []byte {
return l.input
}
// Config holds the options passed to the page lexer.
type Config struct {
// EnableEmoji, when true, registers an extra section handler that lexes
// ":"-delimited emoji codes in the main section.
EnableEmoji bool
}
// Config holds the options passed to the page lexer. It currently has no
// fields.
type Config struct{}
// note: the input position here is normally 0 (start), but
// it can be set if the position of the first shortcode is known
@ -103,8 +101,6 @@ var (
delimOrg = []byte("#+")
htmlCommentStart = []byte("<!--")
htmlCommentEnd = []byte("-->")
emojiDelim = byte(':')
)
func (l *pageLexer) next() rune {
@ -276,34 +272,6 @@ func (l *pageLexer) consumeSpace() {
}
}
// lexEmoji lexes a candidate emoji code; it is entered with l.pos at the
// opening ":".
//
// Starting at the byte after l.pos it scans forward. If a closing ":" is found
// after at least one byte, and every byte before it decodes to a rune accepted
// by isAlphaNumericOrHyphen or equal to '+', l.pos is moved just past the
// closing ":" and a TypeEmoji item is emitted. Otherwise l.pos is advanced by
// one and a tText item is emitted. In both cases lexing continues with
// lexMainSection.
func lexEmoji(l *pageLexer) stateFunc {
	start := l.pos + 1
	end := -1 // index just past the closing delimiter; negative until found

	for i := start; i < len(l.input); i++ {
		// A delimiter directly after the opening one ("::") does not count as
		// a close, hence the i > start requirement.
		if i > start && l.input[i] == emojiDelim {
			end = i + 1
			break
		}
		if r, _ := utf8.DecodeRune(l.input[i:]); !isAlphaNumericOrHyphen(r) && r != '+' {
			break
		}
	}

	if end < 0 {
		// Not an emoji code: step over the current byte and emit it as text.
		l.pos++
		l.emit(tText)
		return lexMainSection
	}

	l.pos = end
	l.emit(TypeEmoji)
	return lexMainSection
}
type sectionHandlers struct {
l *pageLexer
@ -399,20 +367,6 @@ func createSectionHandlers(l *pageLexer) *sectionHandlers {
handlers := []*sectionHandler{shortCodeHandler, summaryDividerHandler}
if l.cfg.EnableEmoji {
emojiHandler := &sectionHandler{
l: l,
skipFunc: func(l *pageLexer) int {
return l.indexByte(emojiDelim)
},
lexFunc: func(origin stateFunc, l *pageLexer) (stateFunc, bool) {
return lexEmoji, true
},
}
handlers = append(handlers, emojiHandler)
}
return &sectionHandlers{
l: l,
handlers: handlers,

View file

@ -1,42 +0,0 @@
// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pageparser
import (
"testing"
qt "github.com/frankban/quicktest"
)
// TestMain runs lexMainSection with emoji lexing enabled over a table of
// inputs and compares the collected items against the expected item sequence.
//
// NOTE(review): the name TestMain is conventionally associated with the
// func TestMain(m *testing.M) entry point; with a *testing.T parameter this
// appears to run as an ordinary test. Consider renaming to avoid confusion.
func TestMain(t *testing.T) {
	t.Parallel()
	c := qt.New(t)

	cases := []lexerTest{
		{
			"emoji #1",
			"Some text with :emoji:",
			[]typeText{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), tstEOF},
		},
		{
			"emoji #2",
			"Some text with :emoji: and some text.",
			[]typeText{nti(tText, "Some text with "), nti(TypeEmoji, ":emoji:"), nti(tText, " and some text."), tstEOF},
		},
		{
			"looks like an emoji #1",
			"Some text and then :emoji",
			[]typeText{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, "emoji"), tstEOF},
		},
		{
			"looks like an emoji #2",
			"Some text and then ::",
			[]typeText{nti(tText, "Some text and then "), nti(tText, ":"), nti(tText, ":"), tstEOF},
		},
		{
			"looks like an emoji #3",
			":Some :text",
			[]typeText{nti(tText, ":"), nti(tText, "Some "), nti(tText, ":"), nti(tText, "text"), tstEOF},
		},
	}

	cfg := Config{EnableEmoji: true}

	for idx, tc := range cases {
		lexed := collectWithConfig([]byte(tc.input), false, lexMainSection, cfg)
		if equal(tc.input, lexed, tc.items) {
			continue
		}

		// Only on mismatch: render both sides as strings so the failure
		// message shows a readable diff.
		got := itemsToString(lexed, []byte(tc.input))
		expected := testItemsToString(tc.items)
		c.Assert(got, qt.Equals, expected, qt.Commentf("Test %d: %s", idx, tc.name))
	}
}

View file

@ -24,7 +24,7 @@ import (
func BenchmarkParse(b *testing.B) {
start := `
---
title: "Front Matters"
@ -38,33 +38,7 @@ This is some summary. This is some summary. This is some summary. This is some s
`
input := []byte(start + strings.Repeat(strings.Repeat("this is text", 30)+"{{< myshortcode >}}This is some inner content.{{< /myshortcode >}}", 10))
cfg := Config{EnableEmoji: false}
b.ResetTimer()
for i := 0; i < b.N; i++ {
if _, err := parseBytes(input, cfg, lexIntroSection); err != nil {
b.Fatal(err)
}
}
}
func BenchmarkParseWithEmoji(b *testing.B) {
start := `
---
title: "Front Matters"
description: "It really does"
---
This is some summary. This is some summary. This is some summary. This is some summary.
<!--more-->
`
input := []byte(start + strings.Repeat("this is not emoji: ", 50) + strings.Repeat("some text ", 70) + strings.Repeat("this is not: ", 50) + strings.Repeat("but this is a :smile: ", 3) + strings.Repeat("some text ", 70))
cfg := Config{EnableEmoji: true}
cfg := Config{}
b.ResetTimer()
for i := 0; i < b.N; i++ {