Add autoID for definition terms

Fixes #13403
See #11566

Co-authored-by: Joe Mooring <joe@mooring.com>
This commit is contained in:
Bjørn Erik Pedersen 2025-02-15 17:13:20 +01:00
parent 9c2f8ec61b
commit 157d3703c3
9 changed files with 262 additions and 47 deletions

View file

@ -26,6 +26,7 @@ import (
"github.com/gohugoio/hugo/common/text" "github.com/gohugoio/hugo/common/text"
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/ast"
east "github.com/yuin/goldmark/extension/ast"
"github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
@ -43,11 +44,11 @@ func sanitizeAnchorName(b []byte, idType string) []byte {
func sanitizeAnchorNameWithHook(b []byte, idType string, hook func(buf *bytes.Buffer)) []byte { func sanitizeAnchorNameWithHook(b []byte, idType string, hook func(buf *bytes.Buffer)) []byte {
buf := bp.GetBuffer() buf := bp.GetBuffer()
if idType == goldmark_config.AutoHeadingIDTypeBlackfriday { if idType == goldmark_config.AutoIDTypeBlackfriday {
// TODO(bep) make it more efficient. // TODO(bep) make it more efficient.
buf.WriteString(blackfriday.SanitizedAnchorName(string(b))) buf.WriteString(blackfriday.SanitizedAnchorName(string(b)))
} else { } else {
asciiOnly := idType == goldmark_config.AutoHeadingIDTypeGitHubAscii asciiOnly := idType == goldmark_config.AutoIDTypeGitHubAscii
if asciiOnly { if asciiOnly {
// Normalize it to preserve accents if possible. // Normalize it to preserve accents if possible.
@ -90,8 +91,9 @@ func isAlphaNumeric(r rune) bool {
var _ parser.IDs = (*idFactory)(nil) var _ parser.IDs = (*idFactory)(nil)
type idFactory struct { type idFactory struct {
idType string idType string
vals map[string]struct{} vals map[string]struct{}
duplicates []string
} }
func newIDFactory(idType string) *idFactory { func newIDFactory(idType string) *idFactory {
@ -101,11 +103,28 @@ func newIDFactory(idType string) *idFactory {
} }
} }
// stringValuesProvider is implemented by ID stores that can report the IDs
// they hold as plain strings.
type stringValuesProvider interface {
// StringValues returns the stored IDs as strings.
StringValues() []string
}
// Compile-time check that *idFactory satisfies stringValuesProvider.
var _ stringValuesProvider = (*idFactory)(nil)
// StringValues returns every ID recorded by the factory: each unique ID once,
// followed by one entry for every duplicate registration, in the order the
// duplicates were recorded.
//
// The unique IDs come from a map, so their relative order is unspecified;
// callers that need a stable order must sort the result. The returned slice is
// freshly allocated and may be modified by the caller.
func (ids *idFactory) StringValues() []string {
	// Size for both sources up front so appending the duplicates never
	// triggers a second allocation.
	values := make([]string, 0, len(ids.vals)+len(ids.duplicates))
	for id := range ids.vals {
		values = append(values, id)
	}
	return append(values, ids.duplicates...)
}
func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte { func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
return sanitizeAnchorNameWithHook(value, ids.idType, func(buf *bytes.Buffer) { return sanitizeAnchorNameWithHook(value, ids.idType, func(buf *bytes.Buffer) {
if buf.Len() == 0 { if buf.Len() == 0 {
if kind == ast.KindHeading { if kind == ast.KindHeading {
buf.WriteString("heading") buf.WriteString("heading")
} else if kind == east.KindDefinitionTerm {
buf.WriteString("term")
} else { } else {
buf.WriteString("id") buf.WriteString("id")
} }
@ -123,11 +142,18 @@ func (ids *idFactory) Generate(value []byte, kind ast.NodeKind) []byte {
buf.Truncate(pos) buf.Truncate(pos)
} }
} }
ids.put(buf.String())
ids.vals[buf.String()] = struct{}{}
}) })
} }
func (ids *idFactory) Put(value []byte) { func (ids *idFactory) put(s string) {
ids.vals[util.BytesToReadOnlyString(value)] = struct{}{} if _, found := ids.vals[s]; found {
ids.duplicates = append(ids.duplicates, s)
} else {
ids.vals[s] = struct{}{}
}
}
func (ids *idFactory) Put(value []byte) {
ids.put(string(value))
} }

View file

@ -78,9 +78,9 @@ tabspace
expect := expectlines[i] expect := expectlines[i]
c.Run(input, func(c *qt.C) { c.Run(input, func(c *qt.C) {
b := []byte(input) b := []byte(input)
got := string(sanitizeAnchorName(b, goldmark_config.AutoHeadingIDTypeGitHub)) got := string(sanitizeAnchorName(b, goldmark_config.AutoIDTypeGitHub))
c.Assert(got, qt.Equals, expect) c.Assert(got, qt.Equals, expect)
c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub), qt.Equals, expect) c.Assert(sanitizeAnchorNameString(input, goldmark_config.AutoIDTypeGitHub), qt.Equals, expect)
c.Assert(string(b), qt.Equals, input) c.Assert(string(b), qt.Equals, input)
}) })
} }
@ -89,20 +89,20 @@ tabspace
func TestSanitizeAnchorNameAsciiOnly(t *testing.T) { func TestSanitizeAnchorNameAsciiOnly(t *testing.T) {
c := qt.New(t) c := qt.New(t)
c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "god-is-good") c.Assert(sanitizeAnchorNameString("god is神真美好 good", goldmark_config.AutoIDTypeGitHubAscii), qt.Equals, "god-is-good")
c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoHeadingIDTypeGitHubAscii), qt.Equals, "resume") c.Assert(sanitizeAnchorNameString("Resumé", goldmark_config.AutoIDTypeGitHubAscii), qt.Equals, "resume")
} }
func TestSanitizeAnchorNameBlackfriday(t *testing.T) { func TestSanitizeAnchorNameBlackfriday(t *testing.T) {
c := qt.New(t) c := qt.New(t)
c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoHeadingIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we") c.Assert(sanitizeAnchorNameString("Let's try this, shall we?", goldmark_config.AutoIDTypeBlackfriday), qt.Equals, "let-s-try-this-shall-we")
} }
func BenchmarkSanitizeAnchorName(b *testing.B) { func BenchmarkSanitizeAnchorName(b *testing.B) {
input := []byte("God is good: 神真美好") input := []byte("God is good: 神真美好")
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHub) result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeGitHub)
if len(result) != 24 { if len(result) != 24 {
b.Fatalf("got %d", len(result)) b.Fatalf("got %d", len(result))
} }
@ -113,7 +113,7 @@ func BenchmarkSanitizeAnchorNameAsciiOnly(b *testing.B) {
input := []byte("God is good: 神真美好") input := []byte("God is good: 神真美好")
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeGitHubAscii) result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeGitHubAscii)
if len(result) != 12 { if len(result) != 12 {
b.Fatalf("got %d", len(result)) b.Fatalf("got %d", len(result))
} }
@ -124,7 +124,7 @@ func BenchmarkSanitizeAnchorNameBlackfriday(b *testing.B) {
input := []byte("God is good: 神真美好") input := []byte("God is good: 神真美好")
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
result := sanitizeAnchorName(input, goldmark_config.AutoHeadingIDTypeBlackfriday) result := sanitizeAnchorName(input, goldmark_config.AutoIDTypeBlackfriday)
if len(result) != 24 { if len(result) != 24 {
b.Fatalf("got %d", len(result)) b.Fatalf("got %d", len(result))
} }
@ -135,7 +135,7 @@ func BenchmarkSanitizeAnchorNameString(b *testing.B) {
input := "God is good: 神真美好" input := "God is good: 神真美好"
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
result := sanitizeAnchorNameString(input, goldmark_config.AutoHeadingIDTypeGitHub) result := sanitizeAnchorNameString(input, goldmark_config.AutoIDTypeGitHub)
if len(result) != 24 { if len(result) != 24 {
b.Fatalf("got %d", len(result)) b.Fatalf("got %d", len(result))
} }

View file

@ -61,7 +61,7 @@ func (p provide) New(cfg converter.ProviderConfig) (converter.Provider, error) {
cfg: cfg, cfg: cfg,
md: md, md: md,
sanitizeAnchorName: func(s string) string { sanitizeAnchorName: func(s string) string {
return sanitizeAnchorNameString(s, cfg.MarkupConfig().Goldmark.Parser.AutoHeadingIDType) return sanitizeAnchorNameString(s, cfg.MarkupConfig().Goldmark.Parser.AutoIDType)
}, },
}, nil }, nil
}), nil }), nil
@ -188,16 +188,12 @@ func newMarkdown(pcfg converter.ProviderConfig) goldmark.Markdown {
extensions = append(extensions, emoji.Emoji) extensions = append(extensions, emoji.Emoji)
} }
if cfg.Parser.AutoHeadingID {
parserOptions = append(parserOptions, parser.WithAutoHeadingID())
}
if cfg.Parser.Attribute.Title { if cfg.Parser.Attribute.Title {
parserOptions = append(parserOptions, parser.WithAttribute()) parserOptions = append(parserOptions, parser.WithAttribute())
} }
if cfg.Parser.Attribute.Block { if cfg.Parser.Attribute.Block || cfg.Parser.AutoHeadingID || cfg.Parser.AutoDefinitionTermID {
extensions = append(extensions, attributes.New()) extensions = append(extensions, attributes.New(cfg.Parser))
} }
md := goldmark.New( md := goldmark.New(
@ -295,7 +291,7 @@ func (c *goldmarkConverter) Convert(ctx converter.RenderContext) (converter.Resu
} }
func (c *goldmarkConverter) newParserContext(rctx converter.RenderContext) *parserContext { func (c *goldmarkConverter) newParserContext(rctx converter.RenderContext) *parserContext {
ctx := parser.NewContext(parser.WithIDs(newIDFactory(c.cfg.MarkupConfig().Goldmark.Parser.AutoHeadingIDType))) ctx := parser.NewContext(parser.WithIDs(newIDFactory(c.cfg.MarkupConfig().Goldmark.Parser.AutoIDType)))
ctx.Set(tocEnableKey, rctx.RenderTOC) ctx.Set(tocEnableKey, rctx.RenderTOC)
return &parserContext{ return &parserContext{
Context: ctx, Context: ctx,

View file

@ -15,9 +15,9 @@
package goldmark_config package goldmark_config
const ( const (
AutoHeadingIDTypeGitHub = "github" AutoIDTypeGitHub = "github"
AutoHeadingIDTypeGitHubAscii = "github-ascii" AutoIDTypeGitHubAscii = "github-ascii"
AutoHeadingIDTypeBlackfriday = "blackfriday" AutoIDTypeBlackfriday = "blackfriday"
) )
// Default holds the default Goldmark configuration. // Default holds the default Goldmark configuration.
@ -79,7 +79,8 @@ var Default = Config{
}, },
Parser: Parser{ Parser: Parser{
AutoHeadingID: true, AutoHeadingID: true,
AutoHeadingIDType: AutoHeadingIDTypeGitHub, AutoDefinitionTermID: false,
AutoIDType: AutoIDTypeGitHub,
WrapStandAloneImageWithinParagraph: true, WrapStandAloneImageWithinParagraph: true,
Attribute: ParserAttribute{ Attribute: ParserAttribute{
Title: true, Title: true,
@ -97,6 +98,16 @@ type Config struct {
RenderHooks RenderHooks RenderHooks RenderHooks
} }
// Init finalizes the Goldmark configuration after decoding.
//
// It first initializes the parser settings and returns any error from that
// step unchanged. It then makes sure AutoDefinitionTermID is only left enabled
// when the DefinitionList extension is enabled as well.
func (c *Config) Init() error {
	err := c.Parser.Init()
	if err != nil {
		return err
	}

	// Term IDs stay on only if the definition list extension is on too.
	c.Parser.AutoDefinitionTermID = c.Parser.AutoDefinitionTermID && c.Extensions.DefinitionList

	return nil
}
// RenderHooks contains configuration for Goldmark render hooks. // RenderHooks contains configuration for Goldmark render hooks.
type RenderHooks struct { type RenderHooks struct {
Image ImageRenderHook Image ImageRenderHook
@ -250,16 +261,30 @@ type Parser struct {
// auto generated heading ids. // auto generated heading ids.
AutoHeadingID bool AutoHeadingID bool
// The strategy to use when generating heading IDs. // Enables auto definition term ids.
// Available options are "github", "github-ascii". AutoDefinitionTermID bool
// The strategy to use when generating IDs.
// Available options are "github", "github-ascii", and "blackfriday".
// Default is "github", which will create GitHub-compatible anchor names. // Default is "github", which will create GitHub-compatible anchor names.
AutoHeadingIDType string AutoIDType string
// Enables custom attributes. // Enables custom attributes.
Attribute ParserAttribute Attribute ParserAttribute
// Whether to wrap stand-alone images within a paragraph or not. // Whether to wrap stand-alone images within a paragraph or not.
WrapStandAloneImageWithinParagraph bool WrapStandAloneImageWithinParagraph bool
// Renamed to AutoIDType in 0.144.0.
AutoHeadingIDType string `json:"-"`
}
func (p *Parser) Init() error {
// Renamed from AutoHeadingIDType to AutoIDType in 0.144.0.
if p.AutoHeadingIDType != "" {
p.AutoIDType = p.AutoHeadingIDType
}
return nil
} }
type ParserAttribute struct { type ParserAttribute struct {

View file

@ -1,8 +1,11 @@
package attributes package attributes
import ( import (
"github.com/gohugoio/hugo/markup/goldmark/goldmark_config"
"github.com/gohugoio/hugo/markup/goldmark/internal/render"
"github.com/yuin/goldmark" "github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/ast"
east "github.com/yuin/goldmark/extension/ast"
"github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text" "github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util" "github.com/yuin/goldmark/util"
@ -14,24 +17,29 @@ import (
var ( var (
kindAttributesBlock = ast.NewNodeKind("AttributesBlock") kindAttributesBlock = ast.NewNodeKind("AttributesBlock")
attrNameID = []byte("id")
defaultParser = new(attrParser) defaultParser = new(attrParser)
defaultTransformer = new(transformer)
attributes goldmark.Extender = new(attrExtension)
) )
func New() goldmark.Extender { func New(cfg goldmark_config.Parser) goldmark.Extender {
return attributes return &attrExtension{cfg: cfg}
} }
type attrExtension struct{} type attrExtension struct {
cfg goldmark_config.Parser
}
func (a *attrExtension) Extend(m goldmark.Markdown) { func (a *attrExtension) Extend(m goldmark.Markdown) {
if a.cfg.Attribute.Block {
m.Parser().AddOptions(
parser.WithBlockParsers(
util.Prioritized(defaultParser, 100)),
)
}
m.Parser().AddOptions( m.Parser().AddOptions(
parser.WithBlockParsers(
util.Prioritized(defaultParser, 100)),
parser.WithASTTransformers( parser.WithASTTransformers(
util.Prioritized(defaultTransformer, 100), util.Prioritized(&transformer{cfg: a.cfg}, 100),
), ),
) )
} }
@ -92,18 +100,47 @@ func (a *attributesBlock) Kind() ast.NodeKind {
return kindAttributesBlock return kindAttributesBlock
} }
type transformer struct{} type transformer struct {
cfg goldmark_config.Parser
}
// isFragmentNode reports whether n is a heading or a definition term.
func (a *transformer) isFragmentNode(n ast.Node) bool {
	kind := n.Kind()
	return kind == ast.KindHeading || kind == east.KindDefinitionTerm
}
func (a *transformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) { func (a *transformer) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
attributes := make([]ast.Node, 0, 500) var attributes []ast.Node
if a.cfg.Attribute.Block {
attributes = make([]ast.Node, 0, 500)
}
ast.Walk(node, func(node ast.Node, entering bool) (ast.WalkStatus, error) { ast.Walk(node, func(node ast.Node, entering bool) (ast.WalkStatus, error) {
if entering && node.Kind() == kindAttributesBlock { if !entering {
return ast.WalkContinue, nil
}
if a.isFragmentNode(node) {
if id, found := node.Attribute(attrNameID); !found {
a.generateAutoID(node, reader, pc)
} else {
pc.IDs().Put(id.([]byte))
}
}
if a.cfg.Attribute.Block && node.Kind() == kindAttributesBlock {
// Attributes for fenced code blocks are handled in their own extension, // Attributes for fenced code blocks are handled in their own extension,
// but note that we currently only support code block attributes when // but note that we currently only support code block attributes when
// CodeFences=true. // CodeFences=true.
if node.PreviousSibling() != nil && node.PreviousSibling().Kind() != ast.KindFencedCodeBlock && !node.HasBlankPreviousLines() { if node.PreviousSibling() != nil && node.PreviousSibling().Kind() != ast.KindFencedCodeBlock && !node.HasBlankPreviousLines() {
attributes = append(attributes, node) attributes = append(attributes, node)
return ast.WalkSkipChildren, nil return ast.WalkSkipChildren, nil
} else {
// remove attributes node
node.Parent().RemoveChild(node.Parent(), node)
} }
} }
@ -123,3 +160,33 @@ func (a *transformer) Transform(node *ast.Document, reader text.Reader, pc parse
attr.Parent().RemoveChild(attr.Parent(), attr) attr.Parent().RemoveChild(attr.Parent(), attr)
} }
} }
// generateAutoID sets an auto-generated "id" attribute on n.
//
// For a heading the ID is derived from the text returned by textHeadingID,
// and only when cfg.AutoHeadingID is set. For a definition term it is derived
// from render.TextPlain, and only when cfg.AutoDefinitionTermID is set.
// Any other node type, or a node whose text is empty, is left untouched.
func (a *transformer) generateAutoID(n ast.Node, reader text.Reader, pc parser.Context) {
	var raw []byte

	switch typed := n.(type) {
	case *east.DefinitionTerm:
		if a.cfg.AutoDefinitionTermID {
			raw = []byte(render.TextPlain(typed, reader.Source()))
		}
	case *ast.Heading:
		if a.cfg.AutoHeadingID {
			raw = textHeadingID(typed, reader)
		}
	}

	// Nothing to derive an ID from (feature disabled, unsupported node, or empty text).
	if len(raw) == 0 {
		return
	}

	n.SetAttribute(attrNameID, pc.IDs().Generate(raw, n.Kind()))
}
// textHeadingID returns the source bytes of the heading's last line, or nil
// if the heading has no lines. Setext headings may span several lines; only
// the final one is used as the basis for the ID.
func textHeadingID(node *ast.Heading, reader text.Reader) []byte {
	lines := node.Lines()
	count := lines.Len()
	if count == 0 {
		return nil
	}
	last := lines.At(count - 1)
	return last.Value(reader.Source())
}

View file

@ -0,0 +1,74 @@
package attributes_test
import (
"testing"
"github.com/gohugoio/hugo/hugolib"
)
// TestDescriptionListAutoID builds a one-page site with autoHeadingID and
// autoDefinitionTermID enabled and autoIDType set to 'github-ascii', then
// checks the id attributes rendered on headings and definition terms as well
// as the list printed from .Fragments.Identifiers.
func TestDescriptionListAutoID(t *testing.T) {
t.Parallel()
files := `
-- hugo.toml --
[markup.goldmark.parser]
autoHeadingID = true
autoDefinitionTermID = true
autoIDType = 'github-ascii'
-- content/p1.md --
---
title: "Title"
---
## Title with id set {#title-with-id}
## Title with id set duplicate {#title-with-id}
## My Title
Base Name
: Base name of the file.
Base Name
: Duplicate term name.
My Title
: Term with same name as title.
Foo@Bar
: The foo bar.
foo [something](/a/b/) bar
: A foo bar.
良善天父
: The good father.
Ā ā Ă ă Ą ą Ć ć Ĉ ĉ Ċ ċ Č č Ď
: Testing accents.
Mutiline set text header
Second line
---------------
-- layouts/_default/single.html --
{{ .Content }}|Identifiers: {{ .Fragments.Identifiers }}|
`
b := hugolib.Test(t, files)
// The expectations cover repeated terms, a term sharing a heading's text,
// explicitly set (and repeated) heading IDs, and a term with no ASCII characters.
b.AssertFileContent("public/p1/index.html",
`<dt id="base-name">Base Name</dt>`,
`<dt id="base-name-1">Base Name</dt>`,
`<dt id="foobar">Foo@Bar</dt>`,
`<h2 id="my-title">My Title</h2>`,
`<dt id="foo-something-bar">foo <a href="/a/b/">something</a> bar</dt>`,
`<h2 id="title-with-id">Title with id set</h2>`,
`<h2 id="title-with-id">Title with id set duplicate</h2>`,
`<dt id="my-title-1">My Title</dt>`,
`<dt id="term">良善天父</dt>`,
`<dt id="a-a-a-a-a-a-c-c-c-c-c-c-c-c-d">Ā ā Ă ă Ą ą Ć ć Ĉ ĉ Ċ ċ Č č Ď</dt>`,
`<h2 id="second-line">Mutiline set text header`,
"|Identifiers: [a-a-a-a-a-a-c-c-c-c-c-c-c-c-d base-name base-name-1 foo-something-bar foobar my-title my-title-1 second-line term title-with-id title-with-id]|",
)
}

View file

@ -53,6 +53,10 @@ func (t *tocTransformer) Transform(n *ast.Document, reader text.Reader, pc parse
headingText bytes.Buffer headingText bytes.Buffer
) )
if ids := pc.IDs().(stringValuesProvider).StringValues(); len(ids) > 0 {
toc.SetIdentifiers(ids)
}
ast.Walk(n, func(n ast.Node, entering bool) (ast.WalkStatus, error) { ast.Walk(n, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
s := ast.WalkStatus(ast.WalkContinue) s := ast.WalkStatus(ast.WalkContinue)
if n.Kind() == ast.KindHeading { if n.Kind() == ast.KindHeading {
@ -131,5 +135,7 @@ func (e *tocExtension) Extend(m goldmark.Markdown) {
r.AddOptions(e.options...) r.AddOptions(e.options...)
m.Parser().AddOptions(parser.WithASTTransformers(util.Prioritized(&tocTransformer{ m.Parser().AddOptions(parser.WithASTTransformers(util.Prioritized(&tocTransformer{
r: r, r: r,
}, 10))) },
// This must run after the ID generation (priority 100).
110)))
} }

View file

@ -41,6 +41,10 @@ type Config struct {
AsciidocExt asciidocext_config.Config AsciidocExt asciidocext_config.Config
} }
// Init finalizes the markup configuration by initializing the Goldmark
// section, returning whatever error that step reports.
func (c *Config) Init() error {
	err := c.Goldmark.Init()
	return err
}
func Decode(cfg config.Provider) (conf Config, err error) { func Decode(cfg config.Provider) (conf Config, err error) {
conf = Default conf = Default
@ -57,6 +61,10 @@ func Decode(cfg config.Provider) (conf Config, err error) {
return return
} }
if err = conf.Init(); err != nil {
return
}
if err = highlight.ApplyLegacyConfig(cfg, &conf.Highlight); err != nil { if err = highlight.ApplyLegacyConfig(cfg, &conf.Highlight); err != nil {
return return
} }

View file

@ -31,7 +31,8 @@ var Empty = &Fragments{
// Builder is used to build the ToC data structure. // Builder is used to build the ToC data structure.
type Builder struct { type Builder struct {
toc *Fragments identifiersSet bool
toc *Fragments
} }
// AddAt adds the heading to the ToC. // AddAt adds the heading to the ToC.
@ -42,6 +43,16 @@ func (b *Builder) AddAt(h *Heading, row, level int) {
b.toc.addAt(h, row, level) b.toc.addAt(h, row, level)
} }
// SetIdentifiers sets the identifiers in the ToC.
func (b *Builder) SetIdentifiers(ids []string) {
if b.toc == nil {
b.toc = &Fragments{}
}
b.identifiersSet = true
sort.Strings(ids)
b.toc.Identifiers = ids
}
// Build returns the ToC. // Build returns the ToC.
func (b Builder) Build() *Fragments { func (b Builder) Build() *Fragments {
if b.toc == nil { if b.toc == nil {
@ -51,7 +62,9 @@ func (b Builder) Build() *Fragments {
b.toc.walk(func(h *Heading) { b.toc.walk(func(h *Heading) {
if h.ID != "" { if h.ID != "" {
b.toc.HeadingsMap[h.ID] = h b.toc.HeadingsMap[h.ID] = h
b.toc.Identifiers = append(b.toc.Identifiers, h.ID) if !b.identifiersSet {
b.toc.Identifiers = append(b.toc.Identifiers, h.ID)
}
} }
}) })
sort.Strings(b.toc.Identifiers) sort.Strings(b.toc.Identifiers)