hugo/markup/pandoc/convert_test.go
Sebastian Höffner 861f9d5b7c
markup: add --citeproc to pandoc converter
Adds the citeproc filter to the pandoc converter.

There are several PRs for this feature already. However, I think
simply adding `--citeproc` is the cleanest way to enable this feature,
with the option to flesh it out later, e.g., in #7529.

Some PRs and issues attempt adding more config options to Hugo which
indirectly configure pandoc, but I think simply configuring Pandoc via
Pandoc itself is simpler, as it is already possible with two YAML
blocks -- one for Hugo, and one for Pandoc:

    ---
    title: This is the Hugo YAML block
    ---
    ---
    bibliography: assets/pandoc-yaml-block-bibliography.bib
    ...
    Document content with @citation!

There are other useful options, e.g., #4800 attempts to use `nocite`,
which works out of the box with this PR:

    ---
    title: This is the Hugo YAML block
    ---
    ---
    bibliography: assets/pandoc-yaml-block-bibliography.bib
    nocite: |
      @*
    ...
    Document content with no citations but a full bibliography:

    ## Bibliography

Other useful options are `csl: ...` and `link-citations: true`, which
set the path to a custom CSL file and create HTML links between the
references and the bibliography.

The following issues and PRs are related:

- Add support for parsing citations and Jupyter notebooks via Pandoc and/or Goldmark extension #6101
  Bundles multiple requests, this PR tackles citation parsing.

- WIP: Bibliography with Pandoc #4800
  Passes the frontmatter to Pandoc and still uses
  `--filter pandoc-citeproc` instead of `--citeproc`.
- Allow configuring Pandoc #7529
  That PR is much more extensive and might eventually supersede this PR,
  but I think --bibliography and --citeproc should be independent
  options (--bibliography should be optional and citeproc can always be
  specified).
- Pandoc - allow citeproc extension to be invoked, with bibliography. #8610
  Similar to #7529, #8610 adds a new config option to Hugo.
  I think passing --citeproc and letting the users decide on the
  metadata they want to pass to pandoc is better, albeit uglier.
2025-03-19 21:37:28 -07:00

142 lines
3.7 KiB
Go

// Copyright 2019 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pandoc
import (
"testing"
"github.com/gohugoio/hugo/common/hexec"
"github.com/gohugoio/hugo/common/loggers"
"github.com/gohugoio/hugo/config/security"
"github.com/gohugoio/hugo/markup/converter"
qt "github.com/frankban/quicktest"
)
// setupTestConverter builds the fixtures shared by the tests in this file.
//
// It skips the calling test when Supports() reports false. Otherwise it takes
// security.DefaultConfig, sets its exec allow-list to a whitelist built from
// "pandoc", creates a converter.ProviderConfig from it, and instantiates a
// converter through Provider.New. Any error along the way fails the test.
//
// It returns the quicktest checker bound to t, the converter, and the provider
// config that was used to create it.
func setupTestConverter(t *testing.T) (*qt.C, converter.Converter, converter.ProviderConfig) {
	// Attribute skips and failures to the calling test, not to this helper.
	t.Helper()

	if !Supports() {
		t.Skip("pandoc not installed")
	}

	c := qt.New(t)

	sc := security.DefaultConfig
	var err error
	sc.Exec.Allow, err = security.NewWhitelist("pandoc")
	c.Assert(err, qt.IsNil)

	cfg := converter.ProviderConfig{
		Exec:   hexec.New(sc, "", loggers.NewDefault()),
		Logger: loggers.NewDefault(),
	}

	p, err := Provider.New(cfg)
	c.Assert(err, qt.IsNil)

	conv, err := p.New(converter.DocumentContext{})
	c.Assert(err, qt.IsNil)

	return c, conv, cfg
}
// TestConvert converts the plain source "testContent" and requires the output
// to be exactly "<p>testContent</p>\n".
func TestConvert(t *testing.T) {
	c, conv, _ := setupTestConverter(t)

	rctx := converter.RenderContext{Src: []byte("testContent")}
	res, err := conv.Convert(rctx)
	c.Assert(err, qt.IsNil)

	got := string(res.Bytes())
	c.Assert(got, qt.Equals, "<p>testContent</p>\n")
}
// runCiteprocTest converts content and checks the rendered output.
//
// The calling test is skipped when supportsCitations(cfg) reports false (and,
// via setupTestConverter, when pandoc is unavailable). The conversion must
// succeed; afterwards every string in expectContained must occur in the output
// and no string in expectNotContained may occur in it.
func runCiteprocTest(t *testing.T, content string, expectContained []string, expectNotContained []string) {
	// Attribute skips and failures to the calling test, not to this helper.
	t.Helper()

	c, conv, cfg := setupTestConverter(t)
	if !supportsCitations(cfg) {
		t.Skip("pandoc does not support citations")
	}

	output, err := conv.Convert(converter.RenderContext{Src: []byte(content)})
	c.Assert(err, qt.IsNil)

	// Convert once; the rendered output does not change between assertions.
	rendered := string(output.Bytes())

	for _, expected := range expectContained {
		c.Assert(rendered, qt.Contains, expected)
	}
	for _, notExpected := range expectNotContained {
		c.Assert(rendered, qt.Not(qt.Contains), notExpected)
	}
}
// TestGetPandocSupportsCiteprocCallTwice calls getPandocSupportsCiteproc twice
// with the same config and requires both calls to return the same answer and
// no error.
func TestGetPandocSupportsCiteprocCallTwice(t *testing.T) {
	c, _, cfg := setupTestConverter(t)

	// Both calls happen before any assertion runs.
	first, firstErr := getPandocSupportsCiteproc(cfg)
	second, secondErr := getPandocSupportsCiteproc(cfg)

	c.Assert(first, qt.Equals, second)
	c.Assert(firstErr, qt.IsNil)
	c.Assert(secondErr, qt.IsNil)
}
// TestCiteprocWithHugoMeta renders a document with a single YAML block holding
// only title and published fields. The body text must appear in the output,
// while "Doe", "Mustermann", "2022" and "Treatise" must not (presumably these
// are the strings a rendered entry from testdata/bibliography.bib would add;
// verify against that file).
func TestCiteprocWithHugoMeta(t *testing.T) {
	const src = `
---
title: Test
published: 2022-05-30
---
testContent
`
	runCiteprocTest(t, src,
		[]string{"testContent"},
		[]string{"Doe", "Mustermann", "2022", "Treatise"},
	)
}
// TestCiteprocWithPandocMeta renders a document with an empty "---"/"---"
// block followed by an empty "---"/"..." block. The body text must appear in
// the output, while "Doe", "Mustermann", "2022" and "Treatise" must not.
func TestCiteprocWithPandocMeta(t *testing.T) {
	const src = `
---
---
---
...
testContent
`
	runCiteprocTest(t, src,
		[]string{"testContent"},
		[]string{"Doe", "Mustermann", "2022", "Treatise"},
	)
}
// TestCiteprocWithBibliography renders a document whose second metadata block
// sets bibliography to testdata/bibliography.bib but whose body contains no
// citation. The body text must appear in the output, while "Doe",
// "Mustermann", "2022" and "Treatise" must not.
func TestCiteprocWithBibliography(t *testing.T) {
	const src = `
---
---
---
bibliography: testdata/bibliography.bib
...
testContent
`
	runCiteprocTest(t, src,
		[]string{"testContent"},
		[]string{"Doe", "Mustermann", "2022", "Treatise"},
	)
}
// TestCiteprocWithExplicitCitation renders a document that sets bibliography to
// testdata/bibliography.bib and whose body is the citation @Doe2022. The
// output must contain "Doe", "Mustermann", "2022" and "Treatise"; nothing is
// required to be absent.
func TestCiteprocWithExplicitCitation(t *testing.T) {
	const src = `
---
---
---
bibliography: testdata/bibliography.bib
...
@Doe2022
`
	mustContain := []string{"Doe", "Mustermann", "2022", "Treatise"}
	mustNotContain := []string{}
	runCiteprocTest(t, src, mustContain, mustNotContain)
}
// TestCiteprocWithNocite renders a document with no body text that sets
// bibliography to testdata/bibliography.bib and nocite to "@*" in its second
// metadata block. The output must contain "Doe", "Mustermann", "2022" and
// "Treatise"; nothing is required to be absent.
func TestCiteprocWithNocite(t *testing.T) {
	// The "@*" line has to be indented: content of a YAML block scalar ("|")
	// must be indented deeper than its key, otherwise it is not part of the
	// nocite value.
	content := `
---
---
---
bibliography: testdata/bibliography.bib
nocite: |
  @*
...
`
	expected := []string{"Doe", "Mustermann", "2022", "Treatise"}
	runCiteprocTest(t, content, expected, []string{})
}