hugo/markup/pandoc/convert.go
Sebastian Höffner 861f9d5b7c
markup: add --citeproc to pandoc converter
Adds the citeproc filter to the pandoc converter.

There are several PRs for this feature already. However, I think
simply adding `--citeproc` is the cleanest way to enable this feature,
with the option to flesh it out later, e.g., in #7529.

Some PRs and issues attempt adding more config options to Hugo which
indirectly configure pandoc, but I think simply configuring Pandoc via
Pandoc itself is simpler, as it is already possible with two YAML
blocks -- one for Hugo, and one for Pandoc:

    ---
    title: This is the Hugo YAML block
    ---
    ---
    bibliography: assets/pandoc-yaml-block-bibliography.bib
    ...
    Document content with @citation!

There are other useful options, e.g., #4800 attempts to use `nocite`,
which works out of the box with this PR:

    ---
    title: This is the Hugo YAML block
    ---
    ---
    bibliography: assets/pandoc-yaml-block-bibliography.bib
    nocite: |
      @*
    ...
    Document content with no citations but a full bibliography:

    ## Bibliography

Other useful options are `csl: ...` and `link-citations: true`, which
set the path to a custom CSL file and create HTML links between the
references and the bibliography.

The following issues and PRs are related:

- Add support for parsing citations and Jupyter notebooks via Pandoc and/or Goldmark extension #6101
  Bundles multiple requests, this PR tackles citation parsing.

- WIP: Bibliography with Pandoc #4800
  Passes the frontmatter to Pandoc and still uses
  `--filter pandoc-citeproc` instead of `--citeproc`.
- Allow configuring Pandoc #7529
  That PR is much more extensive and might eventually supersede this PR,
  but I think --bibliography and --citeproc should be independent
  options (--bibliography should be optional and citeproc can always be
  specified).
- Pandoc - allow citeproc extension to be invoked, with bibliography. #8610
  Similar to #7529, #8610 adds a new config option to Hugo.
  I think passing --citeproc and letting the users decide on the
  metadata they want to pass to pandoc is better, albeit uglier.
2025-03-19 21:37:28 -07:00

133 lines
3.6 KiB
Go

// Copyright 2019 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package pandoc converts content to HTML using Pandoc as an external helper.
package pandoc
import (
"bytes"
"sync"
"github.com/gohugoio/hugo/common/hexec"
"github.com/gohugoio/hugo/htesting"
"github.com/gohugoio/hugo/identity"
"github.com/gohugoio/hugo/markup/converter"
"github.com/gohugoio/hugo/markup/internal"
)
// Provider is the package entry point. It is the converter.ProviderProvider
// value for the pandoc converter, backed by the stateless provider type.
var Provider converter.ProviderProvider = provider{}
// provider implements converter.ProviderProvider; it carries no state.
type provider struct{}
// New returns a converter.Provider registered under the name "pandoc". The
// provider's factory builds a fresh pandocConverter for each document context,
// capturing cfg so every converter shares the same provider configuration.
func (p provider) New(cfg converter.ProviderConfig) (converter.Provider, error) {
	newConverter := func(docCtx converter.DocumentContext) (converter.Converter, error) {
		conv := &pandocConverter{ctx: docCtx, cfg: cfg}
		return conv, nil
	}
	return converter.NewProvider("pandoc", newConverter), nil
}
// pandocConverter converts a document by handing its source to the external
// pandoc binary.
type pandocConverter struct {
// ctx is the document context the converter was created for; it is passed
// along when rendering.
ctx converter.DocumentContext
// cfg is the provider configuration; its Logger and Exec fields are used
// in this file.
cfg converter.ProviderConfig
}
// Convert renders ctx.Src with pandoc and returns the output wrapped as a
// converter.Bytes result. Any error from the rendering step is returned as-is
// together with a nil result.
func (c *pandocConverter) Convert(ctx converter.RenderContext) (converter.ResultRender, error) {
	rendered, renderErr := c.getPandocContent(ctx.Src, c.ctx)
	if renderErr != nil {
		return nil, renderErr
	}

	var result converter.ResultRender = converter.Bytes(rendered)
	return result, nil
}
// Supports reports whether this converter supports the given feature.
// It unconditionally returns false; the feature argument is not inspected.
func (c *pandocConverter) Supports(feature identity.Identity) bool {
return false
}
// getPandocContent calls pandoc as an external helper to convert pandoc
// markdown to HTML.
//
// If no pandoc binary is found, a notice is logged and src is returned
// unchanged with a nil error. Otherwise pandoc is invoked with --mathjax, plus
// --citeproc when supportsCitations reports that it is available.
func (c *pandocConverter) getPandocContent(src []byte, ctx converter.DocumentContext) ([]byte, error) {
	bin := getPandocBinaryName()
	if bin == "" {
		c.cfg.Logger.Println("pandoc not found in $PATH: Please install.\n",
			" Leaving pandoc content unrendered.")
		return src, nil
	}

	pandocArgs := []string{"--mathjax"}
	if supportsCitations(c.cfg) {
		pandocArgs = append(pandocArgs, "--citeproc")
	}

	return internal.ExternallyRenderContent(c.cfg, ctx, src, bin, pandocArgs)
}
// pandocBinary is the executable name used to locate and invoke pandoc.
const pandocBinary = "pandoc"

// getPandocBinaryName returns pandocBinary when hexec.InPath reports it as
// available, and the empty string otherwise.
func getPandocBinaryName() string {
	if !hexec.InPath(pandocBinary) {
		return ""
	}
	return pandocBinary
}
var (
	// pandocSupportsCiteprocOnce guards the one-time --citeproc probe.
	pandocSupportsCiteprocOnce sync.Once
	// pandocSupportsCiteproc caches the probe result.
	pandocSupportsCiteproc bool
	// pandocSupportsCiteprocErr caches the probe error so that every call,
	// not just the first, reports why the probe failed.
	pandocSupportsCiteprocErr error
)

// getPandocSupportsCiteproc runs pandoc with --dump-args to determine whether
// it knows the --citeproc argument.
//
// The probe is executed at most once per process; later calls return the
// cached result and the cached error. It returns (true, nil) when the probe
// command ran successfully, and (false, err) when the command could not be
// created or exited with an error.
func getPandocSupportsCiteproc(cfg converter.ProviderConfig) (bool, error) {
	pandocSupportsCiteprocOnce.Do(func() {
		// Capture stdout in a throwaway buffer; only the exit status matters.
		var out bytes.Buffer
		argsv := []any{"--dump-args", "--citeproc", hexec.WithStdout(&out)}

		// Errors are stored in the package-level variable rather than a
		// closure-local one, so they are not lost to shadowing and remain
		// visible to calls that do not execute the Once body.
		cmd, err := cfg.Exec.New(pandocBinary, argsv...)
		if err != nil {
			pandocSupportsCiteprocErr = err
			return
		}
		if err := cmd.Run(); err != nil {
			pandocSupportsCiteprocErr = err
			return
		}
		pandocSupportsCiteproc = true
	})
	return pandocSupportsCiteproc, pandocSupportsCiteprocErr
}
// supportsCitations reports whether citeproc is available: pandoc must be
// present (per Supports) and the --citeproc probe must succeed without error.
func supportsCitations(cfg converter.ProviderConfig) bool {
	if !Supports() {
		return false
	}
	ok, probeErr := getPandocSupportsCiteproc(cfg)
	return probeErr == nil && ok
}
// Supports returns whether Pandoc is installed on this computer.
//
// When htesting.SupportsAll() is true, a missing binary is not reported as
// false; the function panics with "pandoc not installed" instead.
func Supports() bool {
	installed := getPandocBinaryName() != ""
	if !htesting.SupportsAll() {
		return installed
	}
	if !installed {
		panic("pandoc not installed")
	}
	return true
}