diff --git a/docs/content/en/content-management/bibliography.md b/docs/content/en/content-management/bibliography.md new file mode 100644 index 00000000000..315f76a5741 --- /dev/null +++ b/docs/content/en/content-management/bibliography.md @@ -0,0 +1,50 @@ +--- +title: Bibliographies in Markdown +linkTitle: Bibliography +description: Include citations and a bibliography in Markdown using LaTeX markup. +categories: [content management] +keywords: [latex,pandoc,citation,reference,bibliography] +menu: + docs: + parent: content-management + weight: 320 +weight: 320 +toc: true +--- + +{{< new-in 0.144.0 />}} + +## Citations and Bibliographies + +[Pandoc](https://pandoc.org) is a universal document converter and can be used to convert Markdown files. + +With **Pandoc >= 2.11**, you can use [citations](https://pandoc.org/MANUAL.html#extension-citations). +One way is to employ [BibTeX files](https://en.wikibooks.org/wiki/LaTeX/Bibliography_Management#BibTeX) to cite: + +``` +--- +title: Citation document +--- +--- +bibliography: assets/bibliography.bib +... +This is a citation: @Doe2022 +``` + +Note that Hugo will **not** pass its own YAML metadata block to Pandoc; however, it will pass the **second** metadata block, the one delimited by `---` and `...`. +Thus, all Pandoc-specific settings should go there. + +You can also add all elements from a bibliography file (without citing them explicitly) using: + +``` +--- +title: My Publications +--- +--- +bibliography: assets/bibliography.bib +nocite: | + @* +... +``` + +It is also possible to provide a custom [CSL style](https://citationstyles.org/authors/) by adding `csl: path-to-style.csl` to the Pandoc metadata block. 
diff --git a/docs/content/en/content-management/formats.md b/docs/content/en/content-management/formats.md index a1f203f3c7d..b23d0cc5491 100644 --- a/docs/content/en/content-management/formats.md +++ b/docs/content/en/content-management/formats.md @@ -111,6 +111,12 @@ Hugo passes these CLI flags when calling the Pandoc executable: --mathjax ``` +If your Pandoc has version 2.11 or later, it also passes this CLI flag: + +```text +--citeproc +``` + [Pandoc]: https://pandoc.org/ ### reStructuredText diff --git a/markup/pandoc/convert.go b/markup/pandoc/convert.go index 8f2d99c9a83..3da923d3e96 100644 --- a/markup/pandoc/convert.go +++ b/markup/pandoc/convert.go @@ -15,10 +15,14 @@ package pandoc import ( + "bytes" + "strconv" + "strings" + "sync" + "github.com/gohugoio/hugo/common/hexec" "github.com/gohugoio/hugo/htesting" "github.com/gohugoio/hugo/identity" - "github.com/gohugoio/hugo/markup/converter" "github.com/gohugoio/hugo/markup/internal" ) @@ -64,6 +68,9 @@ func (c *pandocConverter) getPandocContent(src []byte, ctx converter.DocumentCon return src, nil } args := []string{"--mathjax"} + if supportsCitations(c.cfg) { + args = append(args[:], "--citeproc") + } return internal.ExternallyRenderContent(c.cfg, ctx, src, binaryName, args) } @@ -76,6 +83,69 @@ func getPandocBinaryName() string { return "" } +type pandocVersion struct { + major, minor int64 +} + +func (left pandocVersion) greaterThanOrEqual(right pandocVersion) bool { + return left.major > right.major || (left.major == right.major && left.minor >= right.minor) +} + +var versionOnce sync.Once +var foundPandocVersion pandocVersion + +// getPandocVersion parses the pandoc version output +func getPandocVersion(cfg converter.ProviderConfig) (pandocVersion, error) { + var err error + + versionOnce.Do(func() { + argsv := []any{"--version"} + + var out bytes.Buffer + argsv = append(argsv, hexec.WithStdout(&out)) + + cmd, err := cfg.Exec.New(pandocBinary, argsv...) 
+ if err != nil { + cfg.Logger.Errorf("Could not call pandoc: %v", err) + foundPandocVersion = pandocVersion{0, 0} + return + } + + err = cmd.Run() + if err != nil { + cfg.Logger.Errorf("%s --version: %v", pandocBinary, err) + foundPandocVersion = pandocVersion{0, 0} + return + } + + outbytes := bytes.Replace(out.Bytes(), []byte("\r"), []byte(""), -1) + output := strings.Split(string(outbytes), "\n")[0] + // Split, e.g., "pandoc 2.5" into 2 and 5 and convert them to integers + versionStrings := strings.Split(strings.Split(output, " ")[1], ".") + majorVersion, err := strconv.ParseInt(versionStrings[0], 10, 64) + if err != nil { + println(err) + } + minorVersion, err := strconv.ParseInt(versionStrings[1], 10, 64) + if err != nil { + println(err) + } + foundPandocVersion = pandocVersion{majorVersion, minorVersion} + }) + + return foundPandocVersion, err +} + +// SupportsCitations returns true for pandoc versions >= 2.11, which include citeproc +func supportsCitations(cfg converter.ProviderConfig) bool { + if Supports() { + foundPandocVersion, err := getPandocVersion(cfg) + supportsCitations := foundPandocVersion.greaterThanOrEqual(pandocVersion{2, 11}) && err == nil + return supportsCitations + } + return false +} + // Supports returns whether Pandoc is installed on this computer. 
func Supports() bool { hasBin := getPandocBinaryName() != "" diff --git a/markup/pandoc/convert_test.go b/markup/pandoc/convert_test.go index dff6b1ed37a..4565338479f 100644 --- a/markup/pandoc/convert_test.go +++ b/markup/pandoc/convert_test.go @@ -25,7 +25,7 @@ import ( qt "github.com/frankban/quicktest" ) -func TestConvert(t *testing.T) { +func setupTestConverter(t *testing.T) (*qt.C, converter.Converter, converter.ProviderConfig) { if !Supports() { t.Skip("pandoc not installed") } @@ -34,11 +34,140 @@ func TestConvert(t *testing.T) { var err error sc.Exec.Allow, err = security.NewWhitelist("pandoc") c.Assert(err, qt.IsNil) - p, err := Provider.New(converter.ProviderConfig{Exec: hexec.New(sc, "", loggers.NewDefault()), Logger: loggers.NewDefault()}) + cfg := converter.ProviderConfig{Exec: hexec.New(sc, "", loggers.NewDefault()), Logger: loggers.NewDefault()} + p, err := Provider.New(cfg) c.Assert(err, qt.IsNil) conv, err := p.New(converter.DocumentContext{}) c.Assert(err, qt.IsNil) - b, err := conv.Convert(converter.RenderContext{Src: []byte("testContent")}) + return c, conv, cfg +} + +func TestConvert(t *testing.T) { + c, conv, _ := setupTestConverter(t) + output, err := conv.Convert(converter.RenderContext{Src: []byte("testContent")}) c.Assert(err, qt.IsNil) - c.Assert(string(b.Bytes()), qt.Equals, "
testContent
\n") + c.Assert(string(output.Bytes()), qt.Equals, "testContent
\n") +} + +func runCiteprocTest(t *testing.T, content string, expectContained []string, expectNotContained []string) { + c, conv, cfg := setupTestConverter(t) + if !supportsCitations(cfg) { + t.Skip("pandoc does not support citations") + } + output, err := conv.Convert(converter.RenderContext{Src: []byte(content)}) + c.Assert(err, qt.IsNil) + for _, expected := range expectContained { + c.Assert(string(output.Bytes()), qt.Contains, expected) + } + for _, notExpected := range expectNotContained { + c.Assert(string(output.Bytes()), qt.Not(qt.Contains), notExpected) + } +} + +func TestGetPandocVersionCallTwice(t *testing.T) { + c, _, cfg := setupTestConverter(t) + + version1, err1 := getPandocVersion(cfg) + version2, err2 := getPandocVersion(cfg) + c.Assert(version1, qt.Equals, version2) + c.Assert(err1, qt.IsNil) + c.Assert(err2, qt.IsNil) +} + +func TestPandocVersionEquality(t *testing.T) { + c := qt.New(t) + v1 := pandocVersion{1, 0} + v2 := pandocVersion{2, 0} + v2_2 := pandocVersion{2, 2} + v1_2 := pandocVersion{1, 2} + v2_11 := pandocVersion{2, 11} + v3_9 := pandocVersion{3, 9} + v1_15 := pandocVersion{1, 15} + + c.Assert(v1.greaterThanOrEqual(v1), qt.IsTrue) + + c.Assert(v1.greaterThanOrEqual(v2), qt.IsFalse) + c.Assert(v2.greaterThanOrEqual(v1), qt.IsTrue) + + c.Assert(v2.greaterThanOrEqual(v2_2), qt.IsFalse) + c.Assert(v2_2.greaterThanOrEqual(v2), qt.IsTrue) + + c.Assert(v2_2.greaterThanOrEqual(v1_2), qt.IsTrue) + c.Assert(v1_2.greaterThanOrEqual(v2_2), qt.IsFalse) + + c.Assert(v2_11.greaterThanOrEqual(v2_2), qt.IsTrue) + c.Assert(v2_2.greaterThanOrEqual(v2_11), qt.IsFalse) + + c.Assert(v3_9.greaterThanOrEqual(v2_11), qt.IsTrue) + c.Assert(v2_11.greaterThanOrEqual(v3_9), qt.IsFalse) + + c.Assert(v2_11.greaterThanOrEqual(v1_15), qt.IsTrue) + c.Assert(v1_15.greaterThanOrEqual(v2_11), qt.IsFalse) +} + +func TestCiteprocWithHugoMeta(t *testing.T) { + content := ` +--- +title: Test +published: 2022-05-30 +--- +testContent +` + expected := []string{"testContent"} + 
unexpected := []string{"Doe", "Mustermann", "2022", "Treatise"} + runCiteprocTest(t, content, expected, unexpected) +} + +func TestCiteprocWithPandocMeta(t *testing.T) { + content := ` +--- +--- +--- +... +testContent +` + expected := []string{"testContent"} + unexpected := []string{"Doe", "Mustermann", "2022", "Treatise"} + runCiteprocTest(t, content, expected, unexpected) +} + +func TestCiteprocWithBibliography(t *testing.T) { + content := ` +--- +--- +--- +bibliography: testdata/bibliography.bib +... +testContent +` + expected := []string{"testContent"} + unexpected := []string{"Doe", "Mustermann", "2022", "Treatise"} + runCiteprocTest(t, content, expected, unexpected) +} + +func TestCiteprocWithExplicitCitation(t *testing.T) { + content := ` +--- +--- +--- +bibliography: testdata/bibliography.bib +... +@Doe2022 +` + expected := []string{"Doe", "Mustermann", "2022", "Treatise"} + runCiteprocTest(t, content, expected, []string{}) +} + +func TestCiteprocWithNocite(t *testing.T) { + content := ` +--- +--- +--- +bibliography: testdata/bibliography.bib +nocite: | + @* +... +` + expected := []string{"Doe", "Mustermann", "2022", "Treatise"} + runCiteprocTest(t, content, expected, []string{}) } diff --git a/markup/pandoc/testdata/bibliography.bib b/markup/pandoc/testdata/bibliography.bib new file mode 100644 index 00000000000..8fc1019b435 --- /dev/null +++ b/markup/pandoc/testdata/bibliography.bib @@ -0,0 +1,6 @@ +@article{Doe2022, + author = "Jane Doe and Max Mustermann", + title = "A Treatise on Hugo Tests", + journal = "Hugo Websites", + year = "2022", +}