Skip to content

Module manubot.pandoc.bibliography

View Source
import json

import logging

import os

import subprocess

from typing import Any, Dict, List, Optional

from manubot.pandoc.util import get_pandoc_info

from manubot.util import shlex_join

def load_bibliography(
    path: Optional[str] = None,
    text: Optional[str] = None,
    input_format: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Convert a bibliography to CSL JSON using either `pandoc-citeproc --bib2json`
    or `pandoc --to=csljson`, depending on availability of pandoc commands on the system.
    Accepts either a bibliography path or text (string). If supplying text,
    pandoc-citeproc will likely require input_format be specified.
    The CSL JSON is returned as Python objects.
    If loading fails, log an error and return an empty list.

    Parameters
    ----------
    path : str, pathlike, or None
        Path to a bibliography file. Extension is used by pandoc-citeproc to infer the
        format of the input.
    text : str or None
        Text representation of the bibliography, such as a JSON-formatted string.
        `input_format` should be specified if providing text input.
    input_format : str or None
        Manually specified input format that is supported by pandoc-citeproc:
        Use 'bib' for BibLaTeX. Use 'json' for CSL JSON.

    Returns
    -------
    csl_json : JSON-like object
        CSL JSON Data for the references encoded by the input bibliography.

    Raises
    ------
    ValueError
        If both `path` and `text` are given, or if neither is given.
    """
    use_text = path is None
    use_path = text is None
    # Validate before touching `path`: when neither argument is supplied,
    # os.fspath(None) would raise TypeError and mask the intended ValueError.
    if not (use_text ^ use_path):
        raise ValueError("load_bibliography: specify either path or text but not both.")
    if use_path:
        # Normalize path-like objects to a plain string for endswith() and subprocess.
        path = os.fspath(path)
    pdoc_info = get_pandoc_info()
    if pdoc_info["pandoc-citeproc"]:
        return _load_bibliography_pandoc_citeproc(path, text, input_format)
    # Without pandoc-citeproc, only BibLaTeX input is dispatched to pandoc itself.
    if input_format == "bib" or (use_path and path.endswith(".bib")):
        return _load_bibliography_pandoc(path, text)
    logging.error(
        "pandoc-citeproc not found on system, but is required to convert any format besides 'bib': "
        "manubot.pandoc.bibliography.load_bibliography returning empty CSL JSON"
    )
    return []

def _load_bibliography_pandoc_citeproc(
    path: Optional[str] = None,
    text: Optional[str] = None,
    input_format: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Convert a bibliography to CSL JSON using `pandoc-citeproc --bib2json`.
    Accepts either a bibliography path or text (string). If supplying text,
    pandoc-citeproc will likely require input_format be specified.
    The CSL JSON is returned as Python objects.
    If loading fails, log an error and return an empty list.

    Parameters
    ----------
    path : str, pathlike, or None
        Path to a bibliography file. Extension is used by pandoc-citeproc to infer the
        format of the input.
    text : str or None
        Text representation of the bibliography, such as a JSON-formatted string.
        `input_format` should be specified if providing text input.
    input_format : str or None
        Manually specified input format that is supported by pandoc-citeproc:
        Use 'bib' for BibLaTeX. Use 'json' for CSL JSON.

    Returns
    -------
    csl_json : JSON-like object
        CSL JSON Data for the references encoded by the input bibliography.
    """
    command_args = ["pandoc-citeproc", "--bib2json"]
    if input_format:
        # Only pass --format when the caller overrides extension-based inference.
        command_args.extend(["--format", input_format])
    return _pandoc_system_call(command_args, path, text)

def _load_bibliography_pandoc(
    path: Optional[str] = None,
    text: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Convert a biblatex (.bib) bibliography to CSL JSON data using pandoc directly.
    Pandoc support for csljson output requires pandoc >= 2.11.

    Accepts either a bibliography path or text (string), which is forwarded
    to the pandoc call. If pandoc is missing or older than 2.11, log an error
    and return an empty list.
    """
    pdoc_info = get_pandoc_info()
    if not pdoc_info["pandoc"]:
        logging.error(
            "pandoc not found on system: "
            "manubot.pandoc.bibliography.load_bibliography returning empty CSL JSON"
        )
        return []
    # The version is compared against a tuple, so it is expected to be tuple-like.
    if pdoc_info["pandoc version"] < (2, 11):
        logging.error(
            "pandoc >= version 2.11 required for biblatex to csljson conversion. "
            "manubot.pandoc.bibliography.load_bibliography returning empty CSL JSON"
        )
        return []
    command_args = "pandoc --from=biblatex --to=csljson".split()
    return _pandoc_system_call(command_args, path, text)

def _pandoc_system_call(
    command_args: List[str], path: Optional[str], text: Optional[str]
) -> List[Dict[str, Any]]:
    """
    Call "pandoc-citeproc" or "pandoc" using input from a path or text.
    Return the CSL JSON parsed from the command's stdout as Python objects.

    If `path` is truthy it is appended to the command as a positional argument;
    otherwise `text` is supplied to the process on stdin.
    If the command exits with a nonzero code, or its stdout cannot be parsed
    as JSON, log an error and return an empty list.
    """
    assert command_args[0].startswith("pandoc")
    run_kwargs = {}
    if path:
        # Build a new list rather than appending, so the caller's list is not mutated.
        command_args = [*command_args, str(path)]
    else:
        run_kwargs["input"] = text
    logging.info("load_bibliography subprocess args:\n>>> " + shlex_join(command_args))
    # NOTE(review): text-mode pipes with UTF-8 are assumed here so that a str
    # `text` can be sent on stdin -- confirm against the upstream implementation.
    process = subprocess.run(
        command_args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        encoding="utf-8",
        **run_kwargs,
    )
    logging.info(f"captured stderr:\n{process.stderr}")
    if process.returncode:
        logging.error(
            f"Pandoc call returned nonzero exit code.\n"
            f"{shlex_join(process.args)}\n{process.stderr}"
        )
        return []
    try:
        csl_json = json.loads(process.stdout)
    except (TypeError, json.decoder.JSONDecodeError):
        logging.error(f"Error parsing bib2json output as JSON:\n{process.stdout}")
        csl_json = []
    return csl_json



def load_bibliography(
    path: Optional[str] = None,
    text: Optional[str] = None,
    input_format: Optional[str] = None
) -> List[Dict[str, Any]]

Convert a bibliography to CSL JSON using either `pandoc-citeproc --bib2json` or `pandoc --to=csljson`, depending on availability of pandoc commands on the system. Accepts either a bibliography path or text (string). If supplying text, pandoc-citeproc will likely require `input_format` be specified. The CSL JSON is returned as Python objects. If loading fails, log an error and return an empty list.


**Parameters**

| Name | Type | Description | Default |
|---|---|---|---|
| path | str, pathlike, or None | Path to a bibliography file. Extension is used by pandoc-citeproc to infer the format of the input. | None |
| text | str or None | Text representation of the bibliography, such as a JSON-formatted string. `input_format` should be specified if providing text input. | None |
| input_format | str or None | Manually specified input format that is supported by pandoc-citeproc: Use 'bib' for BibLaTeX. Use 'json' for CSL JSON. | None |


**Returns**

| Type | Description |
|---|---|
| JSON-like object | CSL JSON Data for the references encoded by the input bibliography. |
View Source
def load_bibliography(
    path: Optional[str] = None,
    text: Optional[str] = None,
    input_format: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Convert a bibliography to CSL JSON using either `pandoc-citeproc --bib2json`
    or `pandoc --to=csljson`, depending on availability of pandoc commands on the system.
    Accepts either a bibliography path or text (string). If supplying text,
    pandoc-citeproc will likely require input_format be specified.
    The CSL JSON is returned as Python objects.
    If loading fails, log an error and return an empty list.

    Parameters
    ----------
    path : str, pathlike, or None
        Path to a bibliography file. Extension is used by pandoc-citeproc to infer the
        format of the input.
    text : str or None
        Text representation of the bibliography, such as a JSON-formatted string.
        `input_format` should be specified if providing text input.
    input_format : str or None
        Manually specified input format that is supported by pandoc-citeproc:
        Use 'bib' for BibLaTeX. Use 'json' for CSL JSON.

    Returns
    -------
    csl_json : JSON-like object
        CSL JSON Data for the references encoded by the input bibliography.

    Raises
    ------
    ValueError
        If both `path` and `text` are given, or if neither is given.
    """
    use_text = path is None
    use_path = text is None
    # Validate before touching `path`: when neither argument is supplied,
    # os.fspath(None) would raise TypeError and mask the intended ValueError.
    if not (use_text ^ use_path):
        raise ValueError("load_bibliography: specify either path or text but not both.")
    if use_path:
        # Normalize path-like objects to a plain string for endswith() and subprocess.
        path = os.fspath(path)
    pdoc_info = get_pandoc_info()
    if pdoc_info["pandoc-citeproc"]:
        return _load_bibliography_pandoc_citeproc(path, text, input_format)
    # Without pandoc-citeproc, only BibLaTeX input is dispatched to pandoc itself.
    if input_format == "bib" or (use_path and path.endswith(".bib")):
        return _load_bibliography_pandoc(path, text)
    logging.error(
        "pandoc-citeproc not found on system, but is required to convert any format besides 'bib': "
        "manubot.pandoc.bibliography.load_bibliography returning empty CSL JSON"
    )
    return []