Skip to content

Module manubot.pandoc.bibliography

View Source
import json

import logging

import os

import subprocess

from typing import Any, Dict, List, Optional

from manubot.pandoc.util import get_pandoc_info

from manubot.util import shlex_join

def load_bibliography(
    path: Optional[str] = None,
    text: Optional[str] = None,
    input_format: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Convert a bibliography to CSL JSON using either `pandoc-citeproc --bib2json`
    or `pandoc --to=csljson`, depending on availability of pandoc commands on the system.
    Accepts either a bibliography path or text (string). If supplying text,
    pandoc-citeproc will likely require input_format be specified.
    The CSL JSON is returned as Python objects.
    If loading fails, log an error and return an empty list.

    Parameters
    ----------
    path : str, pathlike, or None
        Path to a bibliography file. Extension is used by pandoc-citeproc to infer the
        format of the input.
    text : str or None
        Text representation of the bibliography, such as a JSON-formatted string.
        `input_format` should be specified if providing text input.
    input_format : str or None
        Manually specified input format that is supported by pandoc-citeproc:
        https://github.com/jgm/pandoc-citeproc/blob/master/man/pandoc-citeproc.1.md#options
        Use 'bib' for BibLaTeX. Use 'json' for CSL JSON.

    Returns
    -------
    csl_json : JSON-like object
        CSL JSON Data for the references encoded by the input bibliography.

    Raises
    ------
    ValueError
        If both `path` and `text` are supplied, or if neither is supplied.
    """
    use_path = path is not None
    use_text = text is not None
    # Validate before touching `path`: os.fspath(None) would otherwise raise a
    # TypeError and mask the intended ValueError when neither input is given.
    if use_path == use_text:
        raise ValueError("load_bibliography: specify either path or text but not both.")
    if use_path:
        path = os.fspath(path)
    pdoc_info = get_pandoc_info()
    if pdoc_info["pandoc-citeproc"]:
        return _load_bibliography_pandoc_citeproc(path, text, input_format)
    # Without pandoc-citeproc, only BibLaTeX input can be converted by pandoc itself.
    if input_format == "bib" or (use_path and path.endswith(".bib")):
        return _load_bibliography_pandoc(path, text)
    logging.error(
        "pandoc-citeproc not found on system, but is required to convert any format besides 'bib': "
        "manubot.pandoc.bibliography.load_bibliography returning empty CSL JSON"
    )
    return []

def _load_bibliography_pandoc_citeproc(
    path: Optional[str] = None,
    text: Optional[str] = None,
    input_format: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Run `pandoc-citeproc --bib2json` on a bibliography and return the CSL JSON.

    The bibliography is given either as a file path or as text. When
    `input_format` is truthy it is forwarded to pandoc-citeproc via `--format`.
    The actual subprocess invocation and output parsing are delegated to
    `_pandoc_system_call`.

    Parameters
    ----------
    path : str, pathlike, or None
        Path to a bibliography file.
    text : str or None
        Text representation of the bibliography.
    input_format : str or None
        Input format name passed to pandoc-citeproc's `--format` option:
        https://github.com/jgm/pandoc-citeproc/blob/master/man/pandoc-citeproc.1.md#options

    Returns
    -------
    csl_json : JSON-like object
        Whatever `_pandoc_system_call` returns for the assembled command.
    """
    # Only add the --format flag when a format was actually requested.
    format_args = ["--format", input_format] if input_format else []
    return _pandoc_system_call(
        ["pandoc-citeproc", "--bib2json", *format_args], path, text
    )

def _load_bibliography_pandoc(
    path: Optional[str] = None,
    text: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Convert a biblatex (.bib) bibliography to CSL JSON by calling pandoc itself
    (`pandoc --from=biblatex --to=csljson`).

    Logs an error and returns an empty list when pandoc is not reported as
    available by `get_pandoc_info`, or when its version is below 2.11.
    """
    info = get_pandoc_info()
    # Pick the applicable failure message, if any; the version is only
    # inspected once pandoc is known to be present.
    failure = None
    if not info["pandoc"]:
        failure = (
            "pandoc not found on system: "
            "manubot.pandoc.bibliography.load_bibliography returning empty CSL JSON"
        )
    elif info["pandoc version"] < (2, 11):
        failure = (
            "pandoc >= version 2.11 required for biblatex to csljson conversion. "
            "manubot.pandoc.bibliography.load_bibliography returning empty CSL JSON"
        )
    if failure is not None:
        logging.error(failure)
        return []
    return _pandoc_system_call(
        ["pandoc", "--from=biblatex", "--to=csljson"], path, text
    )

def _pandoc_system_call(

    command_args: List[str], path: Optional[str], text: Optional[str]

) -> List[Dict[str, Any]]:

    """

    Call "pandoc citeproc" or "pandoc" using input from a path or text.

    Return dict representing CSL JSON.

    """

    assert command_args[0].startswith("pandoc")

    run_kwargs = {}

    if path:

        command_args.append(os.fspath(path))

    else:

        run_kwargs["input"] = text

    logging.info("load_bibliography subprocess args:\n>>> " + shlex_join(command_args))

    process = subprocess.run(

        command_args,

        capture_output=True,

        encoding="utf-8",

        **run_kwargs,

    )

    logging.info(f"captured stderr:\n{process.stderr}")

    if process.returncode:

        logging.error(

            f"Pandoc call returned nonzero exit code.\n"

            f"{shlex_join(process.args)}\n{process.stderr}"

        )

        return []

    try:

        csl_json = json.loads(process.stdout)

    except (TypeError, json.decoder.JSONDecodeError):

        logging.error(f"Error parsing bib2json output as JSON:\n{process.stdout}")

        csl_json = []

    return csl_json

Functions

load_bibliography

def load_bibliography(
    path: Optional[str] = None,
    text: Optional[str] = None,
    input_format: Optional[str] = None
) -> List[Dict[str, Any]]

Convert a bibliography to CSL JSON using either pandoc-citeproc --bib2json

or pandoc --to=csljson, depending on availability of pandoc commands on the system. Accepts either a bibliography path or text (string). If supplying text, pandoc-citeproc will likely require input_format be specified. The CSL JSON is returned as Python objects. If loading fails, log an error and return an empty list.

Parameters:

Name Type Description Default
path str, pathlike, or None Path to a bibliography file. Extension is used by pandoc-citeproc to infer the
format of the input.
None
text str or None Text representation of the bibliography, such as a JSON-formatted string.
input_format should be specified if providing text input.
None
input_format str or None Manually specified input format that is supported by pandoc-citeproc:
https://github.com/jgm/pandoc-citeproc/blob/master/man/pandoc-citeproc.1.md#options
Use 'bib' for BibLaTeX. Use 'json' for CSL JSON.
None

Returns:

Type Description
JSON-like object CSL JSON Data for the references encoded by the input bibliography.
View Source
def load_bibliography(
    path: Optional[str] = None,
    text: Optional[str] = None,
    input_format: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """
    Convert a bibliography to CSL JSON using either `pandoc-citeproc --bib2json`
    or `pandoc --to=csljson`, depending on availability of pandoc commands on the system.
    Accepts either a bibliography path or text (string). If supplying text,
    pandoc-citeproc will likely require input_format be specified.
    The CSL JSON is returned as Python objects.
    If loading fails, log an error and return an empty list.

    Parameters
    ----------
    path : str, pathlike, or None
        Path to a bibliography file. Extension is used by pandoc-citeproc to infer the
        format of the input.
    text : str or None
        Text representation of the bibliography, such as a JSON-formatted string.
        `input_format` should be specified if providing text input.
    input_format : str or None
        Manually specified input format that is supported by pandoc-citeproc:
        https://github.com/jgm/pandoc-citeproc/blob/master/man/pandoc-citeproc.1.md#options
        Use 'bib' for BibLaTeX. Use 'json' for CSL JSON.

    Returns
    -------
    csl_json : JSON-like object
        CSL JSON Data for the references encoded by the input bibliography.

    Raises
    ------
    ValueError
        If both `path` and `text` are supplied, or if neither is supplied.
    """
    use_path = path is not None
    use_text = text is not None
    # Validate before touching `path`: os.fspath(None) would otherwise raise a
    # TypeError and mask the intended ValueError when neither input is given.
    if use_path == use_text:
        raise ValueError("load_bibliography: specify either path or text but not both.")
    if use_path:
        path = os.fspath(path)
    pdoc_info = get_pandoc_info()
    if pdoc_info["pandoc-citeproc"]:
        return _load_bibliography_pandoc_citeproc(path, text, input_format)
    # Without pandoc-citeproc, only BibLaTeX input can be converted by pandoc itself.
    if input_format == "bib" or (use_path and path.endswith(".bib")):
        return _load_bibliography_pandoc(path, text)
    logging.error(
        "pandoc-citeproc not found on system, but is required to convert any format besides 'bib': "
        "manubot.pandoc.bibliography.load_bibliography returning empty CSL JSON"
    )
    return []