Skip to content

Module manubot.process.bibliography

View Source
import json

import logging

import os

import pathlib

from manubot.cite.citekey import shorten_citekey

from manubot.util import read_serialized_data

def load_bibliography(path: str) -> list:
    """
    Read the bibliography at `path` and return it as a list of CSL_Item objects.

    A path whose suffix is `.json` or `.yaml` is taken to already contain CSL Items
    and is parsed directly with `read_serialized_data` (URLs supported).
    Every other path is passed to `manubot.pandoc.bibliography.load_bibliography`,
    which converts it to CSL Items (URLs not supported).

    If reading a `.json`/`.yaml` path raises, or if the loaded value is not a list,
    an error is logged and an empty list is returned.
    """
    holds_csl_items = pathlib.Path(path).suffix in {".json", ".yaml"}
    if not holds_csl_items:
        # Imported lazily, only when a conversion through pandoc is needed.
        from manubot.pandoc.bibliography import (
            load_bibliography as load_bibliography_pandoc,
        )

        loaded = load_bibliography_pandoc(path)
    else:
        try:
            loaded = read_serialized_data(path)
        except Exception as error:
            # Short message at error level; full traceback only at info level.
            logging.error(f"load_bibliography: error reading {path!r}.\n{error}")
            logging.info("load_bibliography exception info", exc_info=True)
            loaded = []

    # Anything other than a list cannot be a collection of CSL Items.
    if not isinstance(loaded, list):
        logging.error(
            f"process.load_bibliography: csl_items read from {path} are of type {type(loaded)}. "
            "Setting csl_items to an empty list."
        )
        loaded = []

    from manubot.cite.csl_item import CSL_Item

    return list(map(CSL_Item, loaded))

def load_manual_references(paths=None, extra_csl_items=None) -> dict:
    """
    Read manual references from bibliography files and from in-memory CSL Items.

    Parameters:

    - `paths`: iterable of bibliography file paths (str or os.PathLike).
      Each path is loaded with `load_bibliography`. A path listed more than once
      is loaded only once, at its first position. `None` is treated as `[]`.
      Set `paths=[]` to only use `extra_csl_items`.
    - `extra_csl_items`: CSL Items stored as a Python object, used in addition to
      the CSL Items read from `paths`. `None` is treated as `[]`.

    Returns a dictionary mapping each standard_id to its CSL Item.
    The "id" of every returned CSL Item is set to `shorten_citekey(standard_id)`.

    Items for which `standardize_id()` raises are skipped and reported at info level.
    When multiple references have the same standard_id,
    precedence is given to the reference defined last.
    References in `extra_csl_items` take precedence over those from `paths`.
    """
    if paths is None:
        paths = []
    if extra_csl_items is None:
        extra_csl_items = []
    from manubot.cite.csl_item import CSL_Item

    csl_items = []
    # Normalize to plain path strings before removing duplicates, so that the
    # same file given as both a str and a PathLike is only loaded once.
    paths = list(dict.fromkeys(map(os.fspath, paths)))
    for path in paths:
        source_name = pathlib.Path(path).name
        for csl_item in load_bibliography(path):
            csl_item.note_append_text(
                "Loaded from an external bibliography file by Manubot."
            )
            csl_item.note_append_dict({"source_bibliography": source_name})
            csl_items.append(csl_item)
    # Appended last so that extra_csl_items win when standard_ids collide below.
    csl_items.extend(map(CSL_Item, extra_csl_items))
    manual_refs = {}
    for csl_item in csl_items:
        try:
            csl_item.standardize_id()
        except Exception:
            # default=str keeps this log message from raising TypeError when the
            # item holds values json cannot encode (for example dates parsed from
            # YAML); the item must be skipped, not crash the whole load.
            csl_item_str = json.dumps(csl_item, indent=2, default=str)
            logging.info(
                f"Skipping csl_item where setting standard_id failed:\n{csl_item_str}",
                exc_info=True,
            )
            continue
        standard_id = csl_item["id"]
        csl_item.set_id(shorten_citekey(standard_id))
        csl_item.clean()
        # Later items overwrite earlier ones with the same standard_id.
        manual_refs[standard_id] = csl_item
    return manual_refs

Functions

load_bibliography

def load_bibliography(
    path: str
) -> list

Load a bibliography as CSL Items (a CSL JSON Python object).

For paths that already contain CSL Items (inferred from a .json or .yaml extension), parse these files directly (URLs supported). Otherwise, delegate conversion to CSL Items to pandoc-citeproc (URLs not supported). If loading fails, log an error and return an empty list.

View Source
def load_bibliography(path: str) -> list:
    """
    Read the bibliography at `path` and return it as a list of CSL_Item objects.

    A path whose suffix is `.json` or `.yaml` is taken to already contain CSL Items
    and is parsed directly with `read_serialized_data` (URLs supported).
    Every other path is passed to `manubot.pandoc.bibliography.load_bibliography`,
    which converts it to CSL Items (URLs not supported).

    If reading a `.json`/`.yaml` path raises, or if the loaded value is not a list,
    an error is logged and an empty list is returned.
    """
    holds_csl_items = pathlib.Path(path).suffix in {".json", ".yaml"}
    if not holds_csl_items:
        # Imported lazily, only when a conversion through pandoc is needed.
        from manubot.pandoc.bibliography import (
            load_bibliography as load_bibliography_pandoc,
        )

        loaded = load_bibliography_pandoc(path)
    else:
        try:
            loaded = read_serialized_data(path)
        except Exception as error:
            # Short message at error level; full traceback only at info level.
            logging.error(f"load_bibliography: error reading {path!r}.\n{error}")
            logging.info("load_bibliography exception info", exc_info=True)
            loaded = []

    # Anything other than a list cannot be a collection of CSL Items.
    if not isinstance(loaded, list):
        logging.error(
            f"process.load_bibliography: csl_items read from {path} are of type {type(loaded)}. "
            "Setting csl_items to an empty list."
        )
        loaded = []

    from manubot.cite.csl_item import CSL_Item

    return list(map(CSL_Item, loaded))

load_manual_references

def load_manual_references(
    paths=None,
    extra_csl_items=None
) -> dict

Read manual references from bibliography text files specified by a list of paths.

Returns a dictionary mapping each standard_id to its CSL Item. extra_csl_items specifies CSL Items stored as a Python object, to be used in addition to the CSL Items stored as text in the files specified by paths. Set paths=[] to only use extra_csl_items. When multiple references have the same standard_id, precedence is given to the reference defined last. References in extra_csl_items take precedence over those from paths.

View Source
def load_manual_references(paths=None, extra_csl_items=None) -> dict:
    """
    Read manual references from bibliography files and from in-memory CSL Items.

    Parameters:

    - `paths`: iterable of bibliography file paths (str or os.PathLike).
      Each path is loaded with `load_bibliography`. A path listed more than once
      is loaded only once, at its first position. `None` is treated as `[]`.
      Set `paths=[]` to only use `extra_csl_items`.
    - `extra_csl_items`: CSL Items stored as a Python object, used in addition to
      the CSL Items read from `paths`. `None` is treated as `[]`.

    Returns a dictionary mapping each standard_id to its CSL Item.
    The "id" of every returned CSL Item is set to `shorten_citekey(standard_id)`.

    Items for which `standardize_id()` raises are skipped and reported at info level.
    When multiple references have the same standard_id,
    precedence is given to the reference defined last.
    References in `extra_csl_items` take precedence over those from `paths`.
    """
    if paths is None:
        paths = []
    if extra_csl_items is None:
        extra_csl_items = []
    from manubot.cite.csl_item import CSL_Item

    csl_items = []
    # Normalize to plain path strings before removing duplicates, so that the
    # same file given as both a str and a PathLike is only loaded once.
    paths = list(dict.fromkeys(map(os.fspath, paths)))
    for path in paths:
        source_name = pathlib.Path(path).name
        for csl_item in load_bibliography(path):
            csl_item.note_append_text(
                "Loaded from an external bibliography file by Manubot."
            )
            csl_item.note_append_dict({"source_bibliography": source_name})
            csl_items.append(csl_item)
    # Appended last so that extra_csl_items win when standard_ids collide below.
    csl_items.extend(map(CSL_Item, extra_csl_items))
    manual_refs = {}
    for csl_item in csl_items:
        try:
            csl_item.standardize_id()
        except Exception:
            # default=str keeps this log message from raising TypeError when the
            # item holds values json cannot encode (for example dates parsed from
            # YAML); the item must be skipped, not crash the whole load.
            csl_item_str = json.dumps(csl_item, indent=2, default=str)
            logging.info(
                f"Skipping csl_item where setting standard_id failed:\n{csl_item_str}",
                exc_info=True,
            )
            continue
        standard_id = csl_item["id"]
        csl_item.set_id(shorten_citekey(standard_id))
        csl_item.clean()
        # Later items overwrite earlier ones with the same standard_id.
        manual_refs[standard_id] = csl_item
    return manual_refs