Skip to content

Module manubot.cite.isbn

View Source
import json

import logging

import re

from .handlers import Handler

# Default number of seconds that set_isbnlib_timeout applies to isbnlib's
# thread and urlopen timeouts when the caller does not pass a value.
default_timeout = 3

def set_isbnlib_timeout(seconds=default_timeout):
    """
    Import isbnlib, apply `seconds` as both its threads timeout and its
    urlopen timeout, and return the imported module.

    isbnlib is imported inside the function, so callers obtain the
    configured module from the return value.
    """
    import isbnlib as lib

    config = lib.config
    config.setthreadstimeout(seconds=seconds)
    config.seturlopentimeout(seconds=seconds)
    return lib

class Handler_ISBN(Handler):
    """
    Citekey handler for the "isbn" prefix, backed by isbnlib.
    """

    standard_prefix = "isbn"
    prefixes = ["isbn"]

    def inspect(self, citekey):
        """
        Return a message describing the problem when `citekey.accession`
        fails isbnlib's strict ISBN check; otherwise return None.
        """
        lib = set_isbnlib_timeout()
        if not lib.notisbn(citekey.accession, level="strict"):
            return None
        return f"identifier violates the ISBN syntax according to isbnlib v{lib.__version__}"

    def standardize_prefix_accession(self, accession):
        """
        Return `self.standard_prefix` paired with `accession` converted by
        isbnlib's `to_isbn13`.
        """
        import isbnlib

        isbn13 = isbnlib.to_isbn13(accession)
        return self.standard_prefix, isbn13

    def get_csl_item(self, citekey):
        """
        Return the result of get_isbn_csl_item for
        `citekey.standard_accession`.
        """
        isbn = citekey.standard_accession
        return get_isbn_csl_item(isbn)

def get_isbn_csl_item(isbn: str):
    """
    Return CSL JSON Data for an ISBN, after converting it to 13-digit form.

    Each retriever in the module-level list `isbn_retrievers` is called in
    order with the converted ISBN, and the result of the first one that does
    not raise is returned. A failing retriever is reported with a warning
    (and its traceback at info level) before the next one is tried. If every
    retriever raises, an Exception is raised.
    """
    isbn13 = set_isbnlib_timeout().to_isbn13(isbn)
    for fetch in isbn_retrievers:
        try:
            return fetch(isbn13)
        except Exception as exc:
            failure_kind = exc.__class__.__name__
            logging.warning(
                f"Error in {fetch.__name__} for {isbn13} "
                f"due to a {failure_kind}:\n{exc}"
            )
            # Full traceback is only emitted at the more verbose info level.
            logging.info(exc, exc_info=True)
    raise Exception(f"all get_isbn_csl_item methods failed for {isbn13}")

def get_isbn_csl_item_zotero(isbn: str):
    """
    Return CSL JSON Data for an ISBN using Zotero's translation-server.

    The ISBN is passed to `manubot.cite.zotero.get_csl_item` as the
    identifier string "isbn:<isbn>".
    """
    from manubot.cite.zotero import get_csl_item as zotero_csl_item

    identifier = f"isbn:{isbn}"
    return zotero_csl_item(identifier)

def get_isbn_csl_item_citoid(isbn: str):
    """
    Return CSL JSON Data for an ISBN using the Wikipedia Citoid API.
    https://en.wikipedia.org/api/rest_v1/#!/Citation/getCitation

    The decoded response is expected to be a JSON list holding exactly one
    "mediawiki"-format record, whose fields are mapped onto a CSL item. A
    JSON object response is treated as an error report from the API.

    Raises:
        KeyError: the API answered with an object titled "Not found.".
        Exception: the API answered with any other JSON object; the message
            includes that object serialized as JSON.
        ValueError: the JSON list does not contain exactly one record.
    """
    import requests

    from manubot.util import get_manubot_user_agent

    headers = {"User-Agent": get_manubot_user_agent()}
    url = f"https://en.wikipedia.org/api/rest_v1/data/citation/mediawiki/{isbn}"
    response = requests.get(url, headers=headers)
    result = response.json()
    if isinstance(result, dict):
        # .get() so that an error object lacking "title" still reaches the
        # descriptive Exception below instead of a bare KeyError('title').
        if result.get("title") == "Not found.":
            raise KeyError(f"Metadata for ISBN {isbn} not found at {url}")
        # `result` is the already-decoded dict (it has no `.text`
        # attribute), so serialize it directly for the message.
        raise Exception(
            f"Unable to extract CSL from JSON metadata for ISBN {isbn}:\n"
            f"{json.dumps(result)}"
        )
    # Exactly one record is expected; unpacking fails loudly otherwise.
    (mediawiki,) = result
    csl_item = {}
    csl_item["type"] = mediawiki.get("itemType", "book")
    if "title" in mediawiki:
        csl_item["title"] = mediawiki["title"]
    if "author" in mediawiki:
        # Each author entry is unpacked as a (given, family) pair.
        csl_author = [
            {"given": first, "family": last} for first, last in mediawiki["author"]
        ]
        if csl_author:
            csl_item["author"] = csl_author
    if "date" in mediawiki:
        # Only the first run of four digits in the date is kept, as the year.
        match = re.search(r"[0-9]{4}", mediawiki["date"])
        if match:
            csl_item["issued"] = {"date-parts": [[int(match.group())]]}
        else:
            logging.debug(
                f"get_isbn_csl_item_citoid: issue extracting date for ISBN {isbn}\n"
                f"metadata retrieved from {url}\n"
                f'unable to extract year from date field: {mediawiki["date"]}'
            )
    # Fields copied verbatim: (mediawiki key, CSL key).
    for source_key, csl_key in (
        ("publisher", "publisher"),
        ("place", "publisher-place"),
        ("volume", "volume"),
        ("edition", "edition"),
        ("abstractNote", "abstract"),
    ):
        if source_key in mediawiki:
            csl_item[csl_key] = mediawiki[source_key]
    csl_item["ISBN"] = isbn
    # Skip an absent or empty source list rather than failing the whole
    # lookup over an optional field.
    sources = mediawiki.get("source")
    if sources:
        csl_item["source"] = sources[0]
    if "url" in mediawiki:
        csl_item["URL"] = mediawiki["url"]
    return csl_item

def get_isbn_csl_item_isbnlib(isbn: str):
    """
    Return CSL JSON Data for an ISBN using isbnlib.

    Metadata is fetched with `isbnlib.meta`, rendered by the formatter
    registered under "csl" in `isbnlib.registry.bibformatters`, and the
    resulting JSON text is parsed before being returned.
    """
    lib = set_isbnlib_timeout()
    record = lib.meta(isbn)
    rendered = lib.registry.bibformatters["csl"](record)
    return json.loads(rendered)

# Retrievers that get_isbn_csl_item tries in list order; the first one that
# does not raise supplies the metadata.
isbn_retrievers = [get_isbn_csl_item_zotero, get_isbn_csl_item_citoid, get_isbn_csl_item_isbnlib]

Variables

default_timeout
isbn_retrievers

Functions

get_isbn_csl_item

def get_isbn_csl_item(
    isbn: str
)

Generate CSL JSON Data for an ISBN. Converts all ISBNs to 13-digit format.

This function uses a list of CSL JSON Item metadata retrievers, specified by the module-level variable isbn_retrievers. The methods are attempted in order, with this function returning the metadata from the first non-failing method.

View Source
def get_isbn_csl_item(isbn: str):
    """
    Return CSL JSON Data for an ISBN, after converting it to 13-digit form.

    Each retriever in the module-level list `isbn_retrievers` is called in
    order with the converted ISBN, and the result of the first one that does
    not raise is returned. A failing retriever is reported with a warning
    (and its traceback at info level) before the next one is tried. If every
    retriever raises, an Exception is raised.
    """
    isbn13 = set_isbnlib_timeout().to_isbn13(isbn)
    for fetch in isbn_retrievers:
        try:
            return fetch(isbn13)
        except Exception as exc:
            failure_kind = exc.__class__.__name__
            logging.warning(
                f"Error in {fetch.__name__} for {isbn13} "
                f"due to a {failure_kind}:\n{exc}"
            )
            # Full traceback is only emitted at the more verbose info level.
            logging.info(exc, exc_info=True)
    raise Exception(f"all get_isbn_csl_item methods failed for {isbn13}")

get_isbn_csl_item_citoid

def get_isbn_csl_item_citoid(
    isbn: str
)

Return CSL JSON Data for an ISBN using the Wikipedia Citoid API.

https://en.wikipedia.org/api/rest_v1/#!/Citation/getCitation

View Source
def get_isbn_csl_item_citoid(isbn: str):
    """
    Return CSL JSON Data for an ISBN using the Wikipedia Citoid API.
    https://en.wikipedia.org/api/rest_v1/#!/Citation/getCitation

    The decoded response is expected to be a JSON list holding exactly one
    "mediawiki"-format record, whose fields are mapped onto a CSL item. A
    JSON object response is treated as an error report from the API.

    Raises:
        KeyError: the API answered with an object titled "Not found.".
        Exception: the API answered with any other JSON object; the message
            includes that object serialized as JSON.
        ValueError: the JSON list does not contain exactly one record.
    """
    import requests

    from manubot.util import get_manubot_user_agent

    headers = {"User-Agent": get_manubot_user_agent()}
    url = f"https://en.wikipedia.org/api/rest_v1/data/citation/mediawiki/{isbn}"
    response = requests.get(url, headers=headers)
    result = response.json()
    if isinstance(result, dict):
        # .get() so that an error object lacking "title" still reaches the
        # descriptive Exception below instead of a bare KeyError('title').
        if result.get("title") == "Not found.":
            raise KeyError(f"Metadata for ISBN {isbn} not found at {url}")
        # `result` is the already-decoded dict (it has no `.text`
        # attribute), so serialize it directly for the message.
        raise Exception(
            f"Unable to extract CSL from JSON metadata for ISBN {isbn}:\n"
            f"{json.dumps(result)}"
        )
    # Exactly one record is expected; unpacking fails loudly otherwise.
    (mediawiki,) = result
    csl_item = {}
    csl_item["type"] = mediawiki.get("itemType", "book")
    if "title" in mediawiki:
        csl_item["title"] = mediawiki["title"]
    if "author" in mediawiki:
        # Each author entry is unpacked as a (given, family) pair.
        csl_author = [
            {"given": first, "family": last} for first, last in mediawiki["author"]
        ]
        if csl_author:
            csl_item["author"] = csl_author
    if "date" in mediawiki:
        # Only the first run of four digits in the date is kept, as the year.
        match = re.search(r"[0-9]{4}", mediawiki["date"])
        if match:
            csl_item["issued"] = {"date-parts": [[int(match.group())]]}
        else:
            logging.debug(
                f"get_isbn_csl_item_citoid: issue extracting date for ISBN {isbn}\n"
                f"metadata retrieved from {url}\n"
                f'unable to extract year from date field: {mediawiki["date"]}'
            )
    # Fields copied verbatim: (mediawiki key, CSL key).
    for source_key, csl_key in (
        ("publisher", "publisher"),
        ("place", "publisher-place"),
        ("volume", "volume"),
        ("edition", "edition"),
        ("abstractNote", "abstract"),
    ):
        if source_key in mediawiki:
            csl_item[csl_key] = mediawiki[source_key]
    csl_item["ISBN"] = isbn
    # Skip an absent or empty source list rather than failing the whole
    # lookup over an optional field.
    sources = mediawiki.get("source")
    if sources:
        csl_item["source"] = sources[0]
    if "url" in mediawiki:
        csl_item["URL"] = mediawiki["url"]
    return csl_item

get_isbn_csl_item_isbnlib

def get_isbn_csl_item_isbnlib(
    isbn: str
)

Generate CSL JSON Data for an ISBN using isbnlib.

View Source
def get_isbn_csl_item_isbnlib(isbn: str):
    """
    Return CSL JSON Data for an ISBN using isbnlib.

    Metadata is fetched with `isbnlib.meta`, rendered by the formatter
    registered under "csl" in `isbnlib.registry.bibformatters`, and the
    resulting JSON text is parsed before being returned.
    """
    lib = set_isbnlib_timeout()
    record = lib.meta(isbn)
    rendered = lib.registry.bibformatters["csl"](record)
    return json.loads(rendered)

get_isbn_csl_item_zotero

def get_isbn_csl_item_zotero(
    isbn: str
)

Generate CSL JSON Data for an ISBN using Zotero's translation-server.

View Source
def get_isbn_csl_item_zotero(isbn: str):
    """
    Return CSL JSON Data for an ISBN using Zotero's translation-server.

    The ISBN is passed to `manubot.cite.zotero.get_csl_item` as the
    identifier string "isbn:<isbn>".
    """
    from manubot.cite.zotero import get_csl_item as zotero_csl_item

    identifier = f"isbn:{isbn}"
    return zotero_csl_item(identifier)

set_isbnlib_timeout

def set_isbnlib_timeout(
    seconds=3
)
View Source
def set_isbnlib_timeout(seconds=default_timeout):
    """
    Import isbnlib, apply `seconds` as both its threads timeout and its
    urlopen timeout, and return the imported module.

    isbnlib is imported inside the function, so callers obtain the
    configured module from the return value.
    """
    import isbnlib as lib

    config = lib.config
    config.setthreadstimeout(seconds=seconds)
    config.seturlopentimeout(seconds=seconds)
    return lib

Classes

Handler_ISBN

class Handler_ISBN(
    prefix_lower: str
)

A Handler is a class that provides support for a certain type of citekey.

For example, a Handler subclass could provide support for DOI citekeys. Subclasses enable custom logic for different citekey prefixes, including how to standardize the citekey and how to retrieve CSL Item metadata.

View Source
class Handler_ISBN(Handler):
    """
    Citekey handler for the "isbn" prefix, backed by isbnlib.
    """

    standard_prefix = "isbn"
    prefixes = ["isbn"]

    def inspect(self, citekey):
        """
        Return a message describing the problem when `citekey.accession`
        fails isbnlib's strict ISBN check; otherwise return None.
        """
        lib = set_isbnlib_timeout()
        if not lib.notisbn(citekey.accession, level="strict"):
            return None
        return f"identifier violates the ISBN syntax according to isbnlib v{lib.__version__}"

    def standardize_prefix_accession(self, accession):
        """
        Return `self.standard_prefix` paired with `accession` converted by
        isbnlib's `to_isbn13`.
        """
        import isbnlib

        isbn13 = isbnlib.to_isbn13(accession)
        return self.standard_prefix, isbn13

    def get_csl_item(self, citekey):
        """
        Return the result of get_isbn_csl_item for
        `citekey.standard_accession`.
        """
        isbn = citekey.standard_accession
        return get_isbn_csl_item(isbn)

Ancestors (in MRO)

  • manubot.cite.handlers.Handler

Class variables

prefixes
standard_prefix

Methods

get_csl_item

def get_csl_item(
    self,
    citekey
)

Return a CSL_Item with bibliographic details for citekey.

View Source
    def get_csl_item(self, citekey):
        """
        Return the result of get_isbn_csl_item for
        `citekey.standard_accession`.
        """
        isbn = citekey.standard_accession
        return get_isbn_csl_item(isbn)

inspect

def inspect(
    self,
    citekey
)

Check that citekeys adhere to expected formats. If an issue is detected, a

string describing the issue is returned. Otherwise, None is returned.

View Source
    def inspect(self, citekey):
        """
        Return a message describing the problem when `citekey.accession`
        fails isbnlib's strict ISBN check; otherwise return None.
        """
        lib = set_isbnlib_timeout()
        if not lib.notisbn(citekey.accession, level="strict"):
            return None
        return f"identifier violates the ISBN syntax according to isbnlib v{lib.__version__}"

standardize_prefix_accession

def standardize_prefix_accession(
    self,
    accession
)

Return (prefix, accession) in standardized form.

This method defaults to returning self.standard_prefix (or self.prefix_lower if standard_prefix is not defined). Subclasses can override this method with more specific standardization logic.

View Source
    def standardize_prefix_accession(self, accession):
        """
        Return `self.standard_prefix` paired with `accession` converted by
        isbnlib's `to_isbn13`.
        """
        import isbnlib

        isbn13 = isbnlib.to_isbn13(accession)
        return self.standard_prefix, isbn13