Module manubot.cite.pubmed

View Source

import functools

import json

import logging

import os

from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union

from xml.etree import ElementTree

import requests

from manubot.util import get_manubot_user_agent, request_with_retry

from .citekey import CiteKey

from .handlers import Handler

class Handler_PubMed(Handler):
    """Citekey handler for PubMed identifiers (``pubmed`` and ``pmid`` prefixes)."""

    standard_prefix = "pubmed"

    prefixes = ["pubmed", "pmid"]

    # One to eight digits, the first of which is non-zero.
    accession_pattern = r"[1-9][0-9]{0,7}"

    def inspect(self, citekey: CiteKey) -> Optional[str]:
        """
        Check that the accession of `citekey` looks like a PubMed ID.

        Returns a string describing the problem, or None when none is found.
        """
        # https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmid
        accession = citekey.accession
        if accession.startswith("PMC"):
            pmc_hint = (
                "PubMed Identifiers should start with digits rather than PMC. "
                f"Should {citekey.dealiased_id!r} switch the citation source to 'pmc'?"
            )
            return pmc_hint
        # _get_pattern() comes from the base class; presumably it compiles
        # accession_pattern -- verify against Handler.
        if self._get_pattern().fullmatch(accession):
            return None
        return "PubMed Identifiers should be 1-8 digits with no leading zeros."

    def get_csl_item(self, citekey: CiteKey) -> Dict[str, Any]:
        """Return the CSL Item for the standardized accession of `citekey`."""
        pmid = citekey.standard_accession
        return get_pubmed_csl_item(pmid)

class Handler_PMC(Handler):
    """Citekey handler for PubMed Central identifiers (``pmc`` and ``pmcid`` prefixes)."""

    standard_prefix = "pmc"

    prefixes = ["pmc", "pmcid"]

    # The literal prefix "PMC" followed by one or more digits.
    accession_pattern = r"PMC[0-9]+"

    def inspect(self, citekey: CiteKey) -> Optional[str]:
        """
        Check that the accession of `citekey` looks like a PMCID.

        Returns a string describing the problem, or None when none is found.
        """
        # https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmc
        accession = citekey.accession
        if not accession.startswith("PMC"):
            return "PubMed Central Identifiers must start with 'PMC'."
        # _get_pattern() comes from the base class; presumably it compiles
        # accession_pattern -- verify against Handler.
        if self._get_pattern().fullmatch(accession):
            return None
        return (
            "Identifier does not conform to the PMCID regex. "
            "Double check the PMCID."
        )

    def get_csl_item(self, citekey: CiteKey):
        """Return the CSL Item for the standardized accession of `citekey`."""
        pmcid = citekey.standard_accession
        return get_pmc_csl_item(pmcid)

def get_pmc_csl_item(pmcid: str) -> Dict[str, Any]:
    """
    Get the CSL Item for a PubMed Central record by its PMCID, using the
    NCBI Literature Citation Exporter API.

    `pmcid` must start with "PMC" (asserted). The text after that prefix is
    what gets sent to the exporter. When the exporter's item has no "URL",
    one pointing at the PMC article page is added, built from the item's
    "PMCID" when present and from `pmcid` otherwise.

    https://api.ncbi.nlm.nih.gov/lit/ctxp
    https://github.com/manubot/manubot/issues/21
    https://twitter.com/dhimmel/status/1061787168820092929
    """
    assert pmcid.startswith("PMC")
    identifier = pmcid[3:]
    csl_item = _get_literature_citation_exporter_csl_item("pmc", identifier)
    article_id = csl_item.get("PMCID", pmcid)
    # setdefault leaves a URL supplied by the exporter untouched.
    csl_item.setdefault(
        "URL", f"https://www.ncbi.nlm.nih.gov/pmc/articles/{article_id}/"
    )
    return csl_item

def _get_literature_citation_exporter_csl_item(

    database: str, identifier: str

) -> Dict[str, Any]:

    """

    https://api.ncbi.nlm.nih.gov/lit/ctxp

    """

    if database not in {"pubmed", "pmc"}:

        logging.error(

            f"Error calling _get_literature_citation_exporter_csl_item.\n"

            f'database must be either "pubmed" or "pmc", not {database}'

        )

        raise AssertionError()

    if not identifier:

        logging.error(

            "Error calling _get_literature_citation_exporter_csl_item.\n"

            "identifier cannot be blank"

        )

        raise AssertionError()

    params = {"format": "csl", "id": identifier}

    headers = {"User-Agent": get_manubot_user_agent()}

    url = f"https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/{database}/"

    response = requests.get(url, params, headers=headers)

    try:

        csl_item = response.json()

    except Exception as error:

        logging.error(

            f"Error fetching {database} metadata for {identifier}.\n"

            f"Invalid JSON response from {response.url}:\n{response.text}"

        )

        raise error

    assert isinstance(csl_item, dict)

    if csl_item.get("status", "okay") == "error":

        logging.error(

            f"Error fetching {database} metadata for {identifier}.\n"

            f"Literature Citation Exporter returned JSON indicating an error for {response.url}\n"

            f"{json.dumps(csl_item, indent=2)}"

        )

        raise AssertionError()

    return csl_item

def get_pubmed_csl_item(pmid: Union[str, int]) -> Dict[str, Any]:
    """
    Query NCBI E-Utilities to create CSL Items for PubMed IDs.

    The record is fetched as XML from the efetch endpoint, under the shared
    E-utilities rate limiter, and converted with
    csl_item_from_pubmed_article.

    https://github.com/manubot/manubot/issues/21
    https://github.com/ncbi/citation-exporter/issues/3#issuecomment-355313143

    Raises whatever parsing or conversion raised, after logging the response
    text; request errors from `requests` (including timeouts) propagate.
    """
    pmid = str(pmid)
    params = {"db": "pubmed", "id": pmid, "retmode": "xml"}
    headers = {"User-Agent": get_manubot_user_agent()}
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    with _get_eutils_rate_limiter():
        # Without a timeout requests waits indefinitely on an unresponsive server.
        response = requests.get(url, params, headers=headers, timeout=30)
    try:
        xml_article_set = ElementTree.fromstring(response.text)
        assert isinstance(xml_article_set, ElementTree.Element)
        assert xml_article_set.tag == "PubmedArticleSet"
        # Exactly one article is expected; anything else fails the unpacking.
        (xml_article,) = list(xml_article_set)
        assert xml_article.tag in ["PubmedArticle", "PubmedBookArticle"]
    except Exception:
        logging.error(
            f"Error fetching PubMed metadata for {pmid}.\n"
            f"Unsupported XML response from {response.url}:\n{response.text}"
        )
        # Bare raise keeps the original traceback intact.
        raise
    try:
        csl_item = csl_item_from_pubmed_article(xml_article)
    except Exception:
        msg = f"Error parsing the following PubMed metadata for PMID {pmid}:\n{response.text}"
        logging.error(msg)
        raise
    return csl_item

def csl_item_from_pubmed_article(article: ElementTree.Element) -> Dict[str, Any]:
    """
    Extract a CSL Item dictionary from a PubmedArticle XML element.

    https://github.com/citation-style-language/schema/blob/master/csl-data.json

    Only fields found in the record are set, except "type", which is always
    "article-journal". "URL" is set whenever a PMID is found.

    Raises ValueError when `article` is not a PubmedArticle element and
    NotImplementedError when it has no populated MedlineCitation/Article.
    """
    if article.tag != "PubmedArticle":
        raise ValueError(
            f"Expected article to be an XML element with tag PubmedArticle, received tag {article.tag!r}"
        )
    # Test explicitly for "missing or childless": the truth value of an
    # Element is deprecated and only reflects whether it has children.
    article_elem = article.find("MedlineCitation/Article")
    if article_elem is None or len(article_elem) == 0:
        raise NotImplementedError("Unsupported PubMed record: no <Article> element")

    def full_text(path: str) -> str:
        # findtext() stops at the first child element, so inline markup such
        # as <i> or <sub> would truncate the value; itertext() keeps it all.
        element = article.find(path)
        return "" if element is None else "".join(element.itertext())

    csl_item: Dict[str, Any] = {}
    title = full_text("MedlineCitation/Article/ArticleTitle")
    if title:
        csl_item["title"] = title
    plain_fields = (
        ("volume", "MedlineCitation/Article/Journal/JournalIssue/Volume"),
        ("issue", "MedlineCitation/Article/Journal/JournalIssue/Issue"),
        ("page", "MedlineCitation/Article/Pagination/MedlinePgn"),
        ("container-title", "MedlineCitation/Article/Journal/Title"),
        ("container-title-short", "MedlineCitation/Article/Journal/ISOAbbreviation"),
        ("ISSN", "MedlineCitation/Article/Journal/ISSN"),
    )
    for key, path in plain_fields:
        value = article.findtext(path)
        if value:
            csl_item[key] = value
    date_parts = extract_publication_date_parts(article)
    if date_parts:
        csl_item["issued"] = {"date-parts": [date_parts]}
    authors_csl = []
    for author in article.findall("MedlineCitation/Article/AuthorList/Author"):
        author_csl = {}
        given = author.findtext("ForeName")
        if given:
            author_csl["given"] = given
        family = author.findtext("LastName")
        if family:
            author_csl["family"] = family
        authors_csl.append(author_csl)
    if authors_csl:
        csl_item["author"] = authors_csl
    for id_type, key in ("pubmed", "PMID"), ("pmc", "PMCID"), ("doi", "DOI"):
        xpath = f"PubmedData/ArticleIdList/ArticleId[@IdType='{id_type}']"
        value = article.findtext(xpath)
        if not value and key == "PMID":
            # Fall back to the PMID element of the citation itself.
            value = article.findtext("MedlineCitation/PMID")
        if value:
            csl_item[key] = value.lower() if key == "DOI" else value
    # Only the first AbstractText section is used, as before.
    abstract = full_text("MedlineCitation/Article/Abstract/AbstractText")
    if abstract:
        csl_item["abstract"] = abstract
    if "PMID" in csl_item:
        csl_item["URL"] = f"https://www.ncbi.nlm.nih.gov/pubmed/{csl_item['PMID']}"
    csl_item["type"] = "article-journal"
    return csl_item

# Maps three-letter English month abbreviations to month numbers (Jan -> 1).
month_abbrev_to_int: Dict[str, int] = {
    abbrev: number
    for number, abbrev in enumerate(
        (
            "Jan",
            "Feb",
            "Mar",
            "Apr",
            "May",
            "Jun",
            "Jul",
            "Aug",
            "Sep",
            "Oct",
            "Nov",
            "Dec",
        ),
        start=1,
    )
}

def extract_publication_date_parts(article: ElementTree.Element) -> List[int]:
    """
    Extract date published from a PubmedArticle XML element.

    Returns date parts ordered [year, month, day], truncated at the first
    missing component, or an empty list when no date is found. The
    electronic ArticleDate is preferred; the print PubDate is used when
    ArticleDate yields nothing.
    """
    # Electronic articles
    date = article.find("MedlineCitation/Article/ArticleDate")
    # Compare with None: the truth value of an Element is deprecated and
    # only reflects whether it has children.
    if date is not None:
        date_parts = []
        for tag in "Year", "Month", "Day":
            text = date.findtext(tag)
            if not text:
                break
            date_parts.append(int(text))
        if date_parts:
            return date_parts
    # Print articles
    date_parts = []
    date = article.find("MedlineCitation/Article/Journal/JournalIssue/PubDate")
    if date is None:
        return date_parts
    # Stop at the first missing component so that positions keep their
    # meaning: a day must never land in the month slot.
    year = date.findtext("Year")
    if not year:
        return date_parts
    date_parts.append(int(year))
    month = date.findtext("Month")
    if not month:
        return date_parts
    try:
        date_parts.append(month_abbrev_to_int[month])
    except KeyError:
        # Not a known abbreviation; expect a numeric month.
        date_parts.append(int(month))
    day = date.findtext("Day")
    if day:
        date_parts.append(int(day))
    return date_parts

def get_pmcid_and_pmid_for_doi(doi: str) -> Dict[str, str]:
    """
    Query PMC's ID Converter API to retrieve the PMCID and PMID for a DOI.

    Does not work for DOIs that are in PubMed but not PubMed Central.

    `doi` must be a string starting with "10." (asserted). Returns a dict
    with the keys "PMCID" and/or "PMID" for the values found, or an empty
    dict when the lookup fails or the response is not as expected.

    https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
    """
    assert isinstance(doi, str)
    assert doi.startswith("10.")
    response = request_with_retry(
        "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/",
        params={"ids": doi, "tool": "manubot"},
    )
    if not response.ok:
        logging.warning(f"Status code {response.status_code} querying {response.url}\n")
        return {}
    try:
        root = ElementTree.fromstring(response.text)
        assert root.tag == "pmcids"
    except Exception:
        logging.warning(
            f"Error fetching PMC ID conversion for {doi}.\n"
            f"Response from {response.url}:\n{response.text}"
        )
        return {}
    records = root.findall("record")
    if len(records) != 1:
        logging.warning(
            f"Expected PubMed Central ID converter to return a single XML record for {doi}.\n"
            f"Response from {response.url}:\n{response.text}"
        )
        return {}
    record = records[0]
    if record.findtext("status", default="okay") == "error":
        return {}
    # The identifiers are read from attributes of the <record> element.
    return {
        id_type.upper(): record.get(id_type)
        for id_type in ("pmcid", "pmid")
        if record.get(id_type)
    }

def get_pmid_for_doi(doi: str) -> Optional[str]:
    """
    Query NCBI's E-utilities to retrieve the PMID for a DOI.

    `doi` must be a string starting with "10." (asserted). Returns the text
    of the single matching Id element, or None when the request is not OK,
    the XML is not an eSearchResult, or the number of matches is not
    exactly one. Request errors from `requests` (including timeouts)
    propagate.
    """
    assert isinstance(doi, str)
    assert doi.startswith("10.")
    params = {"db": "pubmed", "term": f"{doi}[DOI]"}
    headers = {"User-Agent": get_manubot_user_agent()}
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    with _get_eutils_rate_limiter():
        # Without a timeout requests waits indefinitely on an unresponsive server.
        response = requests.get(url, params, headers=headers, timeout=30)
    if not response.ok:
        logging.warning(f"Status code {response.status_code} querying {response.url}\n")
        return None
    try:
        element_tree = ElementTree.fromstring(response.text)
        assert isinstance(element_tree, ElementTree.Element)
        assert element_tree.tag == "eSearchResult"
    except Exception:
        logging.warning(
            f"Error in ESearch XML for DOI: {doi}.\n"
            f"Response from {response.url}:\n{response.text}"
        )
        return None
    id_elems = element_tree.findall("IdList/Id")
    if len(id_elems) != 1:
        # Covers both no match and an ambiguous result with several matches.
        logging.debug(
            f"Expected a single PMID for {doi}, found {len(id_elems)}.\n"
            f"Response from {response.url}:\n{response.text}"
        )
        return None
    (id_elem,) = id_elems
    return id_elem.text

def get_pubmed_ids_for_doi(doi: str) -> Dict[str, str]:
    """
    Return a dictionary with PMCID and PMID, if they exist, for the specified
    DOI. See https://github.com/manubot/manubot/issues/45.

    The PMC ID Converter is tried first; only when it yields nothing is the
    E-utilities search used, which can contribute a PMID.
    """
    pubmed_ids = get_pmcid_and_pmid_for_doi(doi)
    if pubmed_ids:
        return pubmed_ids
    pmid = get_pmid_for_doi(doi)
    if pmid:
        pubmed_ids["PMID"] = pmid
    return pubmed_ids

if TYPE_CHECKING:

    # support PyrateLimiter return type while avoiding unused runtime import

    # https://stackoverflow.com/a/39757388/4651668

    from pyrate_limiter import Limiter

@functools.lru_cache
def _get_eutils_rate_limiter() -> "Limiter":
    """
    Rate limiter to cap NCBI E-utilities queries to <= 3 per second as per
    https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/

    The configured rate is 2 requests per second, or 1 request per 2 seconds
    when the CI environment variable is set. The result is cached, so every
    caller shares one object; callers in this module use it as a context
    manager.

    https://pyratelimiter.readthedocs.io/en/latest/
    https://github.com/vutran1710/PyrateLimiter
    """
    # does not work with pyrate_limiter v3
    # https://github.com/manubot/manubot/issues/367
    from pyrate_limiter import Duration, Limiter, RequestRate

    # multiple CI jobs might be running concurrently, hence the slower CI rate
    on_ci = "CI" in os.environ
    rate = (
        RequestRate(limit=1, interval=Duration.SECOND * 2)
        if on_ci
        else RequestRate(limit=2, interval=Duration.SECOND)
    )
    limiter = Limiter(rate)
    return limiter.ratelimit("ncbi_eutils", delay=True)

Variables

TYPE_CHECKING

month_abbrev_to_int

Functions

csl_item_from_pubmed_article

def csl_item_from_pubmed_article(
    article: xml.etree.ElementTree.Element
) -> Dict[str, Any]

Extract a CSL Item dictionary from a PubmedArticle XML element.

https://github.com/citation-style-language/schema/blob/master/csl-data.json

View Source

def csl_item_from_pubmed_article(article: ElementTree.Element) -> Dict[str, Any]:
    """
    Extract a CSL Item dictionary from a PubmedArticle XML element.

    https://github.com/citation-style-language/schema/blob/master/csl-data.json

    Only fields found in the record are set, except "type", which is always
    "article-journal". "URL" is set whenever a PMID is found.

    Raises ValueError when `article` is not a PubmedArticle element and
    NotImplementedError when it has no populated MedlineCitation/Article.
    """
    if article.tag != "PubmedArticle":
        raise ValueError(
            f"Expected article to be an XML element with tag PubmedArticle, received tag {article.tag!r}"
        )
    # Test explicitly for "missing or childless": the truth value of an
    # Element is deprecated and only reflects whether it has children.
    article_elem = article.find("MedlineCitation/Article")
    if article_elem is None or len(article_elem) == 0:
        raise NotImplementedError("Unsupported PubMed record: no <Article> element")

    def full_text(path: str) -> str:
        # findtext() stops at the first child element, so inline markup such
        # as <i> or <sub> would truncate the value; itertext() keeps it all.
        element = article.find(path)
        return "" if element is None else "".join(element.itertext())

    csl_item: Dict[str, Any] = {}
    title = full_text("MedlineCitation/Article/ArticleTitle")
    if title:
        csl_item["title"] = title
    plain_fields = (
        ("volume", "MedlineCitation/Article/Journal/JournalIssue/Volume"),
        ("issue", "MedlineCitation/Article/Journal/JournalIssue/Issue"),
        ("page", "MedlineCitation/Article/Pagination/MedlinePgn"),
        ("container-title", "MedlineCitation/Article/Journal/Title"),
        ("container-title-short", "MedlineCitation/Article/Journal/ISOAbbreviation"),
        ("ISSN", "MedlineCitation/Article/Journal/ISSN"),
    )
    for key, path in plain_fields:
        value = article.findtext(path)
        if value:
            csl_item[key] = value
    date_parts = extract_publication_date_parts(article)
    if date_parts:
        csl_item["issued"] = {"date-parts": [date_parts]}
    authors_csl = []
    for author in article.findall("MedlineCitation/Article/AuthorList/Author"):
        author_csl = {}
        given = author.findtext("ForeName")
        if given:
            author_csl["given"] = given
        family = author.findtext("LastName")
        if family:
            author_csl["family"] = family
        authors_csl.append(author_csl)
    if authors_csl:
        csl_item["author"] = authors_csl
    for id_type, key in ("pubmed", "PMID"), ("pmc", "PMCID"), ("doi", "DOI"):
        xpath = f"PubmedData/ArticleIdList/ArticleId[@IdType='{id_type}']"
        value = article.findtext(xpath)
        if not value and key == "PMID":
            # Fall back to the PMID element of the citation itself.
            value = article.findtext("MedlineCitation/PMID")
        if value:
            csl_item[key] = value.lower() if key == "DOI" else value
    # Only the first AbstractText section is used, as before.
    abstract = full_text("MedlineCitation/Article/Abstract/AbstractText")
    if abstract:
        csl_item["abstract"] = abstract
    if "PMID" in csl_item:
        csl_item["URL"] = f"https://www.ncbi.nlm.nih.gov/pubmed/{csl_item['PMID']}"
    csl_item["type"] = "article-journal"
    return csl_item

extract_publication_date_parts

def extract_publication_date_parts(
    article: xml.etree.ElementTree.Element
) -> List[int]

Extract date published from a PubmedArticle XML element.

View Source

def extract_publication_date_parts(article: ElementTree.Element) -> List[int]:
    """
    Extract date published from a PubmedArticle XML element.

    Returns date parts ordered [year, month, day], truncated at the first
    missing component, or an empty list when no date is found. The
    electronic ArticleDate is preferred; the print PubDate is used when
    ArticleDate yields nothing.
    """
    # Electronic articles
    date = article.find("MedlineCitation/Article/ArticleDate")
    # Compare with None: the truth value of an Element is deprecated and
    # only reflects whether it has children.
    if date is not None:
        date_parts = []
        for tag in "Year", "Month", "Day":
            text = date.findtext(tag)
            if not text:
                break
            date_parts.append(int(text))
        if date_parts:
            return date_parts
    # Print articles
    date_parts = []
    date = article.find("MedlineCitation/Article/Journal/JournalIssue/PubDate")
    if date is None:
        return date_parts
    # Stop at the first missing component so that positions keep their
    # meaning: a day must never land in the month slot.
    year = date.findtext("Year")
    if not year:
        return date_parts
    date_parts.append(int(year))
    month = date.findtext("Month")
    if not month:
        return date_parts
    try:
        date_parts.append(month_abbrev_to_int[month])
    except KeyError:
        # Not a known abbreviation; expect a numeric month.
        date_parts.append(int(month))
    day = date.findtext("Day")
    if day:
        date_parts.append(int(day))
    return date_parts

get_pmc_csl_item

def get_pmc_csl_item(
    pmcid: str
) -> Dict[str, Any]

Get the CSL Item for a PubMed Central record by its PMCID, using the

NCBI Literature Citation Exporter API.

https://api.ncbi.nlm.nih.gov/lit/ctxp https://github.com/manubot/manubot/issues/21 https://twitter.com/dhimmel/status/1061787168820092929

View Source

def get_pmc_csl_item(pmcid: str) -> Dict[str, Any]:
    """
    Get the CSL Item for a PubMed Central record by its PMCID, using the
    NCBI Literature Citation Exporter API.

    `pmcid` must start with "PMC" (asserted). The text after that prefix is
    what gets sent to the exporter. When the exporter's item has no "URL",
    one pointing at the PMC article page is added, built from the item's
    "PMCID" when present and from `pmcid` otherwise.

    https://api.ncbi.nlm.nih.gov/lit/ctxp
    https://github.com/manubot/manubot/issues/21
    https://twitter.com/dhimmel/status/1061787168820092929
    """
    assert pmcid.startswith("PMC")
    identifier = pmcid[3:]
    csl_item = _get_literature_citation_exporter_csl_item("pmc", identifier)
    article_id = csl_item.get("PMCID", pmcid)
    # setdefault leaves a URL supplied by the exporter untouched.
    csl_item.setdefault(
        "URL", f"https://www.ncbi.nlm.nih.gov/pmc/articles/{article_id}/"
    )
    return csl_item

get_pmcid_and_pmid_for_doi

def get_pmcid_and_pmid_for_doi(
    doi: str
) -> Dict[str, str]

Query PMC's ID Converter API to retrieve the PMCID and PMID for a DOI.

Does not work for DOIs that are in PubMed but not PubMed Central. See https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/

View Source

def get_pmcid_and_pmid_for_doi(doi: str) -> Dict[str, str]:
    """
    Query PMC's ID Converter API to retrieve the PMCID and PMID for a DOI.

    Does not work for DOIs that are in PubMed but not PubMed Central.

    `doi` must be a string starting with "10." (asserted). Returns a dict
    with the keys "PMCID" and/or "PMID" for the values found, or an empty
    dict when the lookup fails or the response is not as expected.

    https://www.ncbi.nlm.nih.gov/pmc/tools/id-converter-api/
    """
    assert isinstance(doi, str)
    assert doi.startswith("10.")
    response = request_with_retry(
        "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/",
        params={"ids": doi, "tool": "manubot"},
    )
    if not response.ok:
        logging.warning(f"Status code {response.status_code} querying {response.url}\n")
        return {}
    try:
        root = ElementTree.fromstring(response.text)
        assert root.tag == "pmcids"
    except Exception:
        logging.warning(
            f"Error fetching PMC ID conversion for {doi}.\n"
            f"Response from {response.url}:\n{response.text}"
        )
        return {}
    records = root.findall("record")
    if len(records) != 1:
        logging.warning(
            f"Expected PubMed Central ID converter to return a single XML record for {doi}.\n"
            f"Response from {response.url}:\n{response.text}"
        )
        return {}
    record = records[0]
    if record.findtext("status", default="okay") == "error":
        return {}
    # The identifiers are read from attributes of the <record> element.
    return {
        id_type.upper(): record.get(id_type)
        for id_type in ("pmcid", "pmid")
        if record.get(id_type)
    }

get_pmid_for_doi

def get_pmid_for_doi(
    doi: str
) -> Optional[str]

Query NCBI's E-utilities to retrieve the PMID for a DOI.

View Source

def get_pmid_for_doi(doi: str) -> Optional[str]:
    """
    Query NCBI's E-utilities to retrieve the PMID for a DOI.

    `doi` must be a string starting with "10." (asserted). Returns the text
    of the single matching Id element, or None when the request is not OK,
    the XML is not an eSearchResult, or the number of matches is not
    exactly one. Request errors from `requests` (including timeouts)
    propagate.
    """
    assert isinstance(doi, str)
    assert doi.startswith("10.")
    params = {"db": "pubmed", "term": f"{doi}[DOI]"}
    headers = {"User-Agent": get_manubot_user_agent()}
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    with _get_eutils_rate_limiter():
        # Without a timeout requests waits indefinitely on an unresponsive server.
        response = requests.get(url, params, headers=headers, timeout=30)
    if not response.ok:
        logging.warning(f"Status code {response.status_code} querying {response.url}\n")
        return None
    try:
        element_tree = ElementTree.fromstring(response.text)
        assert isinstance(element_tree, ElementTree.Element)
        assert element_tree.tag == "eSearchResult"
    except Exception:
        logging.warning(
            f"Error in ESearch XML for DOI: {doi}.\n"
            f"Response from {response.url}:\n{response.text}"
        )
        return None
    id_elems = element_tree.findall("IdList/Id")
    if len(id_elems) != 1:
        # Covers both no match and an ambiguous result with several matches.
        logging.debug(
            f"Expected a single PMID for {doi}, found {len(id_elems)}.\n"
            f"Response from {response.url}:\n{response.text}"
        )
        return None
    (id_elem,) = id_elems
    return id_elem.text

get_pubmed_csl_item

def get_pubmed_csl_item(
    pmid: Union[str, int]
) -> Dict[str, Any]

Query NCBI E-Utilities to create CSL Items for PubMed IDs.

https://github.com/manubot/manubot/issues/21 https://github.com/ncbi/citation-exporter/issues/3#issuecomment-355313143

View Source

def get_pubmed_csl_item(pmid: Union[str, int]) -> Dict[str, Any]:
    """
    Query NCBI E-Utilities to create CSL Items for PubMed IDs.

    The record is fetched as XML from the efetch endpoint, under the shared
    E-utilities rate limiter, and converted with
    csl_item_from_pubmed_article.

    https://github.com/manubot/manubot/issues/21
    https://github.com/ncbi/citation-exporter/issues/3#issuecomment-355313143

    Raises whatever parsing or conversion raised, after logging the response
    text; request errors from `requests` (including timeouts) propagate.
    """
    pmid = str(pmid)
    params = {"db": "pubmed", "id": pmid, "retmode": "xml"}
    headers = {"User-Agent": get_manubot_user_agent()}
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    with _get_eutils_rate_limiter():
        # Without a timeout requests waits indefinitely on an unresponsive server.
        response = requests.get(url, params, headers=headers, timeout=30)
    try:
        xml_article_set = ElementTree.fromstring(response.text)
        assert isinstance(xml_article_set, ElementTree.Element)
        assert xml_article_set.tag == "PubmedArticleSet"
        # Exactly one article is expected; anything else fails the unpacking.
        (xml_article,) = list(xml_article_set)
        assert xml_article.tag in ["PubmedArticle", "PubmedBookArticle"]
    except Exception:
        logging.error(
            f"Error fetching PubMed metadata for {pmid}.\n"
            f"Unsupported XML response from {response.url}:\n{response.text}"
        )
        # Bare raise keeps the original traceback intact.
        raise
    try:
        csl_item = csl_item_from_pubmed_article(xml_article)
    except Exception:
        msg = f"Error parsing the following PubMed metadata for PMID {pmid}:\n{response.text}"
        logging.error(msg)
        raise
    return csl_item

get_pubmed_ids_for_doi

def get_pubmed_ids_for_doi(
    doi: str
) -> Dict[str, str]

Return a dictionary with PMCID and PMID, if they exist, for the specified

DOI. See https://github.com/manubot/manubot/issues/45.

View Source

def get_pubmed_ids_for_doi(doi: str) -> Dict[str, str]:
    """
    Return a dictionary with PMCID and PMID, if they exist, for the specified
    DOI. See https://github.com/manubot/manubot/issues/45.

    The PMC ID Converter is tried first; only when it yields nothing is the
    E-utilities search used, which can contribute a PMID.
    """
    pubmed_ids = get_pmcid_and_pmid_for_doi(doi)
    if pubmed_ids:
        return pubmed_ids
    pmid = get_pmid_for_doi(doi)
    if pmid:
        pubmed_ids["PMID"] = pmid
    return pubmed_ids

Classes

Handler_PMC

class Handler_PMC(
    prefix_lower: str
)

A Handler is a class that provides support for a certain type of citekey.

For example, a Handler subclass could provide support for DOI citekeys. Subclasses enable custom logic for different citekey prefixes, including how to standardize the citekey and how to retrieve CSL Item metadata.

View Source

class Handler_PMC(Handler):
    """Citekey handler for PubMed Central identifiers (``pmc`` and ``pmcid`` prefixes)."""

    standard_prefix = "pmc"

    prefixes = ["pmc", "pmcid"]

    # The literal prefix "PMC" followed by one or more digits.
    accession_pattern = r"PMC[0-9]+"

    def inspect(self, citekey: CiteKey) -> Optional[str]:
        """
        Check that the accession of `citekey` looks like a PMCID.

        Returns a string describing the problem, or None when none is found.
        """
        # https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmc
        accession = citekey.accession
        if not accession.startswith("PMC"):
            return "PubMed Central Identifiers must start with 'PMC'."
        # _get_pattern() comes from the base class; presumably it compiles
        # accession_pattern -- verify against Handler.
        if self._get_pattern().fullmatch(accession):
            return None
        return (
            "Identifier does not conform to the PMCID regex. "
            "Double check the PMCID."
        )

    def get_csl_item(self, citekey: CiteKey):
        """Return the CSL Item for the standardized accession of `citekey`."""
        pmcid = citekey.standard_accession
        return get_pmc_csl_item(pmcid)

Ancestors (in MRO)

manubot.cite.handlers.Handler

Class variables

accession_pattern

prefixes

standard_prefix

Methods

get_csl_item

def get_csl_item(
    self,
    citekey: manubot.cite.citekey.CiteKey
)

Return a CSL_Item with bibliographic details for citekey.

View Source

    def get_csl_item(self, citekey: CiteKey):
        """Return the CSL Item for the standardized accession of `citekey`."""
        pmcid = citekey.standard_accession
        return get_pmc_csl_item(pmcid)

inspect

def inspect(
    self,
    citekey: manubot.cite.citekey.CiteKey
) -> Optional[str]

Check citekeys adhere to expected formats. If an issue is detected a

string describing the issue is returned. Otherwise returns None.

View Source

    def inspect(self, citekey: CiteKey) -> Optional[str]:
        """
        Check that the accession of `citekey` looks like a PMCID.

        Returns a string describing the problem, or None when none is found.
        """
        # https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmc
        accession = citekey.accession
        if not accession.startswith("PMC"):
            return "PubMed Central Identifiers must start with 'PMC'."
        if self._get_pattern().fullmatch(accession):
            return None
        return (
            "Identifier does not conform to the PMCID regex. "
            "Double check the PMCID."
        )

standardize_prefix_accession

def standardize_prefix_accession(
    self,
    accession: str
) -> Tuple[str, str]

Return (prefix, accession) in standardized form.

This method defaults to returning self.standard_prefix (or self.prefix_lower if standard_prefix is not defined). Subclasses can override this method with more specific standardization logic.

View Source

    def standardize_prefix_accession(self, accession: str) -> Tuple[str, str]:
        """
        Return (prefix, accession) in standardized form.

        The prefix is `self.standard_prefix` when that attribute exists and
        `self.prefix_lower` otherwise; the accession is returned unchanged.
        Subclasses can override this method with more specific
        standardization logic.
        """
        # Read the fallback first, as the original did when it passed it as
        # the getattr default.
        fallback_prefix = self.prefix_lower
        return getattr(self, "standard_prefix", fallback_prefix), accession

Handler_PubMed

class Handler_PubMed(
    prefix_lower: str
)

A Handler is a class that provides support for a certain type of citekey.

For example, a Handler subclass could provide support for DOI citekeys. Subclasses enable custom logic for different citekey prefixes, including how to standardize the citekey and how to retrieve CSL Item metadata.

View Source

class Handler_PubMed(Handler):
    """Citekey handler for PubMed identifiers (``pubmed`` and ``pmid`` prefixes)."""

    standard_prefix = "pubmed"

    prefixes = ["pubmed", "pmid"]

    # One to eight digits, the first of which is non-zero.
    accession_pattern = r"[1-9][0-9]{0,7}"

    def inspect(self, citekey: CiteKey) -> Optional[str]:
        """
        Check that the accession of `citekey` looks like a PubMed ID.

        Returns a string describing the problem, or None when none is found.
        """
        # https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmid
        accession = citekey.accession
        if accession.startswith("PMC"):
            pmc_hint = (
                "PubMed Identifiers should start with digits rather than PMC. "
                f"Should {citekey.dealiased_id!r} switch the citation source to 'pmc'?"
            )
            return pmc_hint
        # _get_pattern() comes from the base class; presumably it compiles
        # accession_pattern -- verify against Handler.
        if self._get_pattern().fullmatch(accession):
            return None
        return "PubMed Identifiers should be 1-8 digits with no leading zeros."

    def get_csl_item(self, citekey: CiteKey) -> Dict[str, Any]:
        """Return the CSL Item for the standardized accession of `citekey`."""
        pmid = citekey.standard_accession
        return get_pubmed_csl_item(pmid)

Ancestors (in MRO)

manubot.cite.handlers.Handler

Class variables

accession_pattern

prefixes

standard_prefix

Methods

get_csl_item

def get_csl_item(
    self,
    citekey: manubot.cite.citekey.CiteKey
) -> Dict[str, Any]

Return a CSL_Item with bibliographic details for citekey.

View Source

    def get_csl_item(self, citekey: CiteKey) -> Dict[str, Any]:
        """Return the CSL Item for the standardized accession of `citekey`."""
        pmid = citekey.standard_accession
        return get_pubmed_csl_item(pmid)

inspect

def inspect(
    self,
    citekey: manubot.cite.citekey.CiteKey
) -> Optional[str]

Check citekeys adhere to expected formats. If an issue is detected a

string describing the issue is returned. Otherwise returns None.

View Source

    def inspect(self, citekey: CiteKey) -> Optional[str]:
        """
        Check that the accession of `citekey` looks like a PubMed ID.

        Returns a string describing the problem, or None when none is found.
        """
        # https://www.nlm.nih.gov/bsd/mms/medlineelements.html#pmid
        accession = citekey.accession
        if accession.startswith("PMC"):
            pmc_hint = (
                "PubMed Identifiers should start with digits rather than PMC. "
                f"Should {citekey.dealiased_id!r} switch the citation source to 'pmc'?"
            )
            return pmc_hint
        if self._get_pattern().fullmatch(accession):
            return None
        return "PubMed Identifiers should be 1-8 digits with no leading zeros."

standardize_prefix_accession

def standardize_prefix_accession(
    self,
    accession: str
) -> Tuple[str, str]

Return (prefix, accession) in standardized form.

This method defaults to returning self.standard_prefix (or self.prefix_lower if standard_prefix is not defined). Subclasses can override this method with more specific standardization logic.

View Source

    def standardize_prefix_accession(self, accession: str) -> Tuple[str, str]:
        """
        Return (prefix, accession) in standardized form.

        The prefix is `self.standard_prefix` when that attribute exists and
        `self.prefix_lower` otherwise; the accession is returned unchanged.
        Subclasses can override this method with more specific
        standardization logic.
        """
        # Read the fallback first, as the original did when it passed it as
        # the getattr default.
        fallback_prefix = self.prefix_lower
        return getattr(self, "standard_prefix", fallback_prefix), accession