Skip to content

Module manubot.cite.tests.test_citekey_api

Tests API-level functions in manubot.cite. Both functions are found in citekey.py

View Source
"""Tests API-level functions in manubot.cite. Both functions are found in citekey.py"""

import pytest

from manubot.cite import citekey_to_csl_item

from manubot.cite.citekey import CiteKey

@pytest.mark.parametrize(
    "input_id,expected",
    [
        # DOIs: the expected standard_id is lowercased
        ("doi:10.5061/DRYAD.q447c/1", "doi:10.5061/dryad.q447c/1"),
        ("doi:10.5061/dryad.q447c/1", "doi:10.5061/dryad.q447c/1"),
        # infers by default
        ("10.5061/dryad.q447c/1", "doi:10.5061/dryad.q447c/1"),
        # shortDOIs: expected to expand to the full DOI, regardless of case
        ("doi:10/b6vnmd", "doi:10.1016/s0933-3657(96)00367-3"),
        ("doi:10/B6VNMD", "doi:10.1016/s0933-3657(96)00367-3"),
        # passthrough non-existent shortDOI
        ("doi:10/xxxxxxxxxxxxxYY", "doi:10/xxxxxxxxxxxxxyy"),
        # pmid is expected to be reported with the pubmed prefix
        ("pmid:24159271", "pubmed:24159271"),
        # ISBNs: expected as 13 digits without hyphens
        ("isbn:1339919885", "isbn:9781339919881"),
        ("isbn:1-339-91988-5", "isbn:9781339919881"),
        ("isbn:978-0-387-95069-3", "isbn:9780387950693"),
        ("isbn:9780387950938", "isbn:9780387950938"),
        ("isbn:1-55860-510-X", "isbn:9781558605107"),
        ("isbn:1-55860-510-x", "isbn:9781558605107"),
    ],
)
def test_citekey_standard_id(input_id, expected):
    """Check that CiteKey.standard_id gives the expected form for common prefixes."""
    standardized = CiteKey(input_id).standard_id
    assert standardized == expected

# Pairs of (identifier written without a prefix, prefix expected once inferred).
# Shared by test_citekey_infer_prefix and test_citekey_no_infer_prefix.
test_citekey_infer_prefix_params = [
    # DOI, in full and in 10/ short form
    ("10.5061/dryad.q447c/1", "doi"),
    ("10/b6vnmd", "doi"),
    # digits only
    ("24159271", "pubmed"),
    ("1", "pubmed"),
    # identifiers with a leading letter code
    ("PMC3041534", "pmc"),
    ("Q50051684", "wikidata"),
    # versioned arXiv-style identifier
    ("1407.3561v1", "arxiv"),
    # nothing to infer: expected prefix is None
    ("no-prefix-to-infer", None),
]

@pytest.mark.parametrize("input_id,prefix", test_citekey_infer_prefix_params)
def test_citekey_infer_prefix(input_id, prefix):
    """With infer_prefix=True, CiteKey.prefix should equal the expected prefix."""
    inferred = CiteKey(input_id, infer_prefix=True).prefix
    assert inferred == prefix

@pytest.mark.parametrize("input_id,prefix", test_citekey_infer_prefix_params)
def test_citekey_no_infer_prefix(input_id, prefix):
    """With infer_prefix=False, both prefix and accession should be None.

    ``prefix`` comes from the shared parametrization and is not used here.
    """
    key = CiteKey(input_id, infer_prefix=False)
    assert key.prefix is None
    assert key.accession is None

@pytest.mark.xfail(reason="https://twitter.com/dhimmel/status/950443969313419264")
def test_citekey_to_csl_item_doi_datacite():
    """Check the CSL item built from a DOI citekey (expected to fail; see xfail reason)."""
    item = citekey_to_csl_item("doi:10.7287/peerj.preprints.3100v1")
    assert item["id"] == "11cb5HXoY"
    assert item["URL"] == "https://doi.org/10.7287/peerj.preprints.3100v1"
    assert item["DOI"] == "10.7287/peerj.preprints.3100v1"
    assert item["type"] == "report"
    title = item["title"]
    assert title == "Sci-Hub provides access to nearly all scholarly literature"
    # Only the first and last authors are checked.
    author_list = item["author"]
    assert author_list[0]["family"] == "Himmelstein"
    assert author_list[-1]["family"] == "Greene"

def test_citekey_to_csl_item_arxiv():
    """Check the CSL item built from an arxiv citekey."""
    item = citekey_to_csl_item("arxiv:cond-mat/0703470v2")
    assert item["id"] == "ES92tcdg"
    assert item["URL"] == "https://arxiv.org/abs/cond-mat/0703470v2"
    assert item["number"] == "cond-mat/0703470v2"
    assert item["version"] == "v2"
    assert item["type"] == "report"
    assert item["container-title"] == "arXiv"
    assert item["title"] == "Portraits of Complex Networks"
    # The first author is expected as a single literal name.
    author_list = item["author"]
    assert author_list[0]["literal"] == "J. P. Bagrow"
    assert item["DOI"] == "10.1209/0295-5075/81/68004"

def test_citekey_to_csl_item_pmc():
    """
    Check the CSL item built from a pmc citekey.

    https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/?format=csl&id=3041534
    """
    item = citekey_to_csl_item("pmc:PMC3041534")
    assert item["id"] == "1CGP1eifE"
    assert item["URL"] == "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3041534/"
    assert item["container-title-short"] == "Summit Transl Bioinform"
    title = item["title"]
    assert (
        title
        == "Secondary Use of EHR: Data Quality Issues and Informatics Opportunities"
    )
    author_list = item["author"]
    assert author_list[0]["family"] == "Botsis"
    assert item["PMID"] == "21347133"
    assert item["PMCID"] == "PMC3041534"
    # The note field should record provenance and the standardized identifier.
    assert "generated by Manubot" in item["note"]
    assert "standard_id: pmc:PMC3041534" in item["note"]

def test_citekey_to_csl_item_pubmed_1():
    """
    Check the CSL item built from a pubmed citekey.

    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=21347133&rettype=full
    """
    item = citekey_to_csl_item("pubmed:21347133")
    assert item["id"] == "o7hs4FTC"
    assert item["type"] == "article-journal"
    assert item["URL"] == "https://www.ncbi.nlm.nih.gov/pubmed/21347133"
    assert item["container-title"] == "Summit on translational bioinformatics"
    title = item["title"]
    assert (
        title
        == "Secondary Use of EHR: Data Quality Issues and Informatics Opportunities."
    )
    # Year, month and day are all expected here.
    assert item["issued"]["date-parts"] == [[2010, 3, 1]]
    author_list = item["author"]
    assert author_list[0]["given"] == "Taxiarchis"
    assert author_list[0]["family"] == "Botsis"
    assert item["PMID"] == "21347133"
    assert item["PMCID"] == "PMC3041534"

def test_citekey_to_csl_item_pubmed_2():
    """
    Check the CSL item built from a second pubmed citekey.

    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=27094199&rettype=full
    """
    item = citekey_to_csl_item("pubmed:27094199")
    # Printed output is captured by pytest and shown if an assertion fails.
    print(item)
    assert item["id"] == "5v0vabZu"
    assert item["type"] == "article-journal"
    assert item["URL"] == "https://www.ncbi.nlm.nih.gov/pubmed/27094199"
    assert item["container-title"] == "Circulation. Cardiovascular genetics"
    assert item["container-title-short"] == "Circ Cardiovasc Genet"
    assert item["page"] == "179-84"
    title = item["title"]
    assert (
        title
        == "Genetic Association-Guided Analysis of Gene Networks for the Study of Complex Traits."
    )
    # Only year and month are expected for this record.
    assert item["issued"]["date-parts"] == [[2016, 4]]
    author_list = item["author"]
    assert author_list[0]["given"] == "Casey S"
    assert author_list[0]["family"] == "Greene"
    assert item["PMID"] == "27094199"
    assert item["DOI"] == "10.1161/circgenetics.115.001181"

def test_citekey_to_csl_item_pubmed_with_numeric_month():
    """
    Check the issued date parts of the CSL item for pmid:29028984.

    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29028984&rettype=full

    See https://github.com/manubot/manubot/issues/69
    """
    item = citekey_to_csl_item("pmid:29028984")
    # Printed output is captured by pytest and shown if the assertion fails.
    print(item)
    date_parts = item["issued"]["date-parts"]
    assert date_parts == [[2018, 3, 15]]

def test_citekey_to_csl_item_pubmed_book(caplog):
    """
    Check that a PubMed book record yields None and a logged error message.

    Extracting CSL metadata from books in PubMed is not supported.
    Logic not implemented to parse XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29227604&rettype=full
    """
    expected_message = "Expected article to be an XML element with tag PubmedArticle, received tag 'PubmedBookArticle'"
    result = citekey_to_csl_item("pmid:29227604", log_level="ERROR")
    assert result is None
    assert expected_message in caplog.text

def test_citekey_to_csl_item_isbn():
    """Check the type and title of the CSL item built from an isbn citekey."""
    item = citekey_to_csl_item("isbn:9780387950693")
    item_type, title = item["type"], item["title"]
    assert item_type == "book"
    assert title == "Complex analysis"

def test_citekey_to_csl_item_clinical_trial():
    """
    Check the CSL item built from a clinicaltrials CURIE citekey.

    Test clinicaltrials.gov citation support using CURIEs.
    https://github.com/manubot/manubot/issues/216
    """
    item = citekey_to_csl_item("clinicaltrials:NCT04292899")
    # Title and source are matched by prefix only.
    title = item["title"]
    assert title.startswith("A Phase 3 Randomized Study")
    source = item["source"]
    assert source.startswith("clinicaltrials.gov")
    assert item["URL"] == "https://clinicaltrials.gov/ct2/show/NCT04292899"

Variables

test_citekey_infer_prefix_params

Functions

test_citekey_infer_prefix

def test_citekey_infer_prefix(
    input_id,
    prefix
)
View Source
@pytest.mark.parametrize("input_id,prefix", test_citekey_infer_prefix_params)
def test_citekey_infer_prefix(input_id, prefix):
    """With infer_prefix=True, CiteKey.prefix should equal the expected prefix."""
    inferred = CiteKey(input_id, infer_prefix=True).prefix
    assert inferred == prefix

test_citekey_no_infer_prefix

def test_citekey_no_infer_prefix(
    input_id,
    prefix
)
View Source
@pytest.mark.parametrize("input_id,prefix", test_citekey_infer_prefix_params)
def test_citekey_no_infer_prefix(input_id, prefix):
    """With infer_prefix=False, both prefix and accession should be None.

    ``prefix`` comes from the shared parametrization and is not used here.
    """
    key = CiteKey(input_id, infer_prefix=False)
    assert key.prefix is None
    assert key.accession is None

test_citekey_standard_id

def test_citekey_standard_id(
    input_id,
    expected
)

Test CiteKey.standard_id property for common prefixes.

View Source
@pytest.mark.parametrize(
    "input_id,expected",
    [
        # DOIs: the expected standard_id is lowercased
        ("doi:10.5061/DRYAD.q447c/1", "doi:10.5061/dryad.q447c/1"),
        ("doi:10.5061/dryad.q447c/1", "doi:10.5061/dryad.q447c/1"),
        # infers by default
        ("10.5061/dryad.q447c/1", "doi:10.5061/dryad.q447c/1"),
        # shortDOIs: expected to expand to the full DOI, regardless of case
        ("doi:10/b6vnmd", "doi:10.1016/s0933-3657(96)00367-3"),
        ("doi:10/B6VNMD", "doi:10.1016/s0933-3657(96)00367-3"),
        # passthrough non-existent shortDOI
        ("doi:10/xxxxxxxxxxxxxYY", "doi:10/xxxxxxxxxxxxxyy"),
        # pmid is expected to be reported with the pubmed prefix
        ("pmid:24159271", "pubmed:24159271"),
        # ISBNs: expected as 13 digits without hyphens
        ("isbn:1339919885", "isbn:9781339919881"),
        ("isbn:1-339-91988-5", "isbn:9781339919881"),
        ("isbn:978-0-387-95069-3", "isbn:9780387950693"),
        ("isbn:9780387950938", "isbn:9780387950938"),
        ("isbn:1-55860-510-X", "isbn:9781558605107"),
        ("isbn:1-55860-510-x", "isbn:9781558605107"),
    ],
)
def test_citekey_standard_id(input_id, expected):
    """Check that CiteKey.standard_id gives the expected form for common prefixes."""
    standardized = CiteKey(input_id).standard_id
    assert standardized == expected

test_citekey_to_csl_item_arxiv

def test_citekey_to_csl_item_arxiv(

)
View Source
def test_citekey_to_csl_item_arxiv():
    """Check the CSL item built from an arxiv citekey."""
    item = citekey_to_csl_item("arxiv:cond-mat/0703470v2")
    assert item["id"] == "ES92tcdg"
    assert item["URL"] == "https://arxiv.org/abs/cond-mat/0703470v2"
    assert item["number"] == "cond-mat/0703470v2"
    assert item["version"] == "v2"
    assert item["type"] == "report"
    assert item["container-title"] == "arXiv"
    assert item["title"] == "Portraits of Complex Networks"
    # The first author is expected as a single literal name.
    author_list = item["author"]
    assert author_list[0]["literal"] == "J. P. Bagrow"
    assert item["DOI"] == "10.1209/0295-5075/81/68004"

test_citekey_to_csl_item_clinical_trial

def test_citekey_to_csl_item_clinical_trial(

)

Test clinicaltrials.gov citation support using CURIEs.

https://github.com/manubot/manubot/issues/216

View Source
def test_citekey_to_csl_item_clinical_trial():
    """
    Check the CSL item built from a clinicaltrials CURIE citekey.

    Test clinicaltrials.gov citation support using CURIEs.
    https://github.com/manubot/manubot/issues/216
    """
    item = citekey_to_csl_item("clinicaltrials:NCT04292899")
    # Title and source are matched by prefix only.
    title = item["title"]
    assert title.startswith("A Phase 3 Randomized Study")
    source = item["source"]
    assert source.startswith("clinicaltrials.gov")
    assert item["URL"] == "https://clinicaltrials.gov/ct2/show/NCT04292899"

test_citekey_to_csl_item_doi_datacite

def test_citekey_to_csl_item_doi_datacite(

)
View Source
@pytest.mark.xfail(reason="https://twitter.com/dhimmel/status/950443969313419264")
def test_citekey_to_csl_item_doi_datacite():
    """Check the CSL item built from a DOI citekey (expected to fail; see xfail reason)."""
    item = citekey_to_csl_item("doi:10.7287/peerj.preprints.3100v1")
    assert item["id"] == "11cb5HXoY"
    assert item["URL"] == "https://doi.org/10.7287/peerj.preprints.3100v1"
    assert item["DOI"] == "10.7287/peerj.preprints.3100v1"
    assert item["type"] == "report"
    title = item["title"]
    assert title == "Sci-Hub provides access to nearly all scholarly literature"
    # Only the first and last authors are checked.
    author_list = item["author"]
    assert author_list[0]["family"] == "Himmelstein"
    assert author_list[-1]["family"] == "Greene"

test_citekey_to_csl_item_isbn

def test_citekey_to_csl_item_isbn(

)
View Source
def test_citekey_to_csl_item_isbn():
    """Check the type and title of the CSL item built from an isbn citekey."""
    item = citekey_to_csl_item("isbn:9780387950693")
    item_type, title = item["type"], item["title"]
    assert item_type == "book"
    assert title == "Complex analysis"

test_citekey_to_csl_item_pmc

def test_citekey_to_csl_item_pmc(

)

https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/?format=csl&id=3041534

View Source
def test_citekey_to_csl_item_pmc():
    """
    Check the CSL item built from a pmc citekey.

    https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/?format=csl&id=3041534
    """
    item = citekey_to_csl_item("pmc:PMC3041534")
    assert item["id"] == "1CGP1eifE"
    assert item["URL"] == "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3041534/"
    assert item["container-title-short"] == "Summit Transl Bioinform"
    title = item["title"]
    assert (
        title
        == "Secondary Use of EHR: Data Quality Issues and Informatics Opportunities"
    )
    author_list = item["author"]
    assert author_list[0]["family"] == "Botsis"
    assert item["PMID"] == "21347133"
    assert item["PMCID"] == "PMC3041534"
    # The note field should record provenance and the standardized identifier.
    assert "generated by Manubot" in item["note"]
    assert "standard_id: pmc:PMC3041534" in item["note"]

test_citekey_to_csl_item_pubmed_1

def test_citekey_to_csl_item_pubmed_1(

)

Generated from XML returned by

https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=21347133&rettype=full

View Source
def test_citekey_to_csl_item_pubmed_1():
    """
    Check the CSL item built from a pubmed citekey.

    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=21347133&rettype=full
    """
    item = citekey_to_csl_item("pubmed:21347133")
    assert item["id"] == "o7hs4FTC"
    assert item["type"] == "article-journal"
    assert item["URL"] == "https://www.ncbi.nlm.nih.gov/pubmed/21347133"
    assert item["container-title"] == "Summit on translational bioinformatics"
    title = item["title"]
    assert (
        title
        == "Secondary Use of EHR: Data Quality Issues and Informatics Opportunities."
    )
    # Year, month and day are all expected here.
    assert item["issued"]["date-parts"] == [[2010, 3, 1]]
    author_list = item["author"]
    assert author_list[0]["given"] == "Taxiarchis"
    assert author_list[0]["family"] == "Botsis"
    assert item["PMID"] == "21347133"
    assert item["PMCID"] == "PMC3041534"

test_citekey_to_csl_item_pubmed_2

def test_citekey_to_csl_item_pubmed_2(

)

Generated from XML returned by

https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=27094199&rettype=full

View Source
def test_citekey_to_csl_item_pubmed_2():
    """
    Check the CSL item built from a second pubmed citekey.

    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=27094199&rettype=full
    """
    item = citekey_to_csl_item("pubmed:27094199")
    # Printed output is captured by pytest and shown if an assertion fails.
    print(item)
    assert item["id"] == "5v0vabZu"
    assert item["type"] == "article-journal"
    assert item["URL"] == "https://www.ncbi.nlm.nih.gov/pubmed/27094199"
    assert item["container-title"] == "Circulation. Cardiovascular genetics"
    assert item["container-title-short"] == "Circ Cardiovasc Genet"
    assert item["page"] == "179-84"
    title = item["title"]
    assert (
        title
        == "Genetic Association-Guided Analysis of Gene Networks for the Study of Complex Traits."
    )
    # Only year and month are expected for this record.
    assert item["issued"]["date-parts"] == [[2016, 4]]
    author_list = item["author"]
    assert author_list[0]["given"] == "Casey S"
    assert author_list[0]["family"] == "Greene"
    assert item["PMID"] == "27094199"
    assert item["DOI"] == "10.1161/circgenetics.115.001181"

test_citekey_to_csl_item_pubmed_book

def test_citekey_to_csl_item_pubmed_book(
    caplog
)

Extracting CSL metadata from books in PubMed is not supported.

Logic not implemented to parse XML returned by https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29227604&rettype=full

View Source
def test_citekey_to_csl_item_pubmed_book(caplog):
    """
    Check that a PubMed book record yields None and a logged error message.

    Extracting CSL metadata from books in PubMed is not supported.
    Logic not implemented to parse XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29227604&rettype=full
    """
    expected_message = "Expected article to be an XML element with tag PubmedArticle, received tag 'PubmedBookArticle'"
    result = citekey_to_csl_item("pmid:29227604", log_level="ERROR")
    assert result is None
    assert expected_message in caplog.text

test_citekey_to_csl_item_pubmed_with_numeric_month

def test_citekey_to_csl_item_pubmed_with_numeric_month(

)

Generated from XML returned by

https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29028984&rettype=full

See https://github.com/manubot/manubot/issues/69

View Source
def test_citekey_to_csl_item_pubmed_with_numeric_month():
    """
    Check the issued date parts of the CSL item for pmid:29028984.

    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29028984&rettype=full

    See https://github.com/manubot/manubot/issues/69
    """
    item = citekey_to_csl_item("pmid:29028984")
    # Printed output is captured by pytest and shown if the assertion fails.
    print(item)
    date_parts = item["issued"]["date-parts"]
    assert date_parts == [[2018, 3, 15]]