Module manubot.cite.tests.test_citekey_api
Tests API-level functions in manubot.cite. Both functions are found in citekey.py
View Source
"""Tests API-level functions in manubot.cite. Both functions are found in citekey.py"""
import pytest
from manubot.cite import citekey_to_csl_item
from manubot.cite.citekey import CiteKey
@pytest.mark.parametrize(
    "input_id,expected",
    [
        # DOI given in mixed case versus all lowercase
        ("doi:10.5061/DRYAD.q447c/1", "doi:10.5061/dryad.q447c/1"),
        ("doi:10.5061/dryad.q447c/1", "doi:10.5061/dryad.q447c/1"),
        # infers by default
        ("10.5061/dryad.q447c/1", "doi:10.5061/dryad.q447c/1"),
        # shortDOIs, in either letter case
        ("doi:10/b6vnmd", "doi:10.1016/s0933-3657(96)00367-3"),
        ("doi:10/B6VNMD", "doi:10.1016/s0933-3657(96)00367-3"),
        # passthrough non-existent shortDOI
        ("doi:10/xxxxxxxxxxxxxYY", "doi:10/xxxxxxxxxxxxxyy"),
        # pmid prefix is expected to come back as pubmed
        ("pmid:24159271", "pubmed:24159271"),
        # ISBNs, with and without hyphens; 13-digit form expected
        ("isbn:1339919885", "isbn:9781339919881"),
        ("isbn:1-339-91988-5", "isbn:9781339919881"),
        ("isbn:978-0-387-95069-3", "isbn:9780387950693"),
        ("isbn:9780387950938", "isbn:9780387950938"),
        ("isbn:1-55860-510-X", "isbn:9781558605107"),
        ("isbn:1-55860-510-x", "isbn:9781558605107"),
    ],
)
def test_citekey_standard_id(input_id, expected):
    """
    Check the CiteKey.standard_id property against the expected
    standardized identifier for a range of common prefixes.
    """
    standardized = CiteKey(input_id).standard_id
    assert standardized == expected
# Shared (input_id, prefix) cases for the prefix-inference tests below.
# Each input_id is written without a prefix; the second element is the
# prefix expected when CiteKey is built with infer_prefix=True
# (None where no prefix is expected).
test_citekey_infer_prefix_params = [
    ("10.5061/dryad.q447c/1", "doi"),
    ("10/b6vnmd", "doi"),
    ("24159271", "pubmed"),
    ("1", "pubmed"),
    ("PMC3041534", "pmc"),
    ("Q50051684", "wikidata"),
    ("1407.3561v1", "arxiv"),
    ("no-prefix-to-infer", None),
]
@pytest.mark.parametrize("input_id,prefix", test_citekey_infer_prefix_params)
def test_citekey_infer_prefix(input_id, prefix):
    """
    With infer_prefix=True, the CiteKey prefix must equal the expected prefix.
    """
    inferred = CiteKey(input_id, infer_prefix=True).prefix
    assert inferred == prefix
@pytest.mark.parametrize("input_id,prefix", test_citekey_infer_prefix_params)
def test_citekey_no_infer_prefix(input_id, prefix):
    """
    With infer_prefix=False, both prefix and accession must be None.

    The prefix parameter is supplied by the shared parametrization
    but is not used by this test.
    """
    parsed = CiteKey(input_id, infer_prefix=False)
    assert parsed.prefix is None
    assert parsed.accession is None
@pytest.mark.xfail(reason="https://twitter.com/dhimmel/status/950443969313419264")
def test_citekey_to_csl_item_doi_datacite():
    """
    Check selected CSL fields returned for a DOI citekey.

    Marked as an expected failure; the reason URL is given in the decorator.
    """
    item = citekey_to_csl_item("doi:10.7287/peerj.preprints.3100v1")
    assert item["id"] == "11cb5HXoY"
    assert item["URL"] == "https://doi.org/10.7287/peerj.preprints.3100v1"
    assert item["DOI"] == "10.7287/peerj.preprints.3100v1"
    assert item["type"] == "report"
    title = item["title"]
    assert title == "Sci-Hub provides access to nearly all scholarly literature"
    # Only the first and last authors are checked.
    contributors = item["author"]
    assert contributors[0]["family"] == "Himmelstein"
    assert contributors[-1]["family"] == "Greene"
def test_citekey_to_csl_item_arxiv():
    """
    Check selected CSL fields returned for an arXiv citekey.
    """
    item = citekey_to_csl_item("arxiv:cond-mat/0703470v2")
    assert item["id"] == "ES92tcdg"
    assert item["URL"] == "https://arxiv.org/abs/cond-mat/0703470v2"
    assert item["number"] == "cond-mat/0703470v2"
    assert item["version"] == "v2"
    assert item["type"] == "report"
    assert item["container-title"] == "arXiv"
    assert item["title"] == "Portraits of Complex Networks"
    # Authors are compared on their "literal" name.
    contributors = item["author"]
    assert contributors[0]["literal"] == "J. P. Bagrow"
    assert item["DOI"] == "10.1209/0295-5075/81/68004"
def test_citekey_to_csl_item_pmc():
    """
    Check selected CSL fields returned for a PMC citekey.

    https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/?format=csl&id=3041534
    """
    item = citekey_to_csl_item("pmc:PMC3041534")
    assert item["id"] == "1CGP1eifE"
    assert item["URL"] == "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3041534/"
    assert item["container-title-short"] == "Summit Transl Bioinform"
    title = item["title"]
    assert (
        title
        == "Secondary Use of EHR: Data Quality Issues and Informatics Opportunities"
    )
    contributors = item["author"]
    assert contributors[0]["family"] == "Botsis"
    assert item["PMID"] == "21347133"
    assert item["PMCID"] == "PMC3041534"
    # The note field must contain the Manubot marker and the standard_id.
    note = item["note"]
    assert "generated by Manubot" in note
    assert "standard_id: pmc:PMC3041534" in note
def test_citekey_to_csl_item_pubmed_1():
    """
    Check selected CSL fields for PubMed record 21347133.

    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=21347133&rettype=full
    """
    item = citekey_to_csl_item("pubmed:21347133")
    assert item["id"] == "o7hs4FTC"
    assert item["type"] == "article-journal"
    assert item["URL"] == "https://www.ncbi.nlm.nih.gov/pubmed/21347133"
    assert item["container-title"] == "Summit on translational bioinformatics"
    title = item["title"]
    assert (
        title
        == "Secondary Use of EHR: Data Quality Issues and Informatics Opportunities."
    )
    assert item["issued"]["date-parts"] == [[2010, 3, 1]]
    contributors = item["author"]
    assert contributors[0]["given"] == "Taxiarchis"
    assert contributors[0]["family"] == "Botsis"
    assert item["PMID"] == "21347133"
    assert item["PMCID"] == "PMC3041534"
def test_citekey_to_csl_item_pubmed_2():
    """
    Check selected CSL fields for PubMed record 27094199.

    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=27094199&rettype=full
    """
    item = citekey_to_csl_item("pubmed:27094199")
    # Emit the full item so it is visible in the captured output.
    print(item)
    assert item["id"] == "5v0vabZu"
    assert item["type"] == "article-journal"
    assert item["URL"] == "https://www.ncbi.nlm.nih.gov/pubmed/27094199"
    assert item["container-title"] == "Circulation. Cardiovascular genetics"
    assert item["container-title-short"] == "Circ Cardiovasc Genet"
    assert item["page"] == "179-84"
    title = item["title"]
    assert (
        title
        == "Genetic Association-Guided Analysis of Gene Networks for the Study of Complex Traits."
    )
    # Only year and month are expected in the issued date.
    assert item["issued"]["date-parts"] == [[2016, 4]]
    contributors = item["author"]
    assert contributors[0]["given"] == "Casey S"
    assert contributors[0]["family"] == "Greene"
    assert item["PMID"] == "27094199"
    assert item["DOI"] == "10.1161/circgenetics.115.001181"
def test_citekey_to_csl_item_pubmed_with_numeric_month():
    """
    Check the issued date-parts for PubMed record 29028984.

    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29028984&rettype=full
    See https://github.com/manubot/manubot/issues/69
    """
    item = citekey_to_csl_item("pmid:29028984")
    # Emit the full item so it is visible in the captured output.
    print(item)
    issued_parts = item["issued"]["date-parts"]
    assert issued_parts == [[2018, 3, 15]]
def test_citekey_to_csl_item_pubmed_book(caplog):
    """
    Extracting CSL metadata from books in PubMed is not supported.

    Logic not implemented to parse XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29227604&rettype=full

    The call is expected to return None and leave an explanatory
    message in the captured log text.
    """
    expected_message = "Expected article to be an XML element with tag PubmedArticle, received tag 'PubmedBookArticle'"
    result = citekey_to_csl_item("pmid:29227604", log_level="ERROR")
    assert result is None
    assert expected_message in caplog.text
def test_citekey_to_csl_item_isbn():
    """
    Check the type and title of the CSL item returned for an ISBN citekey.
    """
    item = citekey_to_csl_item("isbn:9780387950693")
    item_type, title = item["type"], item["title"]
    assert item_type == "book"
    assert title == "Complex analysis"
def test_citekey_to_csl_item_clinical_trial():
    """
    Test clinicaltrials.gov citation support using CURIEs.

    https://github.com/manubot/manubot/issues/216
    """
    item = citekey_to_csl_item("clinicaltrials:NCT04292899")
    # Title and source are only checked by their leading text.
    title = item["title"]
    assert title.startswith("A Phase 3 Randomized Study")
    source = item["source"]
    assert source.startswith("clinicaltrials.gov")
    assert item["URL"] == "https://clinicaltrials.gov/ct2/show/NCT04292899"
Variables
test_citekey_infer_prefix_params
Functions
test_citekey_infer_prefix
def test_citekey_infer_prefix(
input_id,
prefix
)
View Source
@pytest.mark.parametrize("input_id,prefix", test_citekey_infer_prefix_params)
def test_citekey_infer_prefix(input_id, prefix):
    """
    With infer_prefix=True, the CiteKey prefix must equal the expected prefix.
    """
    inferred = CiteKey(input_id, infer_prefix=True).prefix
    assert inferred == prefix
test_citekey_no_infer_prefix
def test_citekey_no_infer_prefix(
input_id,
prefix
)
View Source
@pytest.mark.parametrize("input_id,prefix", test_citekey_infer_prefix_params)
def test_citekey_no_infer_prefix(input_id, prefix):
    """
    With infer_prefix=False, both prefix and accession must be None.

    The prefix parameter is supplied by the shared parametrization
    but is not used by this test.
    """
    parsed = CiteKey(input_id, infer_prefix=False)
    assert parsed.prefix is None
    assert parsed.accession is None
test_citekey_standard_id
def test_citekey_standard_id(
input_id,
expected
)
Test CiteKey.standard_id property for common prefixes.
View Source
@pytest.mark.parametrize(
    "input_id,expected",
    [
        # DOI given in mixed case versus all lowercase
        ("doi:10.5061/DRYAD.q447c/1", "doi:10.5061/dryad.q447c/1"),
        ("doi:10.5061/dryad.q447c/1", "doi:10.5061/dryad.q447c/1"),
        # infers by default
        ("10.5061/dryad.q447c/1", "doi:10.5061/dryad.q447c/1"),
        # shortDOIs, in either letter case
        ("doi:10/b6vnmd", "doi:10.1016/s0933-3657(96)00367-3"),
        ("doi:10/B6VNMD", "doi:10.1016/s0933-3657(96)00367-3"),
        # passthrough non-existent shortDOI
        ("doi:10/xxxxxxxxxxxxxYY", "doi:10/xxxxxxxxxxxxxyy"),
        # pmid prefix is expected to come back as pubmed
        ("pmid:24159271", "pubmed:24159271"),
        # ISBNs, with and without hyphens; 13-digit form expected
        ("isbn:1339919885", "isbn:9781339919881"),
        ("isbn:1-339-91988-5", "isbn:9781339919881"),
        ("isbn:978-0-387-95069-3", "isbn:9780387950693"),
        ("isbn:9780387950938", "isbn:9780387950938"),
        ("isbn:1-55860-510-X", "isbn:9781558605107"),
        ("isbn:1-55860-510-x", "isbn:9781558605107"),
    ],
)
def test_citekey_standard_id(input_id, expected):
    """
    Check the CiteKey.standard_id property against the expected
    standardized identifier for a range of common prefixes.
    """
    standardized = CiteKey(input_id).standard_id
    assert standardized == expected
test_citekey_to_csl_item_arxiv
def test_citekey_to_csl_item_arxiv(
)
View Source
def test_citekey_to_csl_item_arxiv():
    """
    Check selected CSL fields returned for an arXiv citekey.
    """
    item = citekey_to_csl_item("arxiv:cond-mat/0703470v2")
    assert item["id"] == "ES92tcdg"
    assert item["URL"] == "https://arxiv.org/abs/cond-mat/0703470v2"
    assert item["number"] == "cond-mat/0703470v2"
    assert item["version"] == "v2"
    assert item["type"] == "report"
    assert item["container-title"] == "arXiv"
    assert item["title"] == "Portraits of Complex Networks"
    # Authors are compared on their "literal" name.
    contributors = item["author"]
    assert contributors[0]["literal"] == "J. P. Bagrow"
    assert item["DOI"] == "10.1209/0295-5075/81/68004"
test_citekey_to_csl_item_clinical_trial
def test_citekey_to_csl_item_clinical_trial(
)
Test clinicaltrials.gov citation support using CURIEs.
https://github.com/manubot/manubot/issues/216
View Source
def test_citekey_to_csl_item_clinical_trial():
    """
    Test clinicaltrials.gov citation support using CURIEs.

    https://github.com/manubot/manubot/issues/216
    """
    item = citekey_to_csl_item("clinicaltrials:NCT04292899")
    # Title and source are only checked by their leading text.
    title = item["title"]
    assert title.startswith("A Phase 3 Randomized Study")
    source = item["source"]
    assert source.startswith("clinicaltrials.gov")
    assert item["URL"] == "https://clinicaltrials.gov/ct2/show/NCT04292899"
test_citekey_to_csl_item_doi_datacite
def test_citekey_to_csl_item_doi_datacite(
)
View Source
@pytest.mark.xfail(reason="https://twitter.com/dhimmel/status/950443969313419264")
def test_citekey_to_csl_item_doi_datacite():
    """
    Check selected CSL fields returned for a DOI citekey.

    Marked as an expected failure; the reason URL is given in the decorator.
    """
    item = citekey_to_csl_item("doi:10.7287/peerj.preprints.3100v1")
    assert item["id"] == "11cb5HXoY"
    assert item["URL"] == "https://doi.org/10.7287/peerj.preprints.3100v1"
    assert item["DOI"] == "10.7287/peerj.preprints.3100v1"
    assert item["type"] == "report"
    title = item["title"]
    assert title == "Sci-Hub provides access to nearly all scholarly literature"
    # Only the first and last authors are checked.
    contributors = item["author"]
    assert contributors[0]["family"] == "Himmelstein"
    assert contributors[-1]["family"] == "Greene"
test_citekey_to_csl_item_isbn
def test_citekey_to_csl_item_isbn(
)
View Source
def test_citekey_to_csl_item_isbn():
    """
    Check the type and title of the CSL item returned for an ISBN citekey.
    """
    item = citekey_to_csl_item("isbn:9780387950693")
    item_type, title = item["type"], item["title"]
    assert item_type == "book"
    assert title == "Complex analysis"
test_citekey_to_csl_item_pmc
def test_citekey_to_csl_item_pmc(
)
https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/?format=csl&id=3041534
View Source
def test_citekey_to_csl_item_pmc():
    """
    Check selected CSL fields returned for a PMC citekey.

    https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/?format=csl&id=3041534
    """
    item = citekey_to_csl_item("pmc:PMC3041534")
    assert item["id"] == "1CGP1eifE"
    assert item["URL"] == "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3041534/"
    assert item["container-title-short"] == "Summit Transl Bioinform"
    title = item["title"]
    assert (
        title
        == "Secondary Use of EHR: Data Quality Issues and Informatics Opportunities"
    )
    contributors = item["author"]
    assert contributors[0]["family"] == "Botsis"
    assert item["PMID"] == "21347133"
    assert item["PMCID"] == "PMC3041534"
    # The note field must contain the Manubot marker and the standard_id.
    note = item["note"]
    assert "generated by Manubot" in note
    assert "standard_id: pmc:PMC3041534" in note
test_citekey_to_csl_item_pubmed_1
def test_citekey_to_csl_item_pubmed_1(
)
Generated from XML returned by
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=21347133&rettype=full
View Source
def test_citekey_to_csl_item_pubmed_1():
    """
    Check selected CSL fields for PubMed record 21347133.

    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=21347133&rettype=full
    """
    item = citekey_to_csl_item("pubmed:21347133")
    assert item["id"] == "o7hs4FTC"
    assert item["type"] == "article-journal"
    assert item["URL"] == "https://www.ncbi.nlm.nih.gov/pubmed/21347133"
    assert item["container-title"] == "Summit on translational bioinformatics"
    title = item["title"]
    assert (
        title
        == "Secondary Use of EHR: Data Quality Issues and Informatics Opportunities."
    )
    assert item["issued"]["date-parts"] == [[2010, 3, 1]]
    contributors = item["author"]
    assert contributors[0]["given"] == "Taxiarchis"
    assert contributors[0]["family"] == "Botsis"
    assert item["PMID"] == "21347133"
    assert item["PMCID"] == "PMC3041534"
test_citekey_to_csl_item_pubmed_2
def test_citekey_to_csl_item_pubmed_2(
)
Generated from XML returned by
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=27094199&rettype=full
View Source
def test_citekey_to_csl_item_pubmed_2():
    """
    Check selected CSL fields for PubMed record 27094199.

    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=27094199&rettype=full
    """
    item = citekey_to_csl_item("pubmed:27094199")
    # Emit the full item so it is visible in the captured output.
    print(item)
    assert item["id"] == "5v0vabZu"
    assert item["type"] == "article-journal"
    assert item["URL"] == "https://www.ncbi.nlm.nih.gov/pubmed/27094199"
    assert item["container-title"] == "Circulation. Cardiovascular genetics"
    assert item["container-title-short"] == "Circ Cardiovasc Genet"
    assert item["page"] == "179-84"
    title = item["title"]
    assert (
        title
        == "Genetic Association-Guided Analysis of Gene Networks for the Study of Complex Traits."
    )
    # Only year and month are expected in the issued date.
    assert item["issued"]["date-parts"] == [[2016, 4]]
    contributors = item["author"]
    assert contributors[0]["given"] == "Casey S"
    assert contributors[0]["family"] == "Greene"
    assert item["PMID"] == "27094199"
    assert item["DOI"] == "10.1161/circgenetics.115.001181"
test_citekey_to_csl_item_pubmed_book
def test_citekey_to_csl_item_pubmed_book(
caplog
)
Extracting CSL metadata from books in PubMed is not supported.
Logic not implemented to parse XML returned by https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29227604&rettype=full
View Source
def test_citekey_to_csl_item_pubmed_book(caplog):
    """
    Extracting CSL metadata from books in PubMed is not supported.

    Logic not implemented to parse XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29227604&rettype=full

    The call is expected to return None and leave an explanatory
    message in the captured log text.
    """
    expected_message = "Expected article to be an XML element with tag PubmedArticle, received tag 'PubmedBookArticle'"
    result = citekey_to_csl_item("pmid:29227604", log_level="ERROR")
    assert result is None
    assert expected_message in caplog.text
test_citekey_to_csl_item_pubmed_with_numeric_month
def test_citekey_to_csl_item_pubmed_with_numeric_month(
)
Generated from XML returned by
https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29028984&rettype=full
See https://github.com/manubot/manubot/issues/69
View Source
def test_citekey_to_csl_item_pubmed_with_numeric_month():
    """
    Check the issued date-parts for PubMed record 29028984.

    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29028984&rettype=full
    See https://github.com/manubot/manubot/issues/69
    """
    item = citekey_to_csl_item("pmid:29028984")
    # Emit the full item so it is visible in the captured output.
    print(item)
    issued_parts = item["issued"]["date-parts"]
    assert issued_parts == [[2018, 3, 15]]