Skip to content

Module manubot.cite.tests.test_citekey

Tests the rest of the functions in manubot.cite that are not covered by test_citekey_api.py.

View Source
"""Tests rest of functions in manubot.cite, not covered by test_citekey_api.py."""

import pytest

from manubot.cite.citekey import CiteKey, shorten_citekey, url_to_citekey

@pytest.mark.parametrize(
    ["input_id", "citekey_attrs"],
    [
        pytest.param(
            "DOI:10.5061/DRYad.q447c/1",
            {
                "prefix": "DOI",
                "prefix_lower": "doi",
                "standard_accession": "10.5061/dryad.q447c/1",
                "standard_id": "doi:10.5061/dryad.q447c/1",
            },
            id="doi",
        ),
        pytest.param(
            "10.5061/DRYad.q447c/1",
            {
                "prefix": "doi",
                "prefix_lower": "doi",
                "standard_prefix": "doi",
                "standard_accession": "10.5061/dryad.q447c/1",
                "standard_id": "doi:10.5061/dryad.q447c/1",
            },
            id="doi-infer-prefix",
        ),
        pytest.param(
            "arXiv:1407.3561v1",
            {
                "prefix": "arXiv",
                "prefix_lower": "arxiv",
                "standard_prefix": "arxiv",
                "standard_accession": "1407.3561v1",
                "standard_id": "arxiv:1407.3561v1",
            },
            id="arxiv",
        ),
        pytest.param("pmid:24159271", {"standard_id": "pubmed:24159271"}, id="pmid"),
        pytest.param(
            "pmcid:PMC4304851",
            {
                "prefix": "pmcid",
                "prefix_lower": "pmcid",
                "standard_prefix": "pmc",
                "standard_id": "pmc:PMC4304851",
            },
            id="pmcid",
        ),
        pytest.param(
            "https://greenelab.github.io/manubot-rootstock/",
            {
                "prefix": "https",
                "prefix_lower": "https",
                "standard_prefix": "url",
                "standard_id": "url:https://greenelab.github.io/manubot-rootstock/",
            },
            id="https",
        ),
        pytest.param(
            "isbn:1-339-91988-5", {"standard_id": "isbn:9781339919881"}, id="isbn"
        ),
        pytest.param("DOID:14330", {"standard_id": "DOID:14330"}, id="doid"),
        pytest.param(
            "PubChem.substance:100101",
            {"standard_id": "pubchem.substance:100101"},
            id="PubChem.substance",
        ),
        pytest.param(
            "Wikidata:Q50051684", {"standard_id": "wikidata:Q50051684"}, id="wikidata"
        ),
    ],
)
def test_citekey_class(input_id, citekey_attrs):
    """Check the attributes CiteKey exposes for a raw input identifier.

    Each case pairs an ``input_id`` with a mapping of attribute name to
    expected value. Only the attributes listed for a case are compared.
    """
    key_obj = CiteKey(input_id)
    # Printing the instance exercises its string conversion.
    print(key_obj)
    for attr_name, expected in citekey_attrs.items():
        assert getattr(key_obj, attr_name) == expected
    # Every case must also yield a truthy short_id.
    assert key_obj.short_id

def test_citekey_check_input_id_type():
    """CiteKey must raise TypeError when input_id is not a string."""
    expected_message = "input_id should be type 'str' not 'NoneType': None"
    with pytest.raises(TypeError) as excinfo:
        CiteKey(None)
    # The message for a None input is pinned exactly.
    assert str(excinfo.value) == expected_message
    # An integer input is rejected too; only the exception type is checked.
    with pytest.raises(TypeError):
        CiteKey(0)

def test_citekey_check_input_id_at_prefix():
    """CiteKey must raise ValueError for an input_id that begins with '@'."""
    with pytest.raises(ValueError) as excinfo:
        CiteKey("@my-citekey")
    message = str(excinfo.value)
    # Only this fragment of the error message is pinned.
    assert "input_id: '@my-citekey'\nstarts with '@'" in message

@pytest.mark.parametrize(
    "standard_citekey,expected",
    [
        ("doi:10.5061/dryad.q447c/1", "kQFQ8EaO"),
        ("arxiv:1407.3561v1", "16kozZ9Ys"),
        ("pmid:24159271", "11sli93ov"),
        ("url:http://blog.dhimmel.com/irreproducible-timestamps/", "QBWMEuxW"),
    ],
)
def test_shorten_citekey(standard_citekey, expected):
    """shorten_citekey must return the pinned short form for each citekey."""
    assert shorten_citekey(standard_citekey) == expected

@pytest.mark.parametrize(
    "citekey",
    [
        # DOIs, including a shortDOI
        "doi:10.7717/peerj.705",
        "doi:10/b6vnmd",
        # PubMed Central and PubMed
        "pmc:PMC4304851",
        "pubmed:25648772",
        # arXiv identifiers, with and without a version suffix
        "arxiv:1407.3561",
        "arxiv:1407.3561v1",
        "arxiv:math.GT/0309136",
        "arxiv:math.GT/0309136v1",
        "arxiv:hep-th/9305059",
        "arxiv:hep-th/9305059v2",
        # ISBNs
        "isbn:978-1-339-91988-1",
        "isbn:1-339-91988-5",
        # Wikidata items
        "wikidata:Q1",
        "wikidata:Q50051684",
        # URLs, with and without an explicit url: prefix
        "url:https://peerj.com/articles/705/",
        "https://peerj.com/articles/705/",
        # Other prefixes
        "GO:0006915",
        "go:0006915",
        "clinicaltrials:NCT04372602",
    ],
)
def test_inspect_citekey_passes(citekey):
    """
    These citekeys should pass inspection: CiteKey(citekey).inspect()
    must return None for each of them.
    """
    assert CiteKey(citekey).inspect() is None

@pytest.mark.parametrize(
    ["citekey", "contains"],
    [
        ("doi:10.771/peerj.705", "Double check the DOI"),
        ("doi:10/b6v_nmd", "Double check the shortDOI"),
        ("doi:7717/peerj.705", "must start with '10.'"),
        ("doi:b6vnmd", "must start with '10.'"),
        ("pmcid:25648772", "must start with 'PMC'"),
        (
            "pmid:PMC4304851",
            "Should 'pmid:PMC4304851' switch the citation source to 'pmc'?",
        ),
        ("isbn:1-339-91988-X", "identifier violates the ISBN syntax"),
        ("wikidata:P212", "item IDs must start with 'Q'"),
        ("wikidata:QABCD", "does not conform to the Wikidata regex"),
        ("arxiv:YYMM.number", "must conform to syntax"),
        ("GO:GO:0006915", "GO:0006915 does not match regex"),
    ],
)
def test_inspect_citekey_fails(citekey, contains):
    """
    These citekeys should fail inspection: CiteKey(citekey).inspect()
    must return a string report that includes the ``contains`` fragment.
    """
    problem = CiteKey(citekey).inspect()
    assert problem is not None
    assert isinstance(problem, str)
    assert contains in problem

@pytest.mark.parametrize(
    ["url", "citekey"],
    [
        # doi.org
        ("https://www.doi.org/", "url:https://www.doi.org/"),
        (
            "https://www.doi.org/factsheets/Identifier_Interoper.html",
            "url:https://www.doi.org/factsheets/Identifier_Interoper.html",
        ),
        (
            "https://doi.org/10.1097%2F00004032-200403000-00012",
            "doi:10.1097/00004032-200403000-00012",
        ),
        ("http://dx.doi.org/10.7554/eLife.46574", "doi:10.7554/eLife.46574"),
        ("https://doi.org/10/b6vnmd#anchor", "doi:10/b6vnmd"),
        # ShortDOI URLs without the `10/` prefix are not yet supported
        ("https://doi.org/b6vnmd", "url:https://doi.org/b6vnmd"),
        # bioRxiv and Sci-Hub
        (
            "https://www.biorxiv.org/about-biorxiv",
            "url:https://www.biorxiv.org/about-biorxiv",
        ),
        ("https://sci-hub.tw/10.1038/nature19057", "doi:10.1038/nature19057"),
        ("https://www.biorxiv.org/content/10.1101/087619v3", "doi:10.1101/087619"),
        ("https://www.biorxiv.org/content/10.1101/087619v3.full", "doi:10.1101/087619"),
        (
            "https://www.biorxiv.org/content/early/2017/08/31/087619.full.pdf",
            "doi:10.1101/087619",
        ),
        (
            "https://www.biorxiv.org/content/10.1101/2019.12.11.872580v1",
            "doi:10.1101/2019.12.11.872580",
        ),
        (
            "https://www.biorxiv.org/content/10.1101/2019.12.11.872580v1.full.pdf+html",
            "doi:10.1101/2019.12.11.872580",
        ),
        (
            "https://www.biorxiv.org/content/10.1101/2019.12.11.872580v1.full.pdf",
            "doi:10.1101/2019.12.11.872580",
        ),
        # NCBI: PubMed and PubMed Central
        ("https://www.ncbi.nlm.nih.gov", "url:https://www.ncbi.nlm.nih.gov"),
        (
            "https://www.ncbi.nlm.nih.gov/pubmed",
            "url:https://www.ncbi.nlm.nih.gov/pubmed",
        ),
        ("https://www.ncbi.nlm.nih.gov/pubmed/31233491", "pmid:31233491"),
        ("https://www.ncbi.nlm.nih.gov/pmc/", "url:https://www.ncbi.nlm.nih.gov/pmc/"),
        (
            "https://www.ncbi.nlm.nih.gov/pmc/about/intro/",
            "url:https://www.ncbi.nlm.nih.gov/pmc/about/intro/",
        ),
        ("https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4304851/", "pmcid:PMC4304851"),
        # Wikidata
        (
            "https://www.wikidata.org/wiki/Wikidata:Main_Page",
            "url:https://www.wikidata.org/wiki/Wikidata:Main_Page",
        ),
        ("https://www.wikidata.org/wiki/Q50051684", "wikidata:Q50051684"),
        # arXiv
        ("https://arxiv.org/", "url:https://arxiv.org/"),
        (
            "https://arxiv.org/list/q-fin/recent",
            "url:https://arxiv.org/list/q-fin/recent",
        ),
        ("https://arxiv.org/abs/1912.03529v1", "arxiv:1912.03529v1"),
        ("https://arxiv.org/pdf/1912.03529v1.pdf", "arxiv:1912.03529v1"),
        ("https://arxiv.org/ps/1912.03529v1", "arxiv:1912.03529v1"),
        ("https://arxiv.org/abs/math.GT/0309136", "arxiv:math.GT/0309136"),
        ("https://arxiv.org/abs/hep-th/9305059", "arxiv:hep-th/9305059"),
        ("https://arxiv.org/pdf/hep-th/9305059.pdf", "arxiv:hep-th/9305059"),
    ],
)
def test_url_to_citekey(url, citekey):
    """url_to_citekey must convert each URL to the expected citekey.

    The cases include URLs expected to map to a source-specific citekey and
    URLs expected to come back as a generic ``url:`` citekey.
    """
    converted = url_to_citekey(url)
    assert converted == citekey

Functions

test_citekey_check_input_id_at_prefix

def test_citekey_check_input_id_at_prefix(

)
View Source
def test_citekey_check_input_id_at_prefix():
    """CiteKey must raise ValueError for an input_id that begins with '@'."""
    with pytest.raises(ValueError) as excinfo:
        CiteKey("@my-citekey")
    message = str(excinfo.value)
    # Only this fragment of the error message is pinned.
    assert "input_id: '@my-citekey'\nstarts with '@'" in message

test_citekey_check_input_id_type

def test_citekey_check_input_id_type(

)
View Source
def test_citekey_check_input_id_type():
    """CiteKey must raise TypeError when input_id is not a string."""
    expected_message = "input_id should be type 'str' not 'NoneType': None"
    with pytest.raises(TypeError) as excinfo:
        CiteKey(None)
    # The message for a None input is pinned exactly.
    assert str(excinfo.value) == expected_message
    # An integer input is rejected too; only the exception type is checked.
    with pytest.raises(TypeError):
        CiteKey(0)

test_citekey_class

def test_citekey_class(
    input_id,
    citekey_attrs
)
View Source
@pytest.mark.parametrize(
    ["input_id", "citekey_attrs"],
    [
        pytest.param(
            "DOI:10.5061/DRYad.q447c/1",
            {
                "prefix": "DOI",
                "prefix_lower": "doi",
                "standard_accession": "10.5061/dryad.q447c/1",
                "standard_id": "doi:10.5061/dryad.q447c/1",
            },
            id="doi",
        ),
        pytest.param(
            "10.5061/DRYad.q447c/1",
            {
                "prefix": "doi",
                "prefix_lower": "doi",
                "standard_prefix": "doi",
                "standard_accession": "10.5061/dryad.q447c/1",
                "standard_id": "doi:10.5061/dryad.q447c/1",
            },
            id="doi-infer-prefix",
        ),
        pytest.param(
            "arXiv:1407.3561v1",
            {
                "prefix": "arXiv",
                "prefix_lower": "arxiv",
                "standard_prefix": "arxiv",
                "standard_accession": "1407.3561v1",
                "standard_id": "arxiv:1407.3561v1",
            },
            id="arxiv",
        ),
        pytest.param("pmid:24159271", {"standard_id": "pubmed:24159271"}, id="pmid"),
        pytest.param(
            "pmcid:PMC4304851",
            {
                "prefix": "pmcid",
                "prefix_lower": "pmcid",
                "standard_prefix": "pmc",
                "standard_id": "pmc:PMC4304851",
            },
            id="pmcid",
        ),
        pytest.param(
            "https://greenelab.github.io/manubot-rootstock/",
            {
                "prefix": "https",
                "prefix_lower": "https",
                "standard_prefix": "url",
                "standard_id": "url:https://greenelab.github.io/manubot-rootstock/",
            },
            id="https",
        ),
        pytest.param(
            "isbn:1-339-91988-5", {"standard_id": "isbn:9781339919881"}, id="isbn"
        ),
        pytest.param("DOID:14330", {"standard_id": "DOID:14330"}, id="doid"),
        pytest.param(
            "PubChem.substance:100101",
            {"standard_id": "pubchem.substance:100101"},
            id="PubChem.substance",
        ),
        pytest.param(
            "Wikidata:Q50051684", {"standard_id": "wikidata:Q50051684"}, id="wikidata"
        ),
    ],
)
def test_citekey_class(input_id, citekey_attrs):
    """Check the attributes CiteKey exposes for a raw input identifier.

    Each case pairs an ``input_id`` with a mapping of attribute name to
    expected value. Only the attributes listed for a case are compared.
    """
    key_obj = CiteKey(input_id)
    # Printing the instance exercises its string conversion.
    print(key_obj)
    for attr_name, expected in citekey_attrs.items():
        assert getattr(key_obj, attr_name) == expected
    # Every case must also yield a truthy short_id.
    assert key_obj.short_id

test_inspect_citekey_fails

def test_inspect_citekey_fails(
    citekey,
    contains
)

These citekeys should fail inspection by inspect_citekey.

View Source
@pytest.mark.parametrize(
    ["citekey", "contains"],
    [
        ("doi:10.771/peerj.705", "Double check the DOI"),
        ("doi:10/b6v_nmd", "Double check the shortDOI"),
        ("doi:7717/peerj.705", "must start with '10.'"),
        ("doi:b6vnmd", "must start with '10.'"),
        ("pmcid:25648772", "must start with 'PMC'"),
        (
            "pmid:PMC4304851",
            "Should 'pmid:PMC4304851' switch the citation source to 'pmc'?",
        ),
        ("isbn:1-339-91988-X", "identifier violates the ISBN syntax"),
        ("wikidata:P212", "item IDs must start with 'Q'"),
        ("wikidata:QABCD", "does not conform to the Wikidata regex"),
        ("arxiv:YYMM.number", "must conform to syntax"),
        ("GO:GO:0006915", "GO:0006915 does not match regex"),
    ],
)
def test_inspect_citekey_fails(citekey, contains):
    """
    These citekeys should fail inspection: CiteKey(citekey).inspect()
    must return a string report that includes the ``contains`` fragment.
    """
    problem = CiteKey(citekey).inspect()
    assert problem is not None
    assert isinstance(problem, str)
    assert contains in problem

test_inspect_citekey_passes

def test_inspect_citekey_passes(
    citekey
)

These citekeys should pass inspection by inspect_citekey.

View Source
@pytest.mark.parametrize(
    "citekey",
    [
        # DOIs, including a shortDOI
        "doi:10.7717/peerj.705",
        "doi:10/b6vnmd",
        # PubMed Central and PubMed
        "pmc:PMC4304851",
        "pubmed:25648772",
        # arXiv identifiers, with and without a version suffix
        "arxiv:1407.3561",
        "arxiv:1407.3561v1",
        "arxiv:math.GT/0309136",
        "arxiv:math.GT/0309136v1",
        "arxiv:hep-th/9305059",
        "arxiv:hep-th/9305059v2",
        # ISBNs
        "isbn:978-1-339-91988-1",
        "isbn:1-339-91988-5",
        # Wikidata items
        "wikidata:Q1",
        "wikidata:Q50051684",
        # URLs, with and without an explicit url: prefix
        "url:https://peerj.com/articles/705/",
        "https://peerj.com/articles/705/",
        # Other prefixes
        "GO:0006915",
        "go:0006915",
        "clinicaltrials:NCT04372602",
    ],
)
def test_inspect_citekey_passes(citekey):
    """
    These citekeys should pass inspection: CiteKey(citekey).inspect()
    must return None for each of them.
    """
    assert CiteKey(citekey).inspect() is None

test_shorten_citekey

def test_shorten_citekey(
    standard_citekey,
    expected
)
View Source
@pytest.mark.parametrize(
    "standard_citekey,expected",
    [
        ("doi:10.5061/dryad.q447c/1", "kQFQ8EaO"),
        ("arxiv:1407.3561v1", "16kozZ9Ys"),
        ("pmid:24159271", "11sli93ov"),
        ("url:http://blog.dhimmel.com/irreproducible-timestamps/", "QBWMEuxW"),
    ],
)
def test_shorten_citekey(standard_citekey, expected):
    """shorten_citekey must return the pinned short form for each citekey."""
    assert shorten_citekey(standard_citekey) == expected

test_url_to_citekey

def test_url_to_citekey(
    url,
    citekey
)
View Source
@pytest.mark.parametrize(
    ["url", "citekey"],
    [
        # doi.org
        ("https://www.doi.org/", "url:https://www.doi.org/"),
        (
            "https://www.doi.org/factsheets/Identifier_Interoper.html",
            "url:https://www.doi.org/factsheets/Identifier_Interoper.html",
        ),
        (
            "https://doi.org/10.1097%2F00004032-200403000-00012",
            "doi:10.1097/00004032-200403000-00012",
        ),
        ("http://dx.doi.org/10.7554/eLife.46574", "doi:10.7554/eLife.46574"),
        ("https://doi.org/10/b6vnmd#anchor", "doi:10/b6vnmd"),
        # ShortDOI URLs without the `10/` prefix are not yet supported
        ("https://doi.org/b6vnmd", "url:https://doi.org/b6vnmd"),
        # bioRxiv and Sci-Hub
        (
            "https://www.biorxiv.org/about-biorxiv",
            "url:https://www.biorxiv.org/about-biorxiv",
        ),
        ("https://sci-hub.tw/10.1038/nature19057", "doi:10.1038/nature19057"),
        ("https://www.biorxiv.org/content/10.1101/087619v3", "doi:10.1101/087619"),
        ("https://www.biorxiv.org/content/10.1101/087619v3.full", "doi:10.1101/087619"),
        (
            "https://www.biorxiv.org/content/early/2017/08/31/087619.full.pdf",
            "doi:10.1101/087619",
        ),
        (
            "https://www.biorxiv.org/content/10.1101/2019.12.11.872580v1",
            "doi:10.1101/2019.12.11.872580",
        ),
        (
            "https://www.biorxiv.org/content/10.1101/2019.12.11.872580v1.full.pdf+html",
            "doi:10.1101/2019.12.11.872580",
        ),
        (
            "https://www.biorxiv.org/content/10.1101/2019.12.11.872580v1.full.pdf",
            "doi:10.1101/2019.12.11.872580",
        ),
        # NCBI: PubMed and PubMed Central
        ("https://www.ncbi.nlm.nih.gov", "url:https://www.ncbi.nlm.nih.gov"),
        (
            "https://www.ncbi.nlm.nih.gov/pubmed",
            "url:https://www.ncbi.nlm.nih.gov/pubmed",
        ),
        ("https://www.ncbi.nlm.nih.gov/pubmed/31233491", "pmid:31233491"),
        ("https://www.ncbi.nlm.nih.gov/pmc/", "url:https://www.ncbi.nlm.nih.gov/pmc/"),
        (
            "https://www.ncbi.nlm.nih.gov/pmc/about/intro/",
            "url:https://www.ncbi.nlm.nih.gov/pmc/about/intro/",
        ),
        ("https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4304851/", "pmcid:PMC4304851"),
        # Wikidata
        (
            "https://www.wikidata.org/wiki/Wikidata:Main_Page",
            "url:https://www.wikidata.org/wiki/Wikidata:Main_Page",
        ),
        ("https://www.wikidata.org/wiki/Q50051684", "wikidata:Q50051684"),
        # arXiv
        ("https://arxiv.org/", "url:https://arxiv.org/"),
        (
            "https://arxiv.org/list/q-fin/recent",
            "url:https://arxiv.org/list/q-fin/recent",
        ),
        ("https://arxiv.org/abs/1912.03529v1", "arxiv:1912.03529v1"),
        ("https://arxiv.org/pdf/1912.03529v1.pdf", "arxiv:1912.03529v1"),
        ("https://arxiv.org/ps/1912.03529v1", "arxiv:1912.03529v1"),
        ("https://arxiv.org/abs/math.GT/0309136", "arxiv:math.GT/0309136"),
        ("https://arxiv.org/abs/hep-th/9305059", "arxiv:hep-th/9305059"),
        ("https://arxiv.org/pdf/hep-th/9305059.pdf", "arxiv:hep-th/9305059"),
    ],
)
def test_url_to_citekey(url, citekey):
    """url_to_citekey must convert each URL to the expected citekey.

    The cases include URLs expected to map to a source-specific citekey and
    URLs expected to come back as a generic ``url:`` citekey.
    """
    converted = url_to_citekey(url)
    assert converted == citekey