Module manubot.cite.tests.test_cite_command

View Source

import json

import pathlib

import shutil

import subprocess

import pytest

from manubot.cite.csl_item import CSL_Item

from manubot.pandoc.util import get_pandoc_version

from manubot.util import shlex_join

# Directory next to this file that the tests below use as the working
# directory for `manubot cite`; it holds input-bibliography.json and the
# expected rendered outputs.
data_dir = pathlib.Path(__file__).parent / "cite-command-rendered"

def test_cite_command_preserves_order():
    """
    Check the order and de-duplication of CSL items from `manubot cite`.

    Five citekeys are passed on the command line. The last two differ from
    earlier ones only in prefix spelling (`pubmed:` versus `pmid:`) or in
    letter case (the DOI). The `standard_id` values read from the notes of
    the returned CSL items must be exactly the three ids asserted below,
    in that order.

    https://github.com/manubot/manubot/issues/240
    """
    command = ["manubot", "cite", "--bibliography=input-bibliography.json"]
    command += [
        "pmid:29618526",
        "doi:10.7717/peerj.338",
        "arxiv:1806.05726v1",
        "pubmed:29618526",
        "DOI:10.7717/PEERJ.338",
    ]
    # Run from data_dir so the relative --bibliography path resolves.
    stdout = subprocess.check_output(command, cwd=data_dir, encoding="utf-8")
    records = [CSL_Item(record) for record in json.loads(stdout)]
    standard_ids = [record.note_dict.get("standard_id") for record in records]
    assert standard_ids == [
        "pubmed:29618526",
        "doi:10.7717/peerj.338",
        "arxiv:1806.05726v1",
    ]

def test_cite_command_empty():
    """
    Run `manubot cite` without any citekeys and expect a usage error.

    The command must exit with status 2 and name the missing `citekeys`
    argument on stderr.
    """
    result = subprocess.run(
        ["manubot", "cite"],
        encoding="utf-8",
        capture_output=True,
    )
    stderr = result.stderr
    # Echo stderr to help diagnose a failing assertion.
    print(stderr)
    assert result.returncode == 2
    assert "the following arguments are required: citekeys" in stderr

def test_cite_command_stdout():
    """
    Cite a single arXiv citekey and check the CSL JSON printed to stdout.

    The command must succeed and print a JSON list holding exactly one CSL
    item whose `URL` is the arXiv abstract page for the cited version.
    """
    command = ["manubot", "cite", "arxiv:1806.05726v1"]
    result = subprocess.run(command, encoding="utf-8", capture_output=True)
    # Echo stderr to help diagnose a failing assertion.
    print(result.stderr)
    assert result.returncode == 0
    # Unpacking fails unless stdout holds exactly one item.
    [csl_item] = json.loads(result.stdout)
    assert csl_item["URL"] == "https://arxiv.org/abs/1806.05726v1"

def test_cite_command_file(tmpdir):
    """
    Run `manubot cite --output` and check the CSL JSON written to the file.

    `tmpdir` is the pytest temporary directory that receives
    `csl-items.json`. The command must succeed and the file must hold a JSON
    list with exactly one CSL item whose `URL` is the arXiv abstract page
    for the cited version.
    """
    path = pathlib.Path(tmpdir) / "csl-items.json"
    process = subprocess.run(
        ["manubot", "cite", "--output", str(path), "arxiv:1806.05726v1"],
        capture_output=True,
        # Decode explicitly, like the other tests in this module, instead of
        # relying on the platform's default encoding.
        encoding="utf-8",
    )
    print(process.stderr)
    assert process.returncode == 0
    # Read the JSON as UTF-8 regardless of the locale's preferred encoding.
    (csl,) = json.loads(path.read_text(encoding="utf-8"))
    assert csl["URL"] == "https://arxiv.org/abs/1806.05726v1"

@pytest.mark.integration
@pytest.mark.parametrize(
    ["args", "filename"],
    [
        pytest.param(
            ["--format", "plain"], "references-plain-{}.txt", id="--format=plain"
        ),
        pytest.param(
            ["--format", "markdown"],
            "references-markdown-{}.md",
            id="--format=markdown",
        ),
        pytest.param(
            ["--format", "html"], "references-html-{}.html", id="--format=html"
        ),
        pytest.param(
            ["--format", "jats"], "references-jats-{}.xml", id="--format=jats"
        ),
    ],
)
@pytest.mark.skipif(
    not shutil.which("pandoc"), reason="pandoc installation not found on system"
)
@pytest.mark.pandoc_version_sensitive
def test_cite_command_render_stdout(args, filename):
    r"""
    Test the stdout output of `manubot cite` with various Pandoc-output formats.

    `args` holds the extra command-line options that select the output
    format. `filename` is the template for the expected-output file name;
    its `{}` placeholder is filled with the pandoc version stamp.

    The output is sensitive to the version of Pandoc used, so expected output
    files include the pandoc version stamp in their filename.
    When the expected version is missing, the test fails but writes the
    command output to that file. Therefore, subsequent runs of the same test
    will pass. Before committing the auto-generated output, do look to ensure
    its integrity.

    This test uses --bibliography to avoid slow network calls.
    Regenerate the CSL JSON using:

    ```shell
    manubot cite \
      --output=manubot/cite/tests/cite-command-rendered/input-bibliography.json \
      arxiv:1806.05726v1 doi:10.7717/peerj.338 pmid:29618526
    ```
    """
    # NOTE: the docstring is a raw string so the trailing backslashes in the
    # shell example are kept instead of being consumed as line continuations.

    # get pandoc version info
    pandoc_version = get_pandoc_version()
    pandoc_stamp = ".".join(map(str, pandoc_version))
    path = data_dir.joinpath(filename.format(pandoc_stamp))

    # skip test on old pandoc versions
    for output in "markdown", "html", "jats":
        if output in args and pandoc_version < (2, 5):
            pytest.skip(f"Test {output} output assumes pandoc >= 2.5")
    if pandoc_version < (2, 0):
        pytest.skip("Test requires pandoc >= 2.0 to support --lua-filter and --csl=URL")

    command = [
        "manubot",
        "cite",
        "--bibliography=input-bibliography.json",
        "--csl=https://github.com/greenelab/manubot-rootstock/raw/e83e51dcd89256403bb787c3d9a46e4ee8d04a9e/build/assets/style.csl",
        "arxiv:1806.05726v1",
        "doi:10.7717/peerj.338",
        "pmid:29618526",
    ] + args
    # Run from data_dir so the relative --bibliography path resolves.
    process = subprocess.run(
        command,
        capture_output=True,
        encoding="utf-8",
        cwd=data_dir,
    )
    print(shlex_join(process.args))
    print(process.stderr)
    assert process.returncode == 0
    print(process.stdout)

    if not path.exists():
        # https://github.com/manubot/manubot/pull/146#discussion_r333132261
        path.write_text(process.stdout, encoding="utf-8")
        # Attach the explanation to the failure itself rather than raising a
        # bare AssertionError with no message.
        raise AssertionError(
            f"Missing expected output at {path}\n"
            "Writing output to file such that future tests will pass."
        )

    # utf-8-sig tolerates a leading byte-order mark in the expected file.
    expected = path.read_text(encoding="utf-8-sig")
    assert process.stdout == expected

def test_cite_command_bibliography():
    """
    Cite a DOI with `--bibliography` and check which bibliography is recorded.

    The command runs inside the `pandoc/tests/bibliographies` directory, two
    levels above this file's directory. It must return exactly one CSL item
    whose note names `bibliography.json` as its source bibliography.
    """
    tests_dir = pathlib.Path(__file__).parent
    bib_dir = tests_dir.parent.parent / "pandoc/tests/bibliographies"
    # cwd=bib_dir lets the relative --bibliography path resolve.
    stdout = subprocess.check_output(
        [
            "manubot",
            "cite",
            "--bibliography=bibliography.json",
            "DOI:10.7554/elife.32822",
        ],
        cwd=bib_dir,
        encoding="utf-8",
    )
    records = json.loads(stdout)
    assert len(records) == 1
    record = records[0]
    assert "source_bibliography: bibliography.json" in record["note"]

def teardown_module(module):
    """
    Pause for one second as this module's teardown step.

    The pause is meant to avoid sending too many requests to NCBI E-Utils
    from test_pubmed.py, which is executed following this module. E-Utility
    requests are capped at 3 per second. That cap is usually enforced by
    _get_eutils_rate_limiter, but this does not seem to work across test
    modules.

    The `module` argument is not used.
    """
    from time import sleep

    sleep(1)

Variables

data_dir

Functions

teardown_module

def teardown_module(
    module
)

Avoid sending too many requests to NCBI E-Utils from test_pubmed.py, which is executed following this module. E-Utility requests are capped at 3 per second, which is usually enforced by _get_eutils_rate_limiter, but this does not seem to work across test modules.

View Source

def teardown_module(module):
    """
    Pause for one second as this module's teardown step.

    The pause is meant to avoid sending too many requests to NCBI E-Utils
    from test_pubmed.py, which is executed following this module. E-Utility
    requests are capped at 3 per second. That cap is usually enforced by
    _get_eutils_rate_limiter, but this does not seem to work across test
    modules.

    The `module` argument is not used.
    """
    from time import sleep

    sleep(1)

test_cite_command_bibliography

def test_cite_command_bibliography(

)

View Source

def test_cite_command_bibliography():
    """
    Cite a DOI with `--bibliography` and check which bibliography is recorded.

    The command runs inside the `pandoc/tests/bibliographies` directory, two
    levels above this file's directory. It must return exactly one CSL item
    whose note names `bibliography.json` as its source bibliography.
    """
    tests_dir = pathlib.Path(__file__).parent
    bib_dir = tests_dir.parent.parent / "pandoc/tests/bibliographies"
    # cwd=bib_dir lets the relative --bibliography path resolve.
    stdout = subprocess.check_output(
        [
            "manubot",
            "cite",
            "--bibliography=bibliography.json",
            "DOI:10.7554/elife.32822",
        ],
        cwd=bib_dir,
        encoding="utf-8",
    )
    records = json.loads(stdout)
    assert len(records) == 1
    record = records[0]
    assert "source_bibliography: bibliography.json" in record["note"]

test_cite_command_empty

def test_cite_command_empty(

)

View Source

def test_cite_command_empty():
    """
    Run `manubot cite` without any citekeys and expect a usage error.

    The command must exit with status 2 and name the missing `citekeys`
    argument on stderr.
    """
    result = subprocess.run(
        ["manubot", "cite"],
        encoding="utf-8",
        capture_output=True,
    )
    stderr = result.stderr
    # Echo stderr to help diagnose a failing assertion.
    print(stderr)
    assert result.returncode == 2
    assert "the following arguments are required: citekeys" in stderr

test_cite_command_file

def test_cite_command_file(
    tmpdir
)

View Source

def test_cite_command_file(tmpdir):
    """
    Run `manubot cite --output` and check the CSL JSON written to the file.

    `tmpdir` is the pytest temporary directory that receives
    `csl-items.json`. The command must succeed and the file must hold a JSON
    list with exactly one CSL item whose `URL` is the arXiv abstract page
    for the cited version.
    """
    path = pathlib.Path(tmpdir) / "csl-items.json"
    process = subprocess.run(
        ["manubot", "cite", "--output", str(path), "arxiv:1806.05726v1"],
        capture_output=True,
        # Decode explicitly, like the other tests in this module, instead of
        # relying on the platform's default encoding.
        encoding="utf-8",
    )
    print(process.stderr)
    assert process.returncode == 0
    # Read the JSON as UTF-8 regardless of the locale's preferred encoding.
    (csl,) = json.loads(path.read_text(encoding="utf-8"))
    assert csl["URL"] == "https://arxiv.org/abs/1806.05726v1"

test_cite_command_preserves_order

def test_cite_command_preserves_order(

)

https://github.com/manubot/manubot/issues/240

View Source

def test_cite_command_preserves_order():
    """
    Check the order and de-duplication of CSL items from `manubot cite`.

    Five citekeys are passed on the command line. The last two differ from
    earlier ones only in prefix spelling (`pubmed:` versus `pmid:`) or in
    letter case (the DOI). The `standard_id` values read from the notes of
    the returned CSL items must be exactly the three ids asserted below,
    in that order.

    https://github.com/manubot/manubot/issues/240
    """
    command = ["manubot", "cite", "--bibliography=input-bibliography.json"]
    command += [
        "pmid:29618526",
        "doi:10.7717/peerj.338",
        "arxiv:1806.05726v1",
        "pubmed:29618526",
        "DOI:10.7717/PEERJ.338",
    ]
    # Run from data_dir so the relative --bibliography path resolves.
    stdout = subprocess.check_output(command, cwd=data_dir, encoding="utf-8")
    records = [CSL_Item(record) for record in json.loads(stdout)]
    standard_ids = [record.note_dict.get("standard_id") for record in records]
    assert standard_ids == [
        "pubmed:29618526",
        "doi:10.7717/peerj.338",
        "arxiv:1806.05726v1",
    ]

test_cite_command_render_stdout

def test_cite_command_render_stdout(
    args,
    filename
)

Test the stdout output of manubot cite with various Pandoc-output formats.

The output is sensitive to the version of Pandoc used, so expected output files include the pandoc version stamp in their filename. When the expected version is missing, the test fails but writes the command output to that file. Therefore, subsequent runs of the same test will pass. Before committing the auto-generated output, do look to ensure its integrity.

This test uses --bibliography to avoid slow network calls. Regenerate the CSL JSON using:

manubot cite --output=manubot/cite/tests/cite-command-rendered/input-bibliography.json arxiv:1806.05726v1 doi:10.7717/peerj.338 pmid:29618526

View Source

@pytest.mark.integration
@pytest.mark.parametrize(
    ["args", "filename"],
    [
        pytest.param(
            ["--format", "plain"], "references-plain-{}.txt", id="--format=plain"
        ),
        pytest.param(
            ["--format", "markdown"],
            "references-markdown-{}.md",
            id="--format=markdown",
        ),
        pytest.param(
            ["--format", "html"], "references-html-{}.html", id="--format=html"
        ),
        pytest.param(
            ["--format", "jats"], "references-jats-{}.xml", id="--format=jats"
        ),
    ],
)
@pytest.mark.skipif(
    not shutil.which("pandoc"), reason="pandoc installation not found on system"
)
@pytest.mark.pandoc_version_sensitive
def test_cite_command_render_stdout(args, filename):
    r"""
    Test the stdout output of `manubot cite` with various Pandoc-output formats.

    `args` holds the extra command-line options that select the output
    format. `filename` is the template for the expected-output file name;
    its `{}` placeholder is filled with the pandoc version stamp.

    The output is sensitive to the version of Pandoc used, so expected output
    files include the pandoc version stamp in their filename.
    When the expected version is missing, the test fails but writes the
    command output to that file. Therefore, subsequent runs of the same test
    will pass. Before committing the auto-generated output, do look to ensure
    its integrity.

    This test uses --bibliography to avoid slow network calls.
    Regenerate the CSL JSON using:

    ```shell
    manubot cite \
      --output=manubot/cite/tests/cite-command-rendered/input-bibliography.json \
      arxiv:1806.05726v1 doi:10.7717/peerj.338 pmid:29618526
    ```
    """
    # NOTE: the docstring is a raw string so the trailing backslashes in the
    # shell example are kept instead of being consumed as line continuations.

    # get pandoc version info
    pandoc_version = get_pandoc_version()
    pandoc_stamp = ".".join(map(str, pandoc_version))
    path = data_dir.joinpath(filename.format(pandoc_stamp))

    # skip test on old pandoc versions
    for output in "markdown", "html", "jats":
        if output in args and pandoc_version < (2, 5):
            pytest.skip(f"Test {output} output assumes pandoc >= 2.5")
    if pandoc_version < (2, 0):
        pytest.skip("Test requires pandoc >= 2.0 to support --lua-filter and --csl=URL")

    command = [
        "manubot",
        "cite",
        "--bibliography=input-bibliography.json",
        "--csl=https://github.com/greenelab/manubot-rootstock/raw/e83e51dcd89256403bb787c3d9a46e4ee8d04a9e/build/assets/style.csl",
        "arxiv:1806.05726v1",
        "doi:10.7717/peerj.338",
        "pmid:29618526",
    ] + args
    # Run from data_dir so the relative --bibliography path resolves.
    process = subprocess.run(
        command,
        capture_output=True,
        encoding="utf-8",
        cwd=data_dir,
    )
    print(shlex_join(process.args))
    print(process.stderr)
    assert process.returncode == 0
    print(process.stdout)

    if not path.exists():
        # https://github.com/manubot/manubot/pull/146#discussion_r333132261
        path.write_text(process.stdout, encoding="utf-8")
        # Attach the explanation to the failure itself rather than raising a
        # bare AssertionError with no message.
        raise AssertionError(
            f"Missing expected output at {path}\n"
            "Writing output to file such that future tests will pass."
        )

    # utf-8-sig tolerates a leading byte-order mark in the expected file.
    expected = path.read_text(encoding="utf-8-sig")
    assert process.stdout == expected

test_cite_command_stdout

def test_cite_command_stdout(

)

View Source

def test_cite_command_stdout():
    """
    Cite a single arXiv citekey and check the CSL JSON printed to stdout.

    The command must succeed and print a JSON list holding exactly one CSL
    item whose `URL` is the arXiv abstract page for the cited version.
    """
    command = ["manubot", "cite", "arxiv:1806.05726v1"]
    result = subprocess.run(command, encoding="utf-8", capture_output=True)
    # Echo stderr to help diagnose a failing assertion.
    print(result.stderr)
    assert result.returncode == 0
    # Unpacking fails unless stdout holds exactly one item.
    [csl_item] = json.loads(result.stdout)
    assert csl_item["URL"] == "https://arxiv.org/abs/1806.05726v1"