Skip to content

Module manubot.cite.tests.test_url

View Source
import pytest

from manubot.cite.url import get_url_csl_item_zotero

@pytest.mark.xfail(
    reason="Fails due to ratelimiting https://github.com/zotero/translation-server/issues/133"
)
def test_get_url_csl_item_zotero_nyt():
    r"""
    Check the CSL item returned by get_url_csl_item_zotero for a NYT short URL.

    The call creates two translation-server queries.
    The first query is equivalent to:

    ```
    curl --verbose \
      --header "Content-Type: text/plain" \
      --data 'https://nyti.ms/1NuB0WJ' \
      'https://translate.manubot.org/web'
    ```

    Can fail due to NYT ratelimiting, see
    https://github.com/zotero/translation-server/issues/133
    """
    expected_title_prefix = "Unraveling the Ties of Altitude, Oxygen and Lung Cancer"
    item = get_url_csl_item_zotero("https://nyti.ms/1NuB0WJ")
    # Only the start of the title is pinned, not the whole string.
    assert item["title"].startswith(expected_title_prefix)
    first_author = item["author"][0]
    assert first_author["family"] == "Johnson"

def test_get_url_csl_item_zotero_manubot():
    r"""
    Check the CSL item returned by get_url_csl_item_zotero for a versioned
    Manubot manuscript URL.

    The call creates two translation-server queries. The first query is
    equivalent to:

    ```
    curl --verbose \
      --header "Content-Type: text/plain" \
      --data 'https://greenelab.github.io/meta-review/v/0770300e1d5490a1ae8ff3a85ddca2cdc4ae0613/' \
      'https://translate.manubot.org/web'
    ```
    """
    manuscript_url = "https://greenelab.github.io/meta-review/v/0770300e1d5490a1ae8ff3a85ddca2cdc4ae0613/"
    item = get_url_csl_item_zotero(manuscript_url)
    assert item["title"] == "Open collaborative writing with Manubot"
    second_author = item["author"][1]
    assert second_author["family"] == "Slochower"
    # Zotero CSL exporter returns mixed string/int date-parts, so every part
    # is coerced to int before comparing.
    # https://github.com/zotero/zotero/issues/1603
    issued_parts = item["issued"]["date-parts"][0]
    assert list(map(int, issued_parts)) == [2018, 12, 18]

@pytest.mark.skip(
    reason="test intermittently fails as metadata varies between two states"
)
def test_get_url_csl_item_zotero_github():
    r"""
    Check the CSL item returned by get_url_csl_item_zotero for a GitHub tree URL.

    The call creates two translation-server queries. The first query is
    equivalent to:

    ```
    curl --verbose \
      --header "Content-Type: text/plain" \
      --data 'https://github.com/pandas-dev/pandas/tree/d5e5bf761092c59eeb9b8750f05f2bc29fb45927' \
      'https://translate.manubot.org/web'
    ```

    Note: this test may have temporary failures, due to performance of
          translation-server. It seems that sometimes translation-server
          returns a different title for the same URL. A real mystery.

    See also:
        https://github.com/manubot/manubot/pull/139#discussion_r328703233

    Proposed action:
        Probably should inquire upstream or change the test.
    """
    pandas_tree_url = "https://github.com/pandas-dev/pandas/tree/d5e5bf761092c59eeb9b8750f05f2bc29fb45927"
    item = get_url_csl_item_zotero(pandas_tree_url)
    # FIXME: arbitrarily, the title text ends up in item['abstract'] rather
    # than in item['title'].
    title = item["title"]
    assert title.startswith("Flexible and powerful data analysis")
    assert item["source"] == "GitHub"

def test_get_url_csl_item_zotero_no_url(monkeypatch):
    """
    Ensure get_url_csl_item_zotero sets URL to the query URL,
    when the Zotero translator does not return it.
    https://github.com/manubot/manubot/issues/244

    manubot.cite.zotero.web_query is replaced with a stub that returns a
    fixed Zotero item containing no URL field.
    """
    query_url = "http://icbo2016.cgrb.oregonstate.edu/node/251"
    author_names = [
        ("Senay", "Kafkas"),
        ("Ian", "Dunham"),
        ("Helen", "Parkinson"),
        ("Johanna", "Mcentyre"),
    ]

    def mock_web_query(url: str):
        # The stub must be queried with exactly the URL under test.
        assert url == query_url
        creators = [
            {"firstName": first, "lastName": last, "creatorType": "author"}
            for first, last in author_names
        ]
        zotero_item = {
            "key": "J86G3MS7",
            "version": 0,
            "itemType": "webpage",
            "creators": creators,
            "tags": [],
            "title": "BIT106: Use of text mining for Experimental Factor Ontology coverage expansion in the scope of target validation",
            "date": "2016",
            "shortTitle": "BIT106",
        }
        return [zotero_item]

    monkeypatch.setattr("manubot.cite.zotero.web_query", mock_web_query)
    csl_item = get_url_csl_item_zotero(query_url)
    assert "URL" in csl_item
    assert csl_item["URL"] == query_url

Functions

test_get_url_csl_item_zotero_github

def test_get_url_csl_item_zotero_github(

)

This command creates two translation-server queries. The first query is

equivalent to:

curl --verbose --header "Content-Type: text/plain" --data 'https://github.com/pandas-dev/pandas/tree/d5e5bf761092c59eeb9b8750f05f2bc29fb45927' 'https://translate.manubot.org/web'

Note: this test may have temporary failures, due to performance of translation-server. It seems that sometimes translation-server returns a different title for the same URL. A real mystery.

See also: https://github.com/manubot/manubot/pull/139#discussion_r328703233

Proposed action: Probably should inquire upstream or change the test.

View Source
@pytest.mark.skip(
    reason="test intermittently fails as metadata varies between two states"
)
def test_get_url_csl_item_zotero_github():
    r"""
    Check the CSL item returned by get_url_csl_item_zotero for a GitHub tree URL.

    The call creates two translation-server queries. The first query is
    equivalent to:

    ```
    curl --verbose \
      --header "Content-Type: text/plain" \
      --data 'https://github.com/pandas-dev/pandas/tree/d5e5bf761092c59eeb9b8750f05f2bc29fb45927' \
      'https://translate.manubot.org/web'
    ```

    Note: this test may have temporary failures, due to performance of
          translation-server. It seems that sometimes translation-server
          returns a different title for the same URL. A real mystery.

    See also:
        https://github.com/manubot/manubot/pull/139#discussion_r328703233

    Proposed action:
        Probably should inquire upstream or change the test.
    """
    pandas_tree_url = "https://github.com/pandas-dev/pandas/tree/d5e5bf761092c59eeb9b8750f05f2bc29fb45927"
    item = get_url_csl_item_zotero(pandas_tree_url)
    # FIXME: arbitrarily, the title text ends up in item['abstract'] rather
    # than in item['title'].
    title = item["title"]
    assert title.startswith("Flexible and powerful data analysis")
    assert item["source"] == "GitHub"

test_get_url_csl_item_zotero_manubot

def test_get_url_csl_item_zotero_manubot(

)

This command creates two translation-server queries. The first query is

equivalent to:

curl --verbose --header "Content-Type: text/plain" --data 'https://greenelab.github.io/meta-review/v/0770300e1d5490a1ae8ff3a85ddca2cdc4ae0613/' 'https://translate.manubot.org/web'
View Source
def test_get_url_csl_item_zotero_manubot():
    r"""
    Check the CSL item returned by get_url_csl_item_zotero for a versioned
    Manubot manuscript URL.

    The call creates two translation-server queries. The first query is
    equivalent to:

    ```
    curl --verbose \
      --header "Content-Type: text/plain" \
      --data 'https://greenelab.github.io/meta-review/v/0770300e1d5490a1ae8ff3a85ddca2cdc4ae0613/' \
      'https://translate.manubot.org/web'
    ```
    """
    manuscript_url = "https://greenelab.github.io/meta-review/v/0770300e1d5490a1ae8ff3a85ddca2cdc4ae0613/"
    item = get_url_csl_item_zotero(manuscript_url)
    assert item["title"] == "Open collaborative writing with Manubot"
    second_author = item["author"][1]
    assert second_author["family"] == "Slochower"
    # Zotero CSL exporter returns mixed string/int date-parts, so every part
    # is coerced to int before comparing.
    # https://github.com/zotero/zotero/issues/1603
    issued_parts = item["issued"]["date-parts"][0]
    assert list(map(int, issued_parts)) == [2018, 12, 18]

test_get_url_csl_item_zotero_no_url

def test_get_url_csl_item_zotero_no_url(
    monkeypatch
)

Ensure get_url_csl_item_zotero sets URL to the query URL when the Zotero translator does not return it.

See https://github.com/manubot/manubot/issues/244

View Source
def test_get_url_csl_item_zotero_no_url(monkeypatch):
    """
    Ensure get_url_csl_item_zotero sets URL to the query URL,
    when the Zotero translator does not return it.
    https://github.com/manubot/manubot/issues/244

    manubot.cite.zotero.web_query is replaced with a stub that returns a
    fixed Zotero item containing no URL field.
    """
    query_url = "http://icbo2016.cgrb.oregonstate.edu/node/251"
    author_names = [
        ("Senay", "Kafkas"),
        ("Ian", "Dunham"),
        ("Helen", "Parkinson"),
        ("Johanna", "Mcentyre"),
    ]

    def mock_web_query(url: str):
        # The stub must be queried with exactly the URL under test.
        assert url == query_url
        creators = [
            {"firstName": first, "lastName": last, "creatorType": "author"}
            for first, last in author_names
        ]
        zotero_item = {
            "key": "J86G3MS7",
            "version": 0,
            "itemType": "webpage",
            "creators": creators,
            "tags": [],
            "title": "BIT106: Use of text mining for Experimental Factor Ontology coverage expansion in the scope of target validation",
            "date": "2016",
            "shortTitle": "BIT106",
        }
        return [zotero_item]

    monkeypatch.setattr("manubot.cite.zotero.web_query", mock_web_query)
    csl_item = get_url_csl_item_zotero(query_url)
    assert "URL" in csl_item
    assert csl_item["URL"] == query_url

test_get_url_csl_item_zotero_nyt

def test_get_url_csl_item_zotero_nyt(

)

This command creates two translation-server queries.

The first query is equivalent to:

curl --verbose --header "Content-Type: text/plain" --data 'https://nyti.ms/1NuB0WJ' 'https://translate.manubot.org/web'

Can fail due to NYT ratelimiting, see https://github.com/zotero/translation-server/issues/133

View Source
@pytest.mark.xfail(
    reason="Fails due to ratelimiting https://github.com/zotero/translation-server/issues/133"
)
def test_get_url_csl_item_zotero_nyt():
    r"""
    Check the CSL item returned by get_url_csl_item_zotero for a NYT short URL.

    The call creates two translation-server queries.
    The first query is equivalent to:

    ```
    curl --verbose \
      --header "Content-Type: text/plain" \
      --data 'https://nyti.ms/1NuB0WJ' \
      'https://translate.manubot.org/web'
    ```

    Can fail due to NYT ratelimiting, see
    https://github.com/zotero/translation-server/issues/133
    """
    expected_title_prefix = "Unraveling the Ties of Altitude, Oxygen and Lung Cancer"
    item = get_url_csl_item_zotero("https://nyti.ms/1NuB0WJ")
    # Only the start of the title is pinned, not the whole string.
    assert item["title"].startswith(expected_title_prefix)
    first_author = item["author"][0]
    assert first_author["family"] == "Johnson"