Skip to content

Module manubot.cite.tests.test_url

View Source
import pytest

from manubot.cite.url import get_url_csl_item_zotero


    reason="Fails due to ratelimiting"


def test_get_url_csl_item_zotero_nyt():


    This command creates two translation-server queries.

    The first query is equivalent to:


    curl --verbose \

      --header "Content-Type: text/plain" \

      --data '' \



    Can fail due to NYT ratelimiting, see


    url = ""

    csl_item = get_url_csl_item_zotero(url)

    assert csl_item["title"].startswith(

        "Unraveling the Ties of Altitude, Oxygen and Lung Cancer"


    assert csl_item["author"][0]["family"] == "Johnson"

def test_get_url_csl_item_zotero_manubot():
    """
    This command creates two translation-server queries. The first query is
    equivalent to:

    curl --verbose \
      --header "Content-Type: text/plain" \
      --data '' \
      ''
    """
    # NOTE(review): the URL literal is empty in this listing (URLs appear to
    # have been stripped, as in the curl example above) -- TODO restore it.
    url = ""
    csl_item = get_url_csl_item_zotero(url)
    assert csl_item["title"] == "Open collaborative writing with Manubot"
    assert csl_item["author"][1]["family"] == "Slochower"
    # Zotero CSL exporter returns mixed string/int date-parts,
    # so normalize every part to int before comparing.
    assert [int(x) for x in csl_item["issued"]["date-parts"][0]] == [2018, 12, 18]


    reason="test intermittently fails as metadata varies between two states"


def test_get_url_csl_item_zotero_github():


    This command creates two translation-server queries. The first query is

    equivalent to:


    curl --verbose \

      --header "Content-Type: text/plain" \

      --data '' \



    Note: this test may have temporary failures, due to performance of

          translation-server. It seems that sometimes translation-server

          returns a different title for the same URL. A real mystery.

    See also:

    Proposed action:

        Probably should inquire upstream or change the test.


    url = ""

    csl_item = get_url_csl_item_zotero(url)

    # FIXME: arbitrarily, csl_item['abstract'], and not csl_item['title'] contains the title.

    assert csl_item["title"].startswith("Flexible and powerful data analysis")

    assert csl_item["source"] == "GitHub"

def test_get_url_csl_item_zotero_no_url(monkeypatch):
    """
    Ensure get_url_csl_item_zotero sets URL to the query URL,
    when the Zotero translator does not return it.
    """
    # NOTE(review): the URL literal is empty in this listing (URLs appear to
    # have been stripped during extraction) -- TODO restore it.
    query_url = ""

    def mock_web_query(url: str):
        # The stub must be called with exactly the URL under test.
        assert url == query_url
        # A single Zotero item that deliberately has no "url" field.
        return [
            {
                "key": "J86G3MS7",
                "version": 0,
                "itemType": "webpage",
                "creators": [
                    {
                        "firstName": "Senay",
                        "lastName": "Kafkas",
                        "creatorType": "author",
                    },
                    {"firstName": "Ian", "lastName": "Dunham", "creatorType": "author"},
                    {
                        "firstName": "Helen",
                        "lastName": "Parkinson",
                        "creatorType": "author",
                    },
                    {
                        "firstName": "Johanna",
                        "lastName": "Mcentyre",
                        "creatorType": "author",
                    },
                ],
                "tags": [],
                "title": "BIT106: Use of text mining for Experimental Factor Ontology coverage expansion in the scope of target validation",
                "date": "2016",
                "shortTitle": "BIT106",
            }
        ]

    # Replace the real web query with the stub for the duration of this test.
    monkeypatch.setattr("manubot.cite.zotero.web_query", mock_web_query)
    csl_item = get_url_csl_item_zotero(query_url)
    assert "URL" in csl_item
    assert csl_item["URL"] == query_url



def test_get_url_csl_item_zotero_github()


This command creates two translation-server queries. The first query is

equivalent to:

curl --verbose       --header "Content-Type: text/plain"       --data ''       ''

Note: this test may have temporary failures, due to performance of translation-server. It seems that sometimes translation-server returns a different title for the same URL. A real mystery.

See also:

Proposed action: Probably should inquire upstream or change the test.

View Source

    reason="test intermittently fails as metadata varies between two states"


def test_get_url_csl_item_zotero_github():


    This command creates two translation-server queries. The first query is

    equivalent to:


    curl --verbose \

      --header "Content-Type: text/plain" \

      --data '' \



    Note: this test may have temporary failures, due to performance of

          translation-server. It seems that sometimes translation-server

          returns a different title for the same URL. A real mystery.

    See also:

    Proposed action:

        Probably should inquire upstream or change the test.


    url = ""

    csl_item = get_url_csl_item_zotero(url)

    # FIXME: arbitrarily, csl_item['abstract'], and not csl_item['title'] contains the title.

    assert csl_item["title"].startswith("Flexible and powerful data analysis")

    assert csl_item["source"] == "GitHub"


def test_get_url_csl_item_zotero_manubot()


This command creates two translation-server queries. The first query is

equivalent to:

curl --verbose       --header "Content-Type: text/plain"       --data ''       ''
View Source
def test_get_url_csl_item_zotero_manubot():
    """
    This command creates two translation-server queries. The first query is
    equivalent to:

    curl --verbose \
      --header "Content-Type: text/plain" \
      --data '' \
      ''
    """
    # NOTE(review): the URL literal is empty in this listing (URLs appear to
    # have been stripped, as in the curl example above) -- TODO restore it.
    url = ""
    csl_item = get_url_csl_item_zotero(url)
    assert csl_item["title"] == "Open collaborative writing with Manubot"
    assert csl_item["author"][1]["family"] == "Slochower"
    # Zotero CSL exporter returns mixed string/int date-parts,
    # so normalize every part to int before comparing.
    assert [int(x) for x in csl_item["issued"]["date-parts"][0]] == [2018, 12, 18]


def test_get_url_csl_item_zotero_no_url(monkeypatch)

Ensure get_url_csl_item_zotero sets URL to the query URL,

when the Zotero translator does not return it.

View Source
def test_get_url_csl_item_zotero_no_url(monkeypatch):
    """
    Ensure get_url_csl_item_zotero sets URL to the query URL,
    when the Zotero translator does not return it.
    """
    # NOTE(review): the URL literal is empty in this listing (URLs appear to
    # have been stripped during extraction) -- TODO restore it.
    query_url = ""

    def mock_web_query(url: str):
        # The stub must be called with exactly the URL under test.
        assert url == query_url
        # A single Zotero item that deliberately has no "url" field.
        return [
            {
                "key": "J86G3MS7",
                "version": 0,
                "itemType": "webpage",
                "creators": [
                    {
                        "firstName": "Senay",
                        "lastName": "Kafkas",
                        "creatorType": "author",
                    },
                    {"firstName": "Ian", "lastName": "Dunham", "creatorType": "author"},
                    {
                        "firstName": "Helen",
                        "lastName": "Parkinson",
                        "creatorType": "author",
                    },
                    {
                        "firstName": "Johanna",
                        "lastName": "Mcentyre",
                        "creatorType": "author",
                    },
                ],
                "tags": [],
                "title": "BIT106: Use of text mining for Experimental Factor Ontology coverage expansion in the scope of target validation",
                "date": "2016",
                "shortTitle": "BIT106",
            }
        ]

    # Replace the real web query with the stub for the duration of this test.
    monkeypatch.setattr("manubot.cite.zotero.web_query", mock_web_query)
    csl_item = get_url_csl_item_zotero(query_url)
    assert "URL" in csl_item
    assert csl_item["URL"] == query_url


def test_get_url_csl_item_zotero_nyt()


This command creates two translation-server queries.

The first query is equivalent to:

curl --verbose       --header "Content-Type: text/plain"       --data ''       ''

Can fail due to NYT ratelimiting, see

View Source

    reason="Fails due to ratelimiting"


def test_get_url_csl_item_zotero_nyt():


    This command creates two translation-server queries.

    The first query is equivalent to:


    curl --verbose \

      --header "Content-Type: text/plain" \

      --data '' \



    Can fail due to NYT ratelimiting, see


    url = ""

    csl_item = get_url_csl_item_zotero(url)

    assert csl_item["title"].startswith(

        "Unraveling the Ties of Altitude, Oxygen and Lung Cancer"


    assert csl_item["author"][0]["family"] == "Johnson"