Module manubot.cite.tests.test_zotero
View Source
import pytest
from manubot.cite.zotero import export_as_csl, search_query, web_query
def test_web_query():
    """
    Call `web_query` on a Big Think article URL and check that it returns a
    one-element list whose record title starts with the expected text.

    The translation-server web endpoint can also be exercised by hand:

    ```
    curl \
      --header "Content-Type: text/plain" \
      --data 'https://bigthink.com/neurobonkers/a-pirate-bay-for-science' \
      'https://translate.manubot.org/web'
    ```

    The web query for this URL used to be extraordinarily slow because of an
    outdated translation-server installation; that has since been fixed. See
    https://github.com/zotero/translation-server/issues/63
    """
    article_url = "https://bigthink.com/neurobonkers/a-pirate-bay-for-science"
    records = web_query(article_url)
    # The result must be a list holding exactly one record.
    assert isinstance(records, list)
    assert len(records) == 1
    first_record = records[0]
    assert first_record["title"].startswith("Meet the Robin Hood of Science")
def test_export_as_csl():
    """
    Pass a single Zotero webpage item to `export_as_csl` and check the title
    and container-title of the first CSL item that comes back.

    CSL export can also be exercised by hand via curl:

    ```
    curl \
      --header "Content-Type: application/json" \
      --data '[{"key": "IN22XN53", "itemType": "webpage", "date": "2016-02-09T20:12:00"}]' \
      'https://translate.manubot.org/export?format=csljson'
    ```
    """
    # Zotero-format record for the Big Think article used as the export input.
    webpage_item = {
        "key": "IN22XN53",
        "version": 0,
        "itemType": "webpage",
        "creators": [],
        "tags": [],
        "title": "Meet the Robin Hood of Science",
        "websiteTitle": "Big Think",
        "date": "2016-02-09T20:12:00",
        "url": "https://bigthink.com/neurobonkers/a-pirate-bay-for-science",
        "abstractNote": "How one researcher created a pirate bay for science more powerful than even libraries at top universities.",
        "language": "en",
        "accessDate": "2018-12-06T20:10:14Z",
    }
    exported = export_as_csl([webpage_item])
    csl_item = exported[0]
    assert csl_item["title"] == "Meet the Robin Hood of Science"
    # The test expects Zotero's "websiteTitle" value under CSL "container-title".
    assert csl_item["container-title"] == "Big Think"
def test_web_query_returns_single_result_legacy_manubot_url():
    """
    Check that single=1 is specified for web queries by confirming that
    `web_query` returns exactly one record for a legacy Manubot manuscript URL.

    Without single=1, Zotero can prefer translators that return multiple
    choices. This occurs with legacy Manubot manuscripts, which get assigned
    the DOI translator as top priority.
    https://github.com/zotero/translation-server/issues/65

    ```
    curl \
      --header "Content-Type: text/plain" \
      --data 'https://greenelab.github.io/scihub-manuscript/v/cfe599e25405d38092bf972b6ea1c9e0dcf3deb9/' \
      'https://translate.manubot.org/web?single=1'
    ```
    """
    manuscript_url = "https://greenelab.github.io/scihub-manuscript/v/cfe599e25405d38092bf972b6ea1c9e0dcf3deb9/"
    records = web_query(manuscript_url)
    assert isinstance(records, list)
    assert len(records) == 1
    # Unpack the sole record; this raises if the list length is not one.
    (record,) = records
    expected_title = "Sci-Hub provides access to nearly all scholarly literature"
    assert record["title"] == expected_title
def test_web_query_returns_single_result_pubmed_url():
    """
    Check that `web_query` returns exactly one record for a PubMed search URL.

    For background on why a single result matters, see the
    test_web_query_returns_single_result_legacy_manubot_url docstring.

    ```
    curl \
      --header "Content-Type: text/plain" \
      --data 'https://www.ncbi.nlm.nih.gov/pubmed/?term=sci-hub%5Btitle%5D' \
      'https://translate.manubot.org/web?single=1'
    ```
    """
    search_url = "https://www.ncbi.nlm.nih.gov/pubmed/?term=sci-hub%5Btitle%5D"
    records = web_query(search_url)
    assert isinstance(records, list)
    assert len(records) == 1
    # Unpack the sole record; this raises if the list length is not one.
    (record,) = records
    assert record["title"].startswith("sci-hub[title]")
def test_search_query_isbn():
    """
    Call `search_query` with an ISBN identifier and check the title of the
    first record returned.

    The translation-server search endpoint can also be exercised by hand:

    ```
    curl \
      --header "Content-Type: text/plain" \
      --data 'isbn:9781339919881' \
      'https://translate.manubot.org/search'
    ```
    """
    records = search_query("isbn:9781339919881")
    first_record = records[0]
    assert first_record["title"].startswith("The hetnet awakens")
def test_search_query_arxiv():
    """
    Test citing https://arxiv.org/abs/1604.05363v1 by calling `search_query`
    and checking the title, last creator and date of the first record.

    The translation-server search endpoint can also be exercised by hand:

    ```
    curl --verbose \
      --header "Content-Type: text/plain" \
      --data 'arxiv:1604.05363v1' \
      'https://translate.manubot.org/search'
    ```
    """
    records = search_query("arxiv:1604.05363v1")
    preprint = records[0]
    expected_title = (
        "Comparing Published Scientific Journal Articles to Their Pre-print Versions"
    )
    assert preprint["title"] == expected_title
    # Only the final entry of the creators list is inspected.
    last_creator = preprint["creators"][-1]
    assert last_creator["firstName"] == "Todd"
    assert preprint["date"] == "2016-04-18"
@pytest.mark.parametrize(
    "identifier",
    [
        "30571677",  # https://www.ncbi.nlm.nih.gov/pubmed/30571677
        "doi:10.1371/journal.pcbi.1006561",  # https://doi.org/10.1371/journal.pcbi.1006561
    ],
)
def test_search_query(identifier):
    """
    Call `search_query` with each parametrized identifier (a bare PMID and a
    DOI) and check the title and first creator of the first record; the same
    expectations apply to both identifiers.

    The translation-server search endpoint can also be exercised by hand:

    ```
    curl --verbose \
      --header "Content-Type: text/plain" \
      --data '30571677' \
      'https://translate.manubot.org/search'
    ```

    translation-server does not support PMIDs with a `pmid:` prefix.
    https://github.com/zotero/translation-server/issues/71
    """
    records = search_query(identifier)
    top_record = records[0]
    expected_prefix = "Ten simple rules for documenting scientific software"
    assert top_record["title"].startswith(expected_prefix)
    first_creator = top_record["creators"][0]
    assert first_creator["lastName"] == "Lee"
Functions
test_export_as_csl
def test_export_as_csl(
)
CSL export can be tested via curl:
curl --header "Content-Type: application/json" --data '[{"key": "IN22XN53", "itemType": "webpage", "date": "2016-02-09T20:12:00"}]' 'https://translate.manubot.org/export?format=csljson'
View Source
def test_export_as_csl():
    """
    Pass a single Zotero webpage item to `export_as_csl` and check the title
    and container-title of the first CSL item that comes back.

    CSL export can also be exercised by hand via curl:

    ```
    curl \
      --header "Content-Type: application/json" \
      --data '[{"key": "IN22XN53", "itemType": "webpage", "date": "2016-02-09T20:12:00"}]' \
      'https://translate.manubot.org/export?format=csljson'
    ```
    """
    # Zotero-format record for the Big Think article used as the export input.
    webpage_item = {
        "key": "IN22XN53",
        "version": 0,
        "itemType": "webpage",
        "creators": [],
        "tags": [],
        "title": "Meet the Robin Hood of Science",
        "websiteTitle": "Big Think",
        "date": "2016-02-09T20:12:00",
        "url": "https://bigthink.com/neurobonkers/a-pirate-bay-for-science",
        "abstractNote": "How one researcher created a pirate bay for science more powerful than even libraries at top universities.",
        "language": "en",
        "accessDate": "2018-12-06T20:10:14Z",
    }
    exported = export_as_csl([webpage_item])
    csl_item = exported[0]
    assert csl_item["title"] == "Meet the Robin Hood of Science"
    # The test expects Zotero's "websiteTitle" value under CSL "container-title".
    assert csl_item["container-title"] == "Big Think"
test_search_query
def test_search_query(
identifier
)
The translation-server search endpoint can be tested via curl:
curl --verbose --header "Content-Type: text/plain" --data '30571677' 'https://translate.manubot.org/search'
translation-server does not support PMIDs with a `pmid:` prefix.
https://github.com/zotero/translation-server/issues/71
View Source
@pytest.mark.parametrize(
    "identifier",
    [
        "30571677",  # https://www.ncbi.nlm.nih.gov/pubmed/30571677
        "doi:10.1371/journal.pcbi.1006561",  # https://doi.org/10.1371/journal.pcbi.1006561
    ],
)
def test_search_query(identifier):
    """
    Call `search_query` with each parametrized identifier (a bare PMID and a
    DOI) and check the title and first creator of the first record; the same
    expectations apply to both identifiers.

    The translation-server search endpoint can also be exercised by hand:

    ```
    curl --verbose \
      --header "Content-Type: text/plain" \
      --data '30571677' \
      'https://translate.manubot.org/search'
    ```

    translation-server does not support PMIDs with a `pmid:` prefix.
    https://github.com/zotero/translation-server/issues/71
    """
    records = search_query(identifier)
    top_record = records[0]
    expected_prefix = "Ten simple rules for documenting scientific software"
    assert top_record["title"].startswith(expected_prefix)
    first_creator = top_record["creators"][0]
    assert first_creator["lastName"] == "Lee"
test_search_query_arxiv
def test_search_query_arxiv(
)
Test citing https://arxiv.org/abs/1604.05363v1
The translation-server search endpoint can be tested via curl:
curl --verbose --header "Content-Type: text/plain" --data 'arxiv:1604.05363v1' 'https://translate.manubot.org/search'
View Source
def test_search_query_arxiv():
    """
    Test citing https://arxiv.org/abs/1604.05363v1 by calling `search_query`
    and checking the title, last creator and date of the first record.

    The translation-server search endpoint can also be exercised by hand:

    ```
    curl --verbose \
      --header "Content-Type: text/plain" \
      --data 'arxiv:1604.05363v1' \
      'https://translate.manubot.org/search'
    ```
    """
    records = search_query("arxiv:1604.05363v1")
    preprint = records[0]
    expected_title = (
        "Comparing Published Scientific Journal Articles to Their Pre-print Versions"
    )
    assert preprint["title"] == expected_title
    # Only the final entry of the creators list is inspected.
    last_creator = preprint["creators"][-1]
    assert last_creator["firstName"] == "Todd"
    assert preprint["date"] == "2016-04-18"
test_search_query_isbn
def test_search_query_isbn(
)
The translation-server search endpoint can be tested via curl:
curl --header "Content-Type: text/plain" --data 'isbn:9781339919881' 'https://translate.manubot.org/search'
View Source
def test_search_query_isbn():
    """
    Call `search_query` with an ISBN identifier and check the title of the
    first record returned.

    The translation-server search endpoint can also be exercised by hand:

    ```
    curl \
      --header "Content-Type: text/plain" \
      --data 'isbn:9781339919881' \
      'https://translate.manubot.org/search'
    ```
    """
    records = search_query("isbn:9781339919881")
    first_record = records[0]
    assert first_record["title"].startswith("The hetnet awakens")
test_web_query
def test_web_query(
)
The translation-server web endpoint can be tested via curl:
curl --header "Content-Type: text/plain" --data 'https://bigthink.com/neurobonkers/a-pirate-bay-for-science' 'https://translate.manubot.org/web'
An outdated installation of translation-server caused the web query for this URL to be extraordinarily slow but has now been fixed. See https://github.com/zotero/translation-server/issues/63
View Source
def test_web_query():
    """
    Call `web_query` on a Big Think article URL and check that it returns a
    one-element list whose record title starts with the expected text.

    The translation-server web endpoint can also be exercised by hand:

    ```
    curl \
      --header "Content-Type: text/plain" \
      --data 'https://bigthink.com/neurobonkers/a-pirate-bay-for-science' \
      'https://translate.manubot.org/web'
    ```

    The web query for this URL used to be extraordinarily slow because of an
    outdated translation-server installation; that has since been fixed. See
    https://github.com/zotero/translation-server/issues/63
    """
    article_url = "https://bigthink.com/neurobonkers/a-pirate-bay-for-science"
    records = web_query(article_url)
    # The result must be a list holding exactly one record.
    assert isinstance(records, list)
    assert len(records) == 1
    first_record = records[0]
    assert first_record["title"].startswith("Meet the Robin Hood of Science")
test_web_query_returns_single_result_legacy_manubot_url
def test_web_query_returns_single_result_legacy_manubot_url(
)
Check that single=1 is specified for web queries. Without this, Zotero can prefer translators that return multiple choices.
This occurs with legacy Manubot manuscripts, which get assigned the DOI translator as top priority. https://github.com/zotero/translation-server/issues/65
curl --header "Content-Type: text/plain" --data 'https://greenelab.github.io/scihub-manuscript/v/cfe599e25405d38092bf972b6ea1c9e0dcf3deb9/' 'https://translate.manubot.org/web?single=1'
View Source
def test_web_query_returns_single_result_legacy_manubot_url():
    """
    Check that single=1 is specified for web queries by confirming that
    `web_query` returns exactly one record for a legacy Manubot manuscript URL.

    Without single=1, Zotero can prefer translators that return multiple
    choices. This occurs with legacy Manubot manuscripts, which get assigned
    the DOI translator as top priority.
    https://github.com/zotero/translation-server/issues/65

    ```
    curl \
      --header "Content-Type: text/plain" \
      --data 'https://greenelab.github.io/scihub-manuscript/v/cfe599e25405d38092bf972b6ea1c9e0dcf3deb9/' \
      'https://translate.manubot.org/web?single=1'
    ```
    """
    manuscript_url = "https://greenelab.github.io/scihub-manuscript/v/cfe599e25405d38092bf972b6ea1c9e0dcf3deb9/"
    records = web_query(manuscript_url)
    assert isinstance(records, list)
    assert len(records) == 1
    # Unpack the sole record; this raises if the list length is not one.
    (record,) = records
    expected_title = "Sci-Hub provides access to nearly all scholarly literature"
    assert record["title"] == expected_title
test_web_query_returns_single_result_pubmed_url
def test_web_query_returns_single_result_pubmed_url(
)
See test_web_query_returns_single_result_legacy_manubot_url docstring.
curl --header "Content-Type: text/plain" --data 'https://www.ncbi.nlm.nih.gov/pubmed/?term=sci-hub%5Btitle%5D' 'https://translate.manubot.org/web?single=1'
View Source
def test_web_query_returns_single_result_pubmed_url():
    """
    Check that `web_query` returns exactly one record for a PubMed search URL.

    For background on why a single result matters, see the
    test_web_query_returns_single_result_legacy_manubot_url docstring.

    ```
    curl \
      --header "Content-Type: text/plain" \
      --data 'https://www.ncbi.nlm.nih.gov/pubmed/?term=sci-hub%5Btitle%5D' \
      'https://translate.manubot.org/web?single=1'
    ```
    """
    search_url = "https://www.ncbi.nlm.nih.gov/pubmed/?term=sci-hub%5Btitle%5D"
    records = web_query(search_url)
    assert isinstance(records, list)
    assert len(records) == 1
    # Unpack the sole record; this raises if the list length is not one.
    (record,) = records
    assert record["title"].startswith("sci-hub[title]")