Module manubot.cite.zotero
Methods to interact with a Zotero translation-server.
https://github.com/zotero/translation-server
The Manubot team currently hosts a public translation server at https://translate.manubot.org. More information on this instance at https://github.com/manubot/manubot/issues/82.
View Source
"""
Methods to interact with a Zotero translation-server.
https://github.com/zotero/translation-server
The Manubot team currently hosts a public translation server at
https://translate.manubot.org. More information on this instance at
https://github.com/manubot/manubot/issues/82.
"""
import json
import logging
from typing import Any, Dict, List
import requests
from manubot.util import get_manubot_user_agent, is_http_url
# A single Zotero item: a dictionary with string keys and arbitrary values.
ZoteroRecord = Dict[str, Any]
# A list of Zotero items, the type returned by the query functions in this module.
ZoteroData = List[ZoteroRecord]
# for the purposes of this module, the CSL Items and Zotero Data have the same type
CSLItem = ZoteroRecord
CSLItems = ZoteroData
# Root of the translation-server instance. The functions in this module append
# an endpoint path (/web, /search, or /export) to this value for each request.
base_url = "https://translate.manubot.org"
"""URL that provides access to the Zotero translation-server API"""
def web_query(url: str) -> ZoteroData:
    """
    Look up Zotero citation metadata for a webpage via the translation-server
    /web endpoint.

    The URL is posted as a plain-text body with `single=1`. The decoded JSON
    response is handed to `_passthrough_zotero_data`, so the return value is a
    list holding at most one metadata dictionary.

    Raises `ValueError` when the server answers with status code 300 (multiple
    results) or when the decoded response is not a list. If the response body
    is not valid JSON, the body is logged as a warning and the parsing
    exception is re-raised.
    """
    request_headers = {
        "User-Agent": get_manubot_user_agent(),
        "Content-Type": "text/plain",
    }
    response = requests.post(
        f"{base_url}/web",
        params={"single": 1},
        headers=request_headers,
        data=str(url),
    )
    try:
        parsed = response.json()
    except Exception as exc:
        logging.warning(
            f"Error parsing web_query output as JSON for {url}:\n{response.text}"
        )
        raise exc
    if response.status_code != 300:
        return _passthrough_zotero_data(parsed)
    # When single=1 is specified, multiple results should never be returned
    logging.warning(
        f"web_query returned multiple results for {url}:\n"
        + json.dumps(parsed, indent=2)
    )
    raise ValueError(f"multiple results for {url}")
def search_query(identifier: str) -> ZoteroData:
    """
    Retrieve Zotero metadata for a DOI, ISBN, PMID, or arXiv ID using the
    translation-server /search endpoint.

    The identifier is posted as a plain-text body. The decoded JSON response is
    handed to `_passthrough_zotero_data` before being returned. If the response
    body is not valid JSON, the body is logged as a warning and the parsing
    exception is re-raised.

    Example usage:
    ```shell
    curl --silent \
      --data '10.2307/4486062' \
      --header 'Content-Type: text/plain' \
      http://127.0.0.1:1969/search
    ```
    """
    response = requests.post(
        f"{base_url}/search",
        headers={
            "User-Agent": get_manubot_user_agent(),
            "Content-Type": "text/plain",
        },
        data=str(identifier),
    )
    try:
        parsed = response.json()
    except Exception as exc:
        logging.warning(
            f"Error parsing search_query output as JSON for {identifier}:\n{response.text}"
        )
        raise exc
    return _passthrough_zotero_data(parsed)
def _passthrough_zotero_data(zotero_data: ZoteroData) -> ZoteroData:
"""
Address known issues with Zotero metadata.
Assumes zotero data should contain a single bibliographic record.
"""
if not isinstance(zotero_data, list):
raise ValueError("_passthrough_zotero_data: zotero_data should be a list")
if len(zotero_data) > 1:
# Sometimes translation-server creates multiple data items for a single record.
# If so, keep only the parent item, and remove child items (such as notes).
# https://github.com/zotero/translation-server/issues/67
zotero_data = zotero_data[:1]
return zotero_data
def export_as_csl(zotero_data: ZoteroData) -> CSLItems:
    """
    Export Zotero JSON data to CSL JSON using a translation-server /export query.

    Performs a similar query to the following curl command:
    ```
    curl --verbose \
      --data @items.json \
      --header 'Content-Type: application/json' \
      'https://translate.manubot.org/export?format=csljson'
    ```

    `zotero_data` is sent as the JSON request body and the decoded JSON
    response is returned.

    Raises `requests.HTTPError` when the response status is not OK; the
    response is attached to the exception as its `response` attribute so that
    callers can inspect the status code and body. If the response body is not
    valid JSON, the body is logged as a warning and the parsing exception is
    re-raised.
    """
    api_url = f"{base_url}/export"
    params = {"format": "csljson"}
    headers = {"User-Agent": get_manubot_user_agent()}
    response = requests.post(api_url, params=params, headers=headers, json=zotero_data)
    if not response.ok:
        message = f"export_as_csl: translation-server returned status code {response.status_code}"
        logging.warning(f"{message} with the following output:\n{response.text}")
        # Attach the response so the failure can be inspected programmatically.
        raise requests.HTTPError(message, response=response)
    try:
        csl_items = response.json()
    except Exception:
        logging.warning(f"Error parsing export_as_csl output as JSON:\n{response.text}")
        # Bare raise propagates the active exception with its original traceback.
        raise
    return csl_items
def get_csl_item(identifier: str) -> CSLItem:
    """
    Return the CSL Item for `identifier` by chaining a translation-server
    search query with an export query.

    The exported CSL JSON must contain exactly one record; that record is
    returned. Any other number of records makes the unpacking fail.
    """
    csl_items = export_as_csl(search_query(identifier))
    # Single-target unpacking enforces that exactly one item came back.
    [csl_item] = csl_items
    return csl_item
def search_or_web_query(identifier: str) -> ZoteroData:
    """
    Retrieve zotero metadata for `identifier`, choosing the endpoint by its form.

    When `is_http_url` reports that `identifier` is a URL, a /web query is
    used. Otherwise a /search query is used.
    """
    query = web_query if is_http_url(identifier) else search_query
    return query(identifier)
Variables
CSLItem
CSLItems
ZoteroData
ZoteroRecord
base_url
URL that provides access to the Zotero translation-server API
Functions
export_as_csl
def export_as_csl(
zotero_data: List[Dict[str, Any]]
) -> List[Dict[str, Any]]
Export Zotero JSON data to CSL JSON using a translation-server /export query.
Performs a similar query to the following curl command:
curl --verbose --data @items.json --header 'Content-Type: application/json' 'https://translate.manubot.org/export?format=csljson'
View Source
def export_as_csl(zotero_data: ZoteroData) -> CSLItems:
    """
    Export Zotero JSON data to CSL JSON using a translation-server /export query.

    Performs a similar query to the following curl command:
    ```
    curl --verbose \
      --data @items.json \
      --header 'Content-Type: application/json' \
      'https://translate.manubot.org/export?format=csljson'
    ```

    `zotero_data` is sent as the JSON request body and the decoded JSON
    response is returned.

    Raises `requests.HTTPError` when the response status is not OK; the
    response is attached to the exception as its `response` attribute so that
    callers can inspect the status code and body. If the response body is not
    valid JSON, the body is logged as a warning and the parsing exception is
    re-raised.
    """
    api_url = f"{base_url}/export"
    params = {"format": "csljson"}
    headers = {"User-Agent": get_manubot_user_agent()}
    response = requests.post(api_url, params=params, headers=headers, json=zotero_data)
    if not response.ok:
        message = f"export_as_csl: translation-server returned status code {response.status_code}"
        logging.warning(f"{message} with the following output:\n{response.text}")
        # Attach the response so the failure can be inspected programmatically.
        raise requests.HTTPError(message, response=response)
    try:
        csl_items = response.json()
    except Exception:
        logging.warning(f"Error parsing export_as_csl output as JSON:\n{response.text}")
        # Bare raise propagates the active exception with its original traceback.
        raise
    return csl_items
get_csl_item
def get_csl_item(
identifier: str
) -> Dict[str, Any]
Use a translation-server search query followed by an export query
to return a CSL Item (the first & only record of the returned CSL JSON).
View Source
def get_csl_item(identifier: str) -> CSLItem:
    """
    Return the CSL Item for `identifier` by chaining a translation-server
    search query with an export query.

    The exported CSL JSON must contain exactly one record; that record is
    returned. Any other number of records makes the unpacking fail.
    """
    csl_items = export_as_csl(search_query(identifier))
    # Single-target unpacking enforces that exactly one item came back.
    [csl_item] = csl_items
    return csl_item
search_or_web_query
def search_or_web_query(
identifier: str
) -> List[Dict[str, Any]]
Detect whether identifier is a URL. If so, retrieve zotero metadata using a /web query.
Otherwise, retrieve zotero metadata using a /search query.
View Source
def search_or_web_query(identifier: str) -> ZoteroData:
    """
    Retrieve zotero metadata for `identifier`, choosing the endpoint by its form.

    When `is_http_url` reports that `identifier` is a URL, a /web query is
    used. Otherwise a /search query is used.
    """
    query = web_query if is_http_url(identifier) else search_query
    return query(identifier)
search_query
def search_query(
identifier: str
) -> List[Dict[str, Any]]
Retrieve Zotero metadata for a DOI, ISBN, PMID, or arXiv ID.
Example usage:
curl --silent --data '10.2307/4486062' --header 'Content-Type: text/plain' http://127.0.0.1:1969/search
View Source
def search_query(identifier: str) -> ZoteroData:
    """
    Retrieve Zotero metadata for a DOI, ISBN, PMID, or arXiv ID using the
    translation-server /search endpoint.

    The identifier is posted as a plain-text body. The decoded JSON response is
    handed to `_passthrough_zotero_data` before being returned. If the response
    body is not valid JSON, the body is logged as a warning and the parsing
    exception is re-raised.

    Example usage:
    ```shell
    curl --silent \
      --data '10.2307/4486062' \
      --header 'Content-Type: text/plain' \
      http://127.0.0.1:1969/search
    ```
    """
    response = requests.post(
        f"{base_url}/search",
        headers={
            "User-Agent": get_manubot_user_agent(),
            "Content-Type": "text/plain",
        },
        data=str(identifier),
    )
    try:
        parsed = response.json()
    except Exception as exc:
        logging.warning(
            f"Error parsing search_query output as JSON for {identifier}:\n{response.text}"
        )
        raise exc
    return _passthrough_zotero_data(parsed)
web_query
def web_query(
url: str
) -> List[Dict[str, Any]]
Return Zotero citation metadata for a URL as a list containing a single element that
is a dictionary with the URL's metadata.
View Source
def web_query(url: str) -> ZoteroData:
    """
    Look up Zotero citation metadata for a webpage via the translation-server
    /web endpoint.

    The URL is posted as a plain-text body with `single=1`. The decoded JSON
    response is handed to `_passthrough_zotero_data`, so the return value is a
    list holding at most one metadata dictionary.

    Raises `ValueError` when the server answers with status code 300 (multiple
    results) or when the decoded response is not a list. If the response body
    is not valid JSON, the body is logged as a warning and the parsing
    exception is re-raised.
    """
    request_headers = {
        "User-Agent": get_manubot_user_agent(),
        "Content-Type": "text/plain",
    }
    response = requests.post(
        f"{base_url}/web",
        params={"single": 1},
        headers=request_headers,
        data=str(url),
    )
    try:
        parsed = response.json()
    except Exception as exc:
        logging.warning(
            f"Error parsing web_query output as JSON for {url}:\n{response.text}"
        )
        raise exc
    if response.status_code != 300:
        return _passthrough_zotero_data(parsed)
    # When single=1 is specified, multiple results should never be returned
    logging.warning(
        f"web_query returned multiple results for {url}:\n"
        + json.dumps(parsed, indent=2)
    )
    raise ValueError(f"multiple results for {url}")