Module manubot.cite.unpaywall
Utilities for accessing https://unpaywall.org/ data to provide access
information for DOIs. Also supports identifier sources not directly supported by Unpaywall, such as arXiv IDs.
View Source
"""
Utilities for accessing <https://unpaywall.org/> data to provide access
information for DOIs. Also supports identifier sources not directly
supported by Unpaywall, such as arXiv IDs.
"""
import abc
import requests
"""
Unpaywall license choices used by Location.has_open_license.
Defaults to licenses that conform to <https://opendefinition.org/>.
"""
# License identifiers treated as open; Unpaywall_Location.has_open_license
# tests a location's "license" value for membership in this set.
open_licenses = {"cc0", "cc-by", "cc-by-sa", "pd"}
class Unpaywall:
    """
    A class to handle open access locations in the Unpaywall data format.

    Create new Unpaywall objects using the `from_csl_item` and `from_citekey`
    methods, or by using __init__ of a subclass like `Unpaywall_DOI`.
    """

    # CSL Item describing the work. `from_citekey` assigns it on the new
    # instance before calling `set_oa_locations`.
    csl_item = None

    # NOTE: this class does not use an ABCMeta metaclass, so the decorator
    # only marks intent and does not block instantiation.
    @abc.abstractmethod
    def set_oa_locations(self):
        """
        Set `self.oa_locations`, which is a list of `Unpaywall_Location` objects.

        This base implementation sets an empty list.
        """
        self.oa_locations = []

    @property
    def best_openly_licensed_pdf(self) -> "Unpaywall_Location":
        """
        Return the first entry of `self.oa_locations` whose
        `has_openly_licensed_pdf` is true, or None when there is none.
        """
        return next(
            (loc for loc in self.oa_locations if loc.has_openly_licensed_pdf),
            None,
        )

    @property
    def best_pdf(self) -> "Unpaywall_Location":
        """
        Return the first entry of `self.oa_locations` whose `has_pdf`
        is true, or None when there is none.
        """
        return next((loc for loc in self.oa_locations if loc.has_pdf), None)

    @staticmethod
    def from_citekey(citekey, csl_item=None):
        """
        Create an Unpaywall object for `citekey`.

        `citekey` is a str or CiteKey. `csl_item` is an optional field that
        can avoid an external web request to generate a new CSL Item.

        Raises ValueError when `citekey` has an unsupported type or when its
        standard prefix has no entry in `source_to_unpaywaller`.
        """
        from .citekey import CiteKey

        if isinstance(citekey, str):
            citekey = CiteKey(citekey)
        if not isinstance(citekey, CiteKey):
            raise ValueError("citekey must be a str or CiteKey")
        if citekey.standard_prefix not in source_to_unpaywaller:
            # Every fragment needs the f prefix; without it the placeholder
            # is emitted literally instead of the received prefix.
            raise ValueError(
                f"Cannot Unpaywall {citekey.input_id}. "
                f"Supported citations sources are {', '.join(source_to_unpaywaller)}. "
                f"Received {citekey.standard_prefix!r}."
            )
        unpaywaller = source_to_unpaywaller[citekey.standard_prefix]
        # Construct without fetching so csl_item is attached before the lookup.
        unpaywall = unpaywaller(citekey.standard_accession, set_oa_locations=False)
        unpaywall.csl_item = csl_item
        unpaywall.set_oa_locations()
        return unpaywall

    @classmethod
    def from_csl_item(cls, csl_item):
        """
        Create an Unpaywall object for `csl_item`.

        Uses the item's DOI when it has one; otherwise infers the item's id
        and uses that as the citekey.
        """
        from .csl_item import CSL_Item

        csl_item = CSL_Item(csl_item)
        doi = csl_item.get("DOI")
        if doi:
            return cls.from_citekey(f"doi:{doi}", csl_item=csl_item)
        csl_item.infer_id()
        return cls.from_citekey(csl_item["id"], csl_item=csl_item)
class Unpaywall_DOI(Unpaywall):
    """
    From https://unpaywall.org/data-format:

    > The DOI object is more or less a row in our main database...
    it's everything we know about a given DOI-assigned resource,
    including metadata about the resource itself,
    and information about its OA status.
    It includes a list of zero or more OA Location Objects,
    as well as a `best_oa_location` property that's probably the OA Location you'll want to use.
    """

    def __init__(self, doi, set_oa_locations=True):
        """
        Store the lowercased `doi` and, unless `set_oa_locations` is false,
        immediately query the Unpaywall API.
        """
        self.doi = doi.lower()
        if set_oa_locations:
            self.set_oa_locations()

    def set_oa_locations(self):
        """
        Query the Unpaywall v2 API for `self.doi`.

        Stores the decoded JSON response in `self.results` and its
        "oa_locations" entries (if any) as `Unpaywall_Location` objects in
        `self.oa_locations`. Raises the `requests` exception for HTTP error
        statuses or an unresponsive server.
        """
        from manubot.util import contact_email

        url = f"https://api.unpaywall.org/v2/{self.doi}"
        params = {"email": contact_email}
        # requests applies no timeout unless one is given, so a stalled
        # connection would otherwise block indefinitely.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        self.results = response.json()
        self.oa_locations = [
            Unpaywall_Location(location)
            for location in self.results.get("oa_locations", [])
        ]
class Unpaywall_arXiv(Unpaywall):
    """
    Open access locations for an arXiv identifier.

    When the CSL Item for the arXiv record has a DOI and `use_doi` is true,
    locations are taken from `Unpaywall_DOI`; otherwise a single location
    pointing at arXiv is synthesized.
    """

    def __init__(self, arxiv_id, set_oa_locations=True, use_doi=True):
        """
        Store `arxiv_id`, split it into its versionless and version parts,
        and populate locations unless `set_oa_locations` is false.
        """
        from .arxiv import split_arxiv_id_version

        self.arxiv_id = arxiv_id
        self.arxiv_id_latest, self.arxiv_id_version = split_arxiv_id_version(arxiv_id)
        self.use_doi = use_doi
        if set_oa_locations:
            self.set_oa_locations()

    def set_oa_locations(self):
        """
        Set `self.oa_locations`, which is a list of `Unpaywall_Location` objects.

        Fetches the arXiv CSL Item when `self.csl_item` is not already set.
        """
        from .arxiv import get_arxiv_csl_item

        if not self.csl_item:
            self.csl_item = get_arxiv_csl_item(self.arxiv_id)
        doi = self.csl_item.get("DOI")
        if self.use_doi and doi:
            unpaywall_doi = Unpaywall_DOI(doi)
            self.doi = unpaywall_doi.doi
            self.oa_locations = unpaywall_doi.oa_locations
            return
        location = self.location_from_arvix_id()
        self.oa_locations = [location]

    def location_from_arvix_id(self):
        """
        Build an `Unpaywall_Location` describing the arXiv copy of this record.
        """
        import datetime

        url_for_pdf = f"https://arxiv.org/pdf/{self.arxiv_id}.pdf"
        location = Unpaywall_Location(
            {
                "endpoint_id": None,
                "evidence": "oa repository",
                "host_type": "repository",
                "is_best": True,
                "license": self.get_license(),
                "pmh_id": f"oai:arXiv.org:{self.arxiv_id_latest}",
                "repository_institution": "Cornell University - arXiv",
                "updated": datetime.datetime.now().isoformat(),
                "url": url_for_pdf,
                "url_for_landing_page": f"https://arxiv.org/abs/{self.arxiv_id}",
                "url_for_pdf": url_for_pdf,
                "version": "submittedVersion",
            }
        )
        return location

    def get_license(self):
        """
        Return license using choices from the Unpaywall data format.
        Looks for license metadata in the CSL Item.

        Returns None when no license is recorded or when the license is not
        a Creative Commons URL this method can abbreviate.
        """
        license_url = self.csl_item.note_dict.get("license")
        if not license_url:
            return None
        # Example licenses from https://arxiv.org/help/license
        # http://creativecommons.org/publicdomain/zero/1.0/
        # http://creativecommons.org/licenses/by/4.0/
        # http://creativecommons.org/licenses/by-sa/4.0/
        # http://creativecommons.org/licenses/by-nc-sa/4.0/
        # http://arxiv.org/licenses/nonexclusive-distrib/1.0/license.html
        from urllib.parse import urlparse

        parsed_url = urlparse(license_url)
        if not parsed_url.scheme.startswith("http"):
            return None
        # hostname is None for URLs without a network location (e.g. "http:foo").
        hostname = parsed_url.hostname or ""
        # Accept the bare domain or a subdomain, not any host that merely
        # ends with the same characters.
        if hostname != "creativecommons.org" and not hostname.endswith(
            ".creativecommons.org"
        ):
            return None
        segments = parsed_url.path.split("/")
        abbrev = segments[2] if len(segments) > 2 else ""
        if not abbrev:
            # Paths such as "/licenses/" carry no license abbreviation.
            return None
        if abbrev == "zero":
            return "cc0"
        return f"cc-{abbrev}"
# Maps a citekey's standard prefix to the Unpaywall subclass that handles it.
# Unpaywall.from_citekey looks the prefix up here and rejects prefixes that
# are not present.
source_to_unpaywaller = {
"doi": Unpaywall_DOI,
"arxiv": Unpaywall_arXiv,
}
class Unpaywall_Location(dict):
    """
    A dict holding one OA Location object, with convenience properties.

    From https://unpaywall.org/data-format

    > The OA Location object describes particular place where we found a given OA article.
    The same article is often available from multiple locations,
    and there may be differences in format, version, and license depending on the location;
    the OA Location object describes these key attributes.
    An OA Location Object is always a Child of a DOI Object.

    Example oa_locations from the Unpaywall API are:

    ```json
    {
        "endpoint_id": null,
        "evidence": "open (via page says license)",
        "host_type": "publisher",
        "is_best": true,
        "license": "cc-by",
        "pmh_id": null,
        "repository_institution": null,
        "updated": "2020-01-19T08:55:45.548214",
        "url": "https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1007250&type=printable",
        "url_for_landing_page": "https://doi.org/10.1371/journal.pcbi.1007250",
        "url_for_pdf": "https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1007250&type=printable",
        "version": "publishedVersion"
    },
    {
        "endpoint_id": "ca8f8d56758a80a4f86",
        "evidence": "oa repository (via OAI-PMH doi match)",
        "host_type": "repository",
        "is_best": true,
        "license": null,
        "pmh_id": "oai:arXiv.org:1806.05726",
        "repository_institution": "Cornell University - arXiv",
        "updated": "2019-11-01T00:28:16.784912",
        "url": "http://arxiv.org/pdf/1806.05726",
        "url_for_landing_page": "http://arxiv.org/abs/1806.05726",
        "url_for_pdf": "http://arxiv.org/pdf/1806.05726",
        "version": "submittedVersion"
    }
    ```
    """

    @property
    def has_pdf(self):
        """True when the "url_for_pdf" entry is present and non-empty."""
        pdf_url = self.get("url_for_pdf")
        return True if pdf_url else False

    @property
    def has_open_license(self):
        """True when the "license" entry is one of `open_licenses`."""
        return self.get("license") in open_licenses

    @property
    def has_creative_commons_license(self):
        """True when the "license" entry is "cc0" or starts with "cc-"."""
        value = self.get("license")
        if value:
            return value == "cc0" or value.startswith("cc-")
        return False

    @property
    def has_openly_licensed_pdf(self):
        """True when the location has both a PDF and an open license."""
        if not self.has_pdf:
            return False
        return self.has_open_license
Variables
open_licenses
source_to_unpaywaller
Classes
Unpaywall
class Unpaywall(
/,
*args,
**kwargs
)
A class to handle open access locations in the Unpaywall data format.
Create new Unpaywall objects using the from_csl_item
and from_citekey
methods,
or by using __init__ of a subclass like Unpaywall_DOI
.
View Source
class Unpaywall:
    """
    A class to handle open access locations in the Unpaywall data format.

    Create new Unpaywall objects using the `from_csl_item` and `from_citekey`
    methods, or by using __init__ of a subclass like `Unpaywall_DOI`.
    """

    # CSL Item describing the work. `from_citekey` assigns it on the new
    # instance before calling `set_oa_locations`.
    csl_item = None

    # NOTE: this class does not use an ABCMeta metaclass, so the decorator
    # only marks intent and does not block instantiation.
    @abc.abstractmethod
    def set_oa_locations(self):
        """
        Set `self.oa_locations`, which is a list of `Unpaywall_Location` objects.

        This base implementation sets an empty list.
        """
        self.oa_locations = []

    @property
    def best_openly_licensed_pdf(self) -> "Unpaywall_Location":
        """
        Return the first entry of `self.oa_locations` whose
        `has_openly_licensed_pdf` is true, or None when there is none.
        """
        return next(
            (loc for loc in self.oa_locations if loc.has_openly_licensed_pdf),
            None,
        )

    @property
    def best_pdf(self) -> "Unpaywall_Location":
        """
        Return the first entry of `self.oa_locations` whose `has_pdf`
        is true, or None when there is none.
        """
        return next((loc for loc in self.oa_locations if loc.has_pdf), None)

    @staticmethod
    def from_citekey(citekey, csl_item=None):
        """
        Create an Unpaywall object for `citekey`.

        `citekey` is a str or CiteKey. `csl_item` is an optional field that
        can avoid an external web request to generate a new CSL Item.

        Raises ValueError when `citekey` has an unsupported type or when its
        standard prefix has no entry in `source_to_unpaywaller`.
        """
        from .citekey import CiteKey

        if isinstance(citekey, str):
            citekey = CiteKey(citekey)
        if not isinstance(citekey, CiteKey):
            raise ValueError("citekey must be a str or CiteKey")
        if citekey.standard_prefix not in source_to_unpaywaller:
            # Every fragment needs the f prefix; without it the placeholder
            # is emitted literally instead of the received prefix.
            raise ValueError(
                f"Cannot Unpaywall {citekey.input_id}. "
                f"Supported citations sources are {', '.join(source_to_unpaywaller)}. "
                f"Received {citekey.standard_prefix!r}."
            )
        unpaywaller = source_to_unpaywaller[citekey.standard_prefix]
        # Construct without fetching so csl_item is attached before the lookup.
        unpaywall = unpaywaller(citekey.standard_accession, set_oa_locations=False)
        unpaywall.csl_item = csl_item
        unpaywall.set_oa_locations()
        return unpaywall

    @classmethod
    def from_csl_item(cls, csl_item):
        """
        Create an Unpaywall object for `csl_item`.

        Uses the item's DOI when it has one; otherwise infers the item's id
        and uses that as the citekey.
        """
        from .csl_item import CSL_Item

        csl_item = CSL_Item(csl_item)
        doi = csl_item.get("DOI")
        if doi:
            return cls.from_citekey(f"doi:{doi}", csl_item=csl_item)
        csl_item.infer_id()
        return cls.from_citekey(csl_item["id"], csl_item=csl_item)
Descendants
- manubot.cite.unpaywall.Unpaywall_DOI
- manubot.cite.unpaywall.Unpaywall_arXiv
Class variables
csl_item
Static methods
from_citekey
def from_citekey(
citekey,
csl_item=None
)
Create an Unpaywall object for citekey
.
csl_item
is an optional field that can avoid an
external web request to generate a new CSL Item.
View Source
@staticmethod
def from_citekey(citekey, csl_item=None):
    """
    Create an Unpaywall object for `citekey`.

    `citekey` is a str or CiteKey. `csl_item` is an optional field that
    can avoid an external web request to generate a new CSL Item.

    Raises ValueError when `citekey` has an unsupported type or when its
    standard prefix has no entry in `source_to_unpaywaller`.
    """
    from .citekey import CiteKey

    if isinstance(citekey, str):
        citekey = CiteKey(citekey)
    if not isinstance(citekey, CiteKey):
        raise ValueError("citekey must be a str or CiteKey")
    if citekey.standard_prefix not in source_to_unpaywaller:
        # Every fragment needs the f prefix; without it the placeholder
        # is emitted literally instead of the received prefix.
        raise ValueError(
            f"Cannot Unpaywall {citekey.input_id}. "
            f"Supported citations sources are {', '.join(source_to_unpaywaller)}. "
            f"Received {citekey.standard_prefix!r}."
        )
    unpaywaller = source_to_unpaywaller[citekey.standard_prefix]
    # Construct without fetching so csl_item is attached before the lookup.
    unpaywall = unpaywaller(citekey.standard_accession, set_oa_locations=False)
    unpaywall.csl_item = csl_item
    unpaywall.set_oa_locations()
    return unpaywall
from_csl_item
def from_csl_item(
csl_item
)
Create an Unpaywall object for csl_item
.
View Source
@classmethod
def from_csl_item(cls, csl_item):
    """
    Build an Unpaywall object from `csl_item`.

    The citekey is the item's DOI when present, otherwise the id that
    the CSL Item infers for itself.
    """
    from .csl_item import CSL_Item

    item = CSL_Item(csl_item)
    doi = item.get("DOI")
    if not doi:
        item.infer_id()
        citekey = item["id"]
    else:
        citekey = f"doi:{doi}"
    return cls.from_citekey(citekey, csl_item=item)
Instance variables
best_openly_licensed_pdf
best_pdf
Methods
set_oa_locations
def set_oa_locations(
self
)
Set self.oa_locations
, which is a list of Unpaywall_Location
objects.
View Source
@abc.abstractmethod
def set_oa_locations(self):
    """
    Populate `self.oa_locations` with a list of `Unpaywall_Location` objects.

    The base implementation stores an empty list.
    """
    self.oa_locations = list()
Unpaywall_DOI
class Unpaywall_DOI(
doi,
set_oa_locations=True
)
From https://unpaywall.org/data-format:
The DOI object is more or less a row in our main database... it's everything we know about a given DOI-assigned resource, including metadata about the resource itself, and information about its OA status. It includes a list of zero or more OA Location Objects, as well as a
best_oa_location
property that's probably the OA Location you'll want to use.
View Source
class Unpaywall_DOI(Unpaywall):
    """
    From https://unpaywall.org/data-format:

    > The DOI object is more or less a row in our main database...
    it's everything we know about a given DOI-assigned resource,
    including metadata about the resource itself,
    and information about its OA status.
    It includes a list of zero or more OA Location Objects,
    as well as a `best_oa_location` property that's probably the OA Location you'll want to use.
    """

    def __init__(self, doi, set_oa_locations=True):
        """
        Store the lowercased `doi` and, unless `set_oa_locations` is false,
        immediately query the Unpaywall API.
        """
        self.doi = doi.lower()
        if set_oa_locations:
            self.set_oa_locations()

    def set_oa_locations(self):
        """
        Query the Unpaywall v2 API for `self.doi`.

        Stores the decoded JSON response in `self.results` and its
        "oa_locations" entries (if any) as `Unpaywall_Location` objects in
        `self.oa_locations`. Raises the `requests` exception for HTTP error
        statuses or an unresponsive server.
        """
        from manubot.util import contact_email

        url = f"https://api.unpaywall.org/v2/{self.doi}"
        params = {"email": contact_email}
        # requests applies no timeout unless one is given, so a stalled
        # connection would otherwise block indefinitely.
        response = requests.get(url, params=params, timeout=30)
        response.raise_for_status()
        self.results = response.json()
        self.oa_locations = [
            Unpaywall_Location(location)
            for location in self.results.get("oa_locations", [])
        ]
Ancestors (in MRO)
- manubot.cite.unpaywall.Unpaywall
Class variables
csl_item
Static methods
from_citekey
def from_citekey(
citekey,
csl_item=None
)
Create an Unpaywall object for citekey
.
csl_item
is an optional field that can avoid an
external web request to generate a new CSL Item.
View Source
@staticmethod
def from_citekey(citekey, csl_item=None):
    """
    Create an Unpaywall object for `citekey`.

    `citekey` is a str or CiteKey. `csl_item` is an optional field that
    can avoid an external web request to generate a new CSL Item.

    Raises ValueError when `citekey` has an unsupported type or when its
    standard prefix has no entry in `source_to_unpaywaller`.
    """
    from .citekey import CiteKey

    if isinstance(citekey, str):
        citekey = CiteKey(citekey)
    if not isinstance(citekey, CiteKey):
        raise ValueError("citekey must be a str or CiteKey")
    if citekey.standard_prefix not in source_to_unpaywaller:
        # Every fragment needs the f prefix; without it the placeholder
        # is emitted literally instead of the received prefix.
        raise ValueError(
            f"Cannot Unpaywall {citekey.input_id}. "
            f"Supported citations sources are {', '.join(source_to_unpaywaller)}. "
            f"Received {citekey.standard_prefix!r}."
        )
    unpaywaller = source_to_unpaywaller[citekey.standard_prefix]
    # Construct without fetching so csl_item is attached before the lookup.
    unpaywall = unpaywaller(citekey.standard_accession, set_oa_locations=False)
    unpaywall.csl_item = csl_item
    unpaywall.set_oa_locations()
    return unpaywall
from_csl_item
def from_csl_item(
csl_item
)
Create an Unpaywall object for csl_item
.
View Source
@classmethod
def from_csl_item(cls, csl_item):
    """
    Build an Unpaywall object from `csl_item`.

    The citekey is the item's DOI when present, otherwise the id that
    the CSL Item infers for itself.
    """
    from .csl_item import CSL_Item

    item = CSL_Item(csl_item)
    doi = item.get("DOI")
    if not doi:
        item.infer_id()
        citekey = item["id"]
    else:
        citekey = f"doi:{doi}"
    return cls.from_citekey(citekey, csl_item=item)
Instance variables
best_openly_licensed_pdf
best_pdf
Methods
set_oa_locations
def set_oa_locations(
self
)
Set self.oa_locations
, which is a list of Unpaywall_Location
objects.
View Source
def set_oa_locations(self):
    """
    Query the Unpaywall v2 API for `self.doi`.

    Stores the decoded JSON response in `self.results` and its
    "oa_locations" entries (if any) as `Unpaywall_Location` objects in
    `self.oa_locations`. Raises the `requests` exception for HTTP error
    statuses or an unresponsive server.
    """
    from manubot.util import contact_email

    url = f"https://api.unpaywall.org/v2/{self.doi}"
    params = {"email": contact_email}
    # requests applies no timeout unless one is given, so a stalled
    # connection would otherwise block indefinitely.
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    self.results = response.json()
    self.oa_locations = [
        Unpaywall_Location(location)
        for location in self.results.get("oa_locations", [])
    ]
Unpaywall_Location
class Unpaywall_Location(
/,
*args,
**kwargs
)
From https://unpaywall.org/data-format
The OA Location object describes particular place where we found a given OA article. The same article is often available from multiple locations, and there may be differences in format, version, and license depending on the location; the OA Location object describes these key attributes. An OA Location Object is always a Child of a DOI Object.
Example oa_locations from the Unpaywall API are:
{
"endpoint_id": null,
"evidence": "open (via page says license)",
"host_type": "publisher",
"is_best": true,
"license": "cc-by",
"pmh_id": null,
"repository_institution": null,
"updated": "2020-01-19T08:55:45.548214",
"url": "https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1007250&type=printable",
"url_for_landing_page": "https://doi.org/10.1371/journal.pcbi.1007250",
"url_for_pdf": "https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1007250&type=printable",
"version": "publishedVersion"
},
{
"endpoint_id": "ca8f8d56758a80a4f86",
"evidence": "oa repository (via OAI-PMH doi match)",
"host_type": "repository",
"is_best": true,
"license": null,
"pmh_id": "oai:arXiv.org:1806.05726",
"repository_institution": "Cornell University - arXiv",
"updated": "2019-11-01T00:28:16.784912",
"url": "http://arxiv.org/pdf/1806.05726",
"url_for_landing_page": "http://arxiv.org/abs/1806.05726",
"url_for_pdf": "http://arxiv.org/pdf/1806.05726",
"version": "submittedVersion"
}
View Source
class Unpaywall_Location(dict):
    """
    A dict holding one OA Location object, with convenience properties.

    From https://unpaywall.org/data-format

    > The OA Location object describes particular place where we found a given OA article.
    The same article is often available from multiple locations,
    and there may be differences in format, version, and license depending on the location;
    the OA Location object describes these key attributes.
    An OA Location Object is always a Child of a DOI Object.

    Example oa_locations from the Unpaywall API are:

    ```json
    {
        "endpoint_id": null,
        "evidence": "open (via page says license)",
        "host_type": "publisher",
        "is_best": true,
        "license": "cc-by",
        "pmh_id": null,
        "repository_institution": null,
        "updated": "2020-01-19T08:55:45.548214",
        "url": "https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1007250&type=printable",
        "url_for_landing_page": "https://doi.org/10.1371/journal.pcbi.1007250",
        "url_for_pdf": "https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1007250&type=printable",
        "version": "publishedVersion"
    },
    {
        "endpoint_id": "ca8f8d56758a80a4f86",
        "evidence": "oa repository (via OAI-PMH doi match)",
        "host_type": "repository",
        "is_best": true,
        "license": null,
        "pmh_id": "oai:arXiv.org:1806.05726",
        "repository_institution": "Cornell University - arXiv",
        "updated": "2019-11-01T00:28:16.784912",
        "url": "http://arxiv.org/pdf/1806.05726",
        "url_for_landing_page": "http://arxiv.org/abs/1806.05726",
        "url_for_pdf": "http://arxiv.org/pdf/1806.05726",
        "version": "submittedVersion"
    }
    ```
    """

    @property
    def has_pdf(self):
        """True when the "url_for_pdf" entry is present and non-empty."""
        pdf_url = self.get("url_for_pdf")
        return True if pdf_url else False

    @property
    def has_open_license(self):
        """True when the "license" entry is one of `open_licenses`."""
        return self.get("license") in open_licenses

    @property
    def has_creative_commons_license(self):
        """True when the "license" entry is "cc0" or starts with "cc-"."""
        value = self.get("license")
        if value:
            return value == "cc0" or value.startswith("cc-")
        return False

    @property
    def has_openly_licensed_pdf(self):
        """True when the location has both a PDF and an open license."""
        if not self.has_pdf:
            return False
        return self.has_open_license
Ancestors (in MRO)
- builtins.dict
Instance variables
has_creative_commons_license
has_open_license
has_openly_licensed_pdf
has_pdf
Methods
clear
def clear(
...
)
D.clear() -> None. Remove all items from D.
copy
def copy(
...
)
D.copy() -> a shallow copy of D
fromkeys
def fromkeys(
iterable,
value=None,
/
)
Create a new dictionary with keys from iterable and values set to value.
get
def get(
self,
key,
default=None,
/
)
Return the value for key if key is in the dictionary, else default.
items
def items(
...
)
D.items() -> a set-like object providing a view on D's items
keys
def keys(
...
)
D.keys() -> a set-like object providing a view on D's keys
pop
def pop(
...
)
D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
If the key is not found, return the default if given; otherwise, raise a KeyError.
popitem
def popitem(
self,
/
)
Remove and return a (key, value) pair as a 2-tuple.
Pairs are returned in LIFO (last-in, first-out) order. Raises KeyError if the dict is empty.
setdefault
def setdefault(
self,
key,
default=None,
/
)
Insert key with a value of default if key is not in the dictionary.
Return the value for key if key is in the dictionary, else default.
update
def update(
...
)
D.update([E, ]**F) -> None. Update D from dict/iterable E and F.
If E is present and has a .keys() method, then does: for k in E: D[k] = E[k] If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v In either case, this is followed by: for k in F: D[k] = F[k]
values
def values(
...
)
D.values() -> an object providing a view on D's values
Unpaywall_arXiv
class Unpaywall_arXiv(
arxiv_id,
set_oa_locations=True,
use_doi=True
)
A class to handle open access locations in the Unpaywall data format.
Create new Unpaywall objects using the from_csl_item
and from_citekey
methods,
or by using __init__ of a subclass like Unpaywall_DOI
.
View Source
class Unpaywall_arXiv(Unpaywall):
    """
    Open access locations for an arXiv identifier.

    When the CSL Item for the arXiv record has a DOI and `use_doi` is true,
    locations are taken from `Unpaywall_DOI`; otherwise a single location
    pointing at arXiv is synthesized.
    """

    def __init__(self, arxiv_id, set_oa_locations=True, use_doi=True):
        """
        Store `arxiv_id`, split it into its versionless and version parts,
        and populate locations unless `set_oa_locations` is false.
        """
        from .arxiv import split_arxiv_id_version

        self.arxiv_id = arxiv_id
        self.arxiv_id_latest, self.arxiv_id_version = split_arxiv_id_version(arxiv_id)
        self.use_doi = use_doi
        if set_oa_locations:
            self.set_oa_locations()

    def set_oa_locations(self):
        """
        Set `self.oa_locations`, which is a list of `Unpaywall_Location` objects.

        Fetches the arXiv CSL Item when `self.csl_item` is not already set.
        """
        from .arxiv import get_arxiv_csl_item

        if not self.csl_item:
            self.csl_item = get_arxiv_csl_item(self.arxiv_id)
        doi = self.csl_item.get("DOI")
        if self.use_doi and doi:
            unpaywall_doi = Unpaywall_DOI(doi)
            self.doi = unpaywall_doi.doi
            self.oa_locations = unpaywall_doi.oa_locations
            return
        location = self.location_from_arvix_id()
        self.oa_locations = [location]

    def location_from_arvix_id(self):
        """
        Build an `Unpaywall_Location` describing the arXiv copy of this record.
        """
        import datetime

        url_for_pdf = f"https://arxiv.org/pdf/{self.arxiv_id}.pdf"
        location = Unpaywall_Location(
            {
                "endpoint_id": None,
                "evidence": "oa repository",
                "host_type": "repository",
                "is_best": True,
                "license": self.get_license(),
                "pmh_id": f"oai:arXiv.org:{self.arxiv_id_latest}",
                "repository_institution": "Cornell University - arXiv",
                "updated": datetime.datetime.now().isoformat(),
                "url": url_for_pdf,
                "url_for_landing_page": f"https://arxiv.org/abs/{self.arxiv_id}",
                "url_for_pdf": url_for_pdf,
                "version": "submittedVersion",
            }
        )
        return location

    def get_license(self):
        """
        Return license using choices from the Unpaywall data format.
        Looks for license metadata in the CSL Item.

        Returns None when no license is recorded or when the license is not
        a Creative Commons URL this method can abbreviate.
        """
        license_url = self.csl_item.note_dict.get("license")
        if not license_url:
            return None
        # Example licenses from https://arxiv.org/help/license
        # http://creativecommons.org/publicdomain/zero/1.0/
        # http://creativecommons.org/licenses/by/4.0/
        # http://creativecommons.org/licenses/by-sa/4.0/
        # http://creativecommons.org/licenses/by-nc-sa/4.0/
        # http://arxiv.org/licenses/nonexclusive-distrib/1.0/license.html
        from urllib.parse import urlparse

        parsed_url = urlparse(license_url)
        if not parsed_url.scheme.startswith("http"):
            return None
        # hostname is None for URLs without a network location (e.g. "http:foo").
        hostname = parsed_url.hostname or ""
        # Accept the bare domain or a subdomain, not any host that merely
        # ends with the same characters.
        if hostname != "creativecommons.org" and not hostname.endswith(
            ".creativecommons.org"
        ):
            return None
        segments = parsed_url.path.split("/")
        abbrev = segments[2] if len(segments) > 2 else ""
        if not abbrev:
            # Paths such as "/licenses/" carry no license abbreviation.
            return None
        if abbrev == "zero":
            return "cc0"
        return f"cc-{abbrev}"
Ancestors (in MRO)
- manubot.cite.unpaywall.Unpaywall
Class variables
csl_item
Static methods
from_citekey
def from_citekey(
citekey,
csl_item=None
)
Create an Unpaywall object for citekey
.
csl_item
is an optional field that can avoid an
external web request to generate a new CSL Item.
View Source
@staticmethod
def from_citekey(citekey, csl_item=None):
    """
    Create an Unpaywall object for `citekey`.

    `citekey` is a str or CiteKey. `csl_item` is an optional field that
    can avoid an external web request to generate a new CSL Item.

    Raises ValueError when `citekey` has an unsupported type or when its
    standard prefix has no entry in `source_to_unpaywaller`.
    """
    from .citekey import CiteKey

    if isinstance(citekey, str):
        citekey = CiteKey(citekey)
    if not isinstance(citekey, CiteKey):
        raise ValueError("citekey must be a str or CiteKey")
    if citekey.standard_prefix not in source_to_unpaywaller:
        # Every fragment needs the f prefix; without it the placeholder
        # is emitted literally instead of the received prefix.
        raise ValueError(
            f"Cannot Unpaywall {citekey.input_id}. "
            f"Supported citations sources are {', '.join(source_to_unpaywaller)}. "
            f"Received {citekey.standard_prefix!r}."
        )
    unpaywaller = source_to_unpaywaller[citekey.standard_prefix]
    # Construct without fetching so csl_item is attached before the lookup.
    unpaywall = unpaywaller(citekey.standard_accession, set_oa_locations=False)
    unpaywall.csl_item = csl_item
    unpaywall.set_oa_locations()
    return unpaywall
from_csl_item
def from_csl_item(
csl_item
)
Create an Unpaywall object for csl_item
.
View Source
@classmethod
def from_csl_item(cls, csl_item):
    """
    Build an Unpaywall object from `csl_item`.

    The citekey is the item's DOI when present, otherwise the id that
    the CSL Item infers for itself.
    """
    from .csl_item import CSL_Item

    item = CSL_Item(csl_item)
    doi = item.get("DOI")
    if not doi:
        item.infer_id()
        citekey = item["id"]
    else:
        citekey = f"doi:{doi}"
    return cls.from_citekey(citekey, csl_item=item)
Instance variables
best_openly_licensed_pdf
best_pdf
Methods
get_license
def get_license(
self
)
Return license using choices from the Unpaywall data format.
Looks for license metadata in the CSL Item.
View Source
def get_license(self):
    """
    Return license using choices from the Unpaywall data format.
    Looks for license metadata in the CSL Item.

    Returns None when no license is recorded or when the license is not
    a Creative Commons URL this method can abbreviate.
    """
    license_url = self.csl_item.note_dict.get("license")
    if not license_url:
        return None
    # Example licenses from https://arxiv.org/help/license
    # http://creativecommons.org/publicdomain/zero/1.0/
    # http://creativecommons.org/licenses/by/4.0/
    # http://creativecommons.org/licenses/by-sa/4.0/
    # http://creativecommons.org/licenses/by-nc-sa/4.0/
    # http://arxiv.org/licenses/nonexclusive-distrib/1.0/license.html
    from urllib.parse import urlparse

    parsed_url = urlparse(license_url)
    if not parsed_url.scheme.startswith("http"):
        return None
    # hostname is None for URLs without a network location (e.g. "http:foo").
    hostname = parsed_url.hostname or ""
    # Accept the bare domain or a subdomain, not any host that merely
    # ends with the same characters.
    if hostname != "creativecommons.org" and not hostname.endswith(
        ".creativecommons.org"
    ):
        return None
    segments = parsed_url.path.split("/")
    abbrev = segments[2] if len(segments) > 2 else ""
    if not abbrev:
        # Paths such as "/licenses/" carry no license abbreviation.
        return None
    if abbrev == "zero":
        return "cc0"
    return f"cc-{abbrev}"
location_from_arvix_id
def location_from_arvix_id(
self
)
View Source
def location_from_arvix_id(self):
    """
    Assemble an `Unpaywall_Location` for the arXiv copy of this record,
    using the stored arXiv identifiers and the license from `get_license`.
    """
    import datetime

    pdf_link = f"https://arxiv.org/pdf/{self.arxiv_id}.pdf"
    record = {
        "endpoint_id": None,
        "evidence": "oa repository",
        "host_type": "repository",
        "is_best": True,
        "license": self.get_license(),
        "pmh_id": f"oai:arXiv.org:{self.arxiv_id_latest}",
        "repository_institution": "Cornell University - arXiv",
        # Timestamp of when this record was synthesized.
        "updated": datetime.datetime.now().isoformat(),
        "url": pdf_link,
        "url_for_landing_page": f"https://arxiv.org/abs/{self.arxiv_id}",
        "url_for_pdf": pdf_link,
        "version": "submittedVersion",
    }
    return Unpaywall_Location(record)
set_oa_locations
def set_oa_locations(
self
)
Set self.oa_locations
, which is a list of Unpaywall_Location
objects.
View Source
def set_oa_locations(self):
    """
    Populate `self.oa_locations` with `Unpaywall_Location` objects.

    The CSL Item is fetched from arXiv when not already set. If it has a
    DOI and `self.use_doi` is true, locations come from `Unpaywall_DOI`;
    otherwise a single arXiv location is used.
    """
    from .arxiv import get_arxiv_csl_item

    if not self.csl_item:
        self.csl_item = get_arxiv_csl_item(self.arxiv_id)
    doi = self.csl_item.get("DOI")
    if doi and self.use_doi:
        via_doi = Unpaywall_DOI(doi)
        self.doi = via_doi.doi
        self.oa_locations = via_doi.oa_locations
    else:
        self.oa_locations = [self.location_from_arvix_id()]