Module manubot.cite.citeproc
Correct or validate CSL item schema.
Module naming: citeproc is the generic name for programs that produce formatted bibliographies and citations based on the metadata of the cited objects and the formatting instructions provided by Citation Style Language (CSL) styles. -- https://en.wikipedia.org/wiki/CiteProc
View Source
"""Correct or validate CSL item schema.
Module naming: citeproc is the generic name for programs that produce
formatted bibliographies and citations based on the metadata of
the cited objects and the formatting instructions provided by
Citation Style Language (CSL) styles.
-- https://en.wikipedia.org/wiki/CiteProc
"""
import copy
import functools
import logging
from manubot.util import read_serialized_data
@functools.lru_cache
def get_jsonschema_csl_validator():
    """
    Build the jsonschema validator for the CSL Item JSON Schema.

    The schema is loaded from the pinned v1.0.2 URL with read_serialized_data,
    checked with the validator class's check_schema, and wrapped in a
    validator instance. The function is decorated with functools.lru_cache,
    so repeated calls reuse the first result instead of loading the schema
    again.
    """
    # jsonschema is imported inside the function rather than at module level.
    import jsonschema

    schema_url = "https://github.com/citation-style-language/schema/raw/v1.0.2/schemas/input/csl-data.json"
    csl_schema = read_serialized_data(schema_url)
    # Select the validator class matching the schema's declared draft.
    validator_class = jsonschema.validators.validator_for(csl_schema)
    validator_class.check_schema(csl_schema)
    return validator_class(csl_schema)
def remove_jsonschema_errors(instance, recurse_depth=5, in_place=False):
    """
    Prune the parts of a CSL JSON instance that fail JSON Schema validation.

    Every error reported by the CSL validator is handed to _remove_error.
    If the pruned instance still does not validate, the function calls
    itself again with recurse_depth reduced by one, until the instance
    validates or recurse_depth drops below 1. The instance is returned in
    either case, so the result is not guaranteed to be valid.

    Note that this method may not work for all types of JSON Schema errors.
    Users looking to adapt it for other applications should write
    task-specific tests to provide empirical evidence that it works as
    intended.

    With the default in_place=False, instance is deep-copied before pruning,
    so a new object is returned and the argument is left unmodified.
    Set in_place=True to edit instance directly. The initial implementation
    always deep-copied instance, and it is possible that deep-copying is
    important to prevent malfunction in certain edge cases.
    Please report any in_place-dependent behaviors you observe.

    See also:
    https://github.com/Julian/jsonschema/issues/448
    https://stackoverflow.com/questions/44694835
    """
    csl_validator = get_jsonschema_csl_validator()
    # Errors are collected from the object as passed in, before any copy.
    found = list(csl_validator.iter_errors(instance))
    target = instance if in_place else copy.deepcopy(instance)
    # Handle errors in descending path order (presumably so that removing a
    # later list element does not shift the index of an earlier one).
    found.sort(key=lambda err: err.path, reverse=True)
    for err in found:
        _remove_error(target, err)
    if not csl_validator.is_valid(target) and recurse_depth >= 1:
        return remove_jsonschema_errors(target, recurse_depth - 1, in_place=in_place)
    return target
def _delete_elem(instance, path, absolute_path=None, message=""):
"""
Helper function for remove_jsonschema_errors that deletes an element in the
JSON-like input instance at the specified path. absolute_path is relative
to the original validated instance for logging purposes. Defaults to path,
if not specified. message is an optional string with additional error
information to log.
"""
if absolute_path is None:
absolute_path = path
logging.debug(
(f"{message}\n" if message else message)
+ "_delete_elem deleting CSL element at: "
+ "/".join(map(str, absolute_path))
)
*head, tail = path
try:
del _deep_get(instance, head)[tail]
except KeyError:
pass
def _deep_get(instance, path):
"""
Descend path to return a deep element in the JSON object instance.
"""
for key in path:
instance = instance[key]
return instance
def _remove_error(instance, error):
"""
Remove a jsonschema ValidationError from the JSON-like instance.
See ValidationError documentation at
https://python-jsonschema.readthedocs.io/en/latest/errors/#jsonschema.exceptions.ValidationError
"""
sub_errors = error.context
if sub_errors:
# already_removed_additional was neccessary to workaround
# https://github.com/citation-style-language/schema/issues/154
already_removed_additional = False
for sub_error in sub_errors:
if sub_error.validator == "additionalProperties":
if already_removed_additional:
continue
already_removed_additional = True
sub_instance = _deep_get(instance, error.path)
_remove_error(sub_instance, sub_error)
elif error.validator == "additionalProperties":
extras = set(error.instance) - set(error.schema["properties"])
logging.debug(
error.message
+ f"\nWill now remove these {len(extras)} additional properties."
)
for key in extras:
_delete_elem(
instance=instance,
path=list(error.path) + [key],
absolute_path=list(error.absolute_path) + [key],
)
elif error.validator in {"enum", "type", "minItems", "maxItems"}:
_delete_elem(instance, error.path, error.absolute_path, error.message)
elif error.validator == "required":
logging.warning(
(f"{error.message}\n" if error.message else error.message)
+ "required element missing at: "
+ "/".join(map(str, error.absolute_path))
)
else:
raise NotImplementedError(f"{error.validator} is not yet supported")
Functions
get_jsonschema_csl_validator
def get_jsonschema_csl_validator(
)
Return a jsonschema validator for the CSL Item JSON Schema
View Source
@functools.lru_cache
def get_jsonschema_csl_validator():
    """
    Build the jsonschema validator for the CSL Item JSON Schema.

    The schema is loaded from the pinned v1.0.2 URL with read_serialized_data,
    checked with the validator class's check_schema, and wrapped in a
    validator instance. The function is decorated with functools.lru_cache,
    so repeated calls reuse the first result instead of loading the schema
    again.
    """
    # jsonschema is imported inside the function rather than at module level.
    import jsonschema

    schema_url = "https://github.com/citation-style-language/schema/raw/v1.0.2/schemas/input/csl-data.json"
    csl_schema = read_serialized_data(schema_url)
    # Select the validator class matching the schema's declared draft.
    validator_class = jsonschema.validators.validator_for(csl_schema)
    validator_class.check_schema(csl_schema)
    return validator_class(csl_schema)
remove_jsonschema_errors
def remove_jsonschema_errors(
instance,
recurse_depth=5,
in_place=False
)
Remove fields in CSL Items that produce JSON Schema errors. Should errors
be removed, but the JSON instance still fails to validate, recursively call remove_jsonschema_errors until the instance validates or the recursion depth limit is reached.
Note that this method may not work for all types of JSON Schema errors, and users looking to adapt it for other applications should write task-specific tests to provide empirical evidence that it works as intended.
The default in_place=False creates a deepcopy of instance before pruning it, such that a new dictionary is returned and instance is not edited. Set in_place=True to edit instance in-place. The initial implementation of remove_jsonschema_errors always deepcopied instance, and it is possible deepcopying is important to prevent malfunction when encountering certain edge cases. Please report if you observe any in_place-dependent behaviors.
See also: https://github.com/Julian/jsonschema/issues/448 https://stackoverflow.com/questions/44694835
View Source
def remove_jsonschema_errors(instance, recurse_depth=5, in_place=False):
    """
    Prune the parts of a CSL JSON instance that fail JSON Schema validation.

    Every error reported by the CSL validator is handed to _remove_error.
    If the pruned instance still does not validate, the function calls
    itself again with recurse_depth reduced by one, until the instance
    validates or recurse_depth drops below 1. The instance is returned in
    either case, so the result is not guaranteed to be valid.

    Note that this method may not work for all types of JSON Schema errors.
    Users looking to adapt it for other applications should write
    task-specific tests to provide empirical evidence that it works as
    intended.

    With the default in_place=False, instance is deep-copied before pruning,
    so a new object is returned and the argument is left unmodified.
    Set in_place=True to edit instance directly. The initial implementation
    always deep-copied instance, and it is possible that deep-copying is
    important to prevent malfunction in certain edge cases.
    Please report any in_place-dependent behaviors you observe.

    See also:
    https://github.com/Julian/jsonschema/issues/448
    https://stackoverflow.com/questions/44694835
    """
    csl_validator = get_jsonschema_csl_validator()
    # Errors are collected from the object as passed in, before any copy.
    found = list(csl_validator.iter_errors(instance))
    target = instance if in_place else copy.deepcopy(instance)
    # Handle errors in descending path order (presumably so that removing a
    # later list element does not shift the index of an earlier one).
    found.sort(key=lambda err: err.path, reverse=True)
    for err in found:
        _remove_error(target, err)
    if not csl_validator.is_valid(target) and recurse_depth >= 1:
        return remove_jsonschema_errors(target, recurse_depth - 1, in_place=in_place)
    return target