Skip to content

Module manubot.util

View Source
import importlib

import json

import logging

import os

import pathlib

import platform

import shlex

import shutil

import subprocess

import sys

import typing

from types import ModuleType

if typing.TYPE_CHECKING:

    # allow type annotations of lazy-imported packages

    import yaml

# Email address that forwards to Manubot maintainers

contact_email: str = "contact@manubot.org"

def import_function(name: str):

    """

    Import a function in a module specified by name. For example, if name were

    'manubot.cite.cite_command.cli_cite', the cli_cite function would be

    returned as an object. See https://stackoverflow.com/a/8790232/4651668.

    """

    module_name, function_name = name.rsplit(".", 1)

    module = importlib.import_module(module_name)

    return getattr(module, function_name)

def import_tomllib() -> ModuleType:

    try:

        import tomllib

    except ModuleNotFoundError:

        import tomli as tomllib

    return tomllib

def get_manubot_user_agent() -> str:

    """

    Return a User-Agent string for web request headers to help services

    identify requests as coming from Manubot.

    """

    try:

        from manubot import __version__ as manubot_version

    except ImportError:

        manubot_version = ""

    return (

        f"manubot/{manubot_version} "

        f"({platform.system()}; Python/{sys.version_info.major}.{sys.version_info.minor}) "

        f"<{contact_email}>"

    )

def shlex_join(split_command) -> str:

    """

    Backport shlex.join for Python < 3.8.

    Also cast all args to str to increase versatility.

    https://github.com/python/cpython/pull/7605

    https://bugs.python.org/issue22454

    """

    return " ".join(shlex.quote(str(arg)) for arg in split_command)

"""Valid schemes for HTTP URL detection"""

_http_url_schemes: set = {"http", "https"}

def is_http_url(string: str) -> bool:

    """

    Return whether `string` is an HTTP(s) Uniform Resource Locator (URL).

    """

    from urllib.parse import urlparse

    parsed_url = urlparse(string)

    return parsed_url.scheme in _http_url_schemes

def read_serialized_data(path: str):

    """

    Read seralized data from a local file path or web-address.

    If file format extension is not detected in path, assumes JSON.

    If a URL does not contain the appropriate suffix, one workaround

    is to hack the fragment like https://example.org#/variables.toml

    """

    import requests

    path_str = os.fspath(path)

    path_obj = pathlib.Path(path)

    supported_suffixes = {".json", ".yaml", ".yml", ".toml"}

    suffixes = set(path_obj.suffixes)

    if is_http_url(path_str):

        headers = {"User-Agent": get_manubot_user_agent()}

        response = requests.get(path_str, headers=headers)

        if not suffixes & supported_suffixes:

            # if URL has no supported suffixes, evaluate suffixes of final redirect

            suffixes = set(pathlib.Path(response.url).suffixes)

        text = response.text

    else:

        text = path_obj.read_text(encoding="utf-8-sig")

    if {".yaml", ".yml"} & suffixes:

        import yaml

        try:

            return yaml.safe_load(text)

        except yaml.parser.ParserError as error:

            _lint_yaml(path)

            raise error

    if ".toml" in suffixes:

        return import_tomllib().loads(text)

    if ".json" not in suffixes:

        logging.info(

            f"read_serialized_data cannot infer serialization format from the extension of {path_str!r}. "

            f"Supported extensions are {', '.join(supported_suffixes)}. "

            "Assuming JSON."

        )

    return json.loads(text)

"""

yamllint configuration as per https://yamllint.readthedocs.io/en/stable/configuration.html

"""

_yamllint_config = {

    "extends": "relaxed",

    "rules": {"line-length": "disable", "trailing-spaces": {"level": "warning"}},

}

def _lint_yaml(path):

    if not shutil.which("yamllint"):

        logging.info(f"yamllint executable not found, skipping linting for {path}")

        return

    args = [

        "yamllint",

        "--config-data",

        json.dumps(_yamllint_config, indent=None),

        os.fspath(path),

    ]

    sys.stderr.write(f"yamllint {path}:\n")

    subprocess.run(args, stdout=sys.stderr)

def read_serialized_dict(path: str) -> dict:

    """

    Read serialized data, confirming that the top-level object is a dictionary.

    Delegates to `read_serialized_data`.

    """

    data = read_serialized_data(path)

    if isinstance(data, dict):

        return data

    raise TypeError(

        f"Expected data encoded by {path!r} to be a dictionary at the top-level. "

        f"Received {data.__class__.__name__!r} instead."

    )

def _yaml_str_representer(dumper: "yaml.Dumper", data: str):

    """

    Use YAML literal block style for multiline strings.

    Based on https://stackoverflow.com/a/33300001/4651668

    """

    if len(data.splitlines()) > 1:

        # use literal block style for multiline strings

        return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")

    return dumper.represent_scalar("tag:yaml.org,2002:str", data)

def get_configured_yaml() -> ModuleType:

    """

    Return imported YAML library with Manubot configuration.

    The representers are only applied to yaml.dump, not yaml.safe_dump.

    """

    import yaml

    from manubot.cite.csl_item import CSL_Item

    yaml.add_representer(str, _yaml_str_representer)

    # CSL_Item: pyyaml chokes on dict subclass

    # https://github.com/yaml/pyyaml/issues/142

    # https://stackoverflow.com/a/50181505/4651668

    yaml.add_representer(

        CSL_Item,

        lambda dumper, data: dumper.represent_mapping(

            tag="tag:yaml.org,2002:map", mapping=data.items()

        ),

    )

    return yaml

Variables

contact_email

Functions

get_configured_yaml

def get_configured_yaml(

) -> module

Return imported YAML library with Manubot configuration.

The representers are only applied to yaml.dump, not yaml.safe_dump.

View Source
def get_configured_yaml() -> ModuleType:

    """

    Return imported YAML library with Manubot configuration.

    The representers are only applied to yaml.dump, not yaml.safe_dump.

    """

    import yaml

    from manubot.cite.csl_item import CSL_Item

    yaml.add_representer(str, _yaml_str_representer)

    # CSL_Item: pyyaml chokes on dict subclass

    # https://github.com/yaml/pyyaml/issues/142

    # https://stackoverflow.com/a/50181505/4651668

    yaml.add_representer(

        CSL_Item,

        lambda dumper, data: dumper.represent_mapping(

            tag="tag:yaml.org,2002:map", mapping=data.items()

        ),

    )

    return yaml

get_manubot_user_agent

def get_manubot_user_agent(

) -> str

Return a User-Agent string for web request headers to help services

identify requests as coming from Manubot.

View Source
def get_manubot_user_agent() -> str:

    """

    Return a User-Agent string for web request headers to help services

    identify requests as coming from Manubot.

    """

    try:

        from manubot import __version__ as manubot_version

    except ImportError:

        manubot_version = ""

    return (

        f"manubot/{manubot_version} "

        f"({platform.system()}; Python/{sys.version_info.major}.{sys.version_info.minor}) "

        f"<{contact_email}>"

    )

import_function

def import_function(
    name: str
)

Import a function in a module specified by name. For example, if name were

'manubot.cite.cite_command.cli_cite', the cli_cite function would be returned as an object. See https://stackoverflow.com/a/8790232/4651668.

View Source
def import_function(name: str):

    """

    Import a function in a module specified by name. For example, if name were

    'manubot.cite.cite_command.cli_cite', the cli_cite function would be

    returned as an object. See https://stackoverflow.com/a/8790232/4651668.

    """

    module_name, function_name = name.rsplit(".", 1)

    module = importlib.import_module(module_name)

    return getattr(module, function_name)

import_tomllib

def import_tomllib(

) -> module
View Source
def import_tomllib() -> ModuleType:

    try:

        import tomllib

    except ModuleNotFoundError:

        import tomli as tomllib

    return tomllib

is_http_url

def is_http_url(
    string: str
) -> bool

Return whether string is an HTTP(s) Uniform Resource Locator (URL).

View Source
def is_http_url(string: str) -> bool:

    """

    Return whether `string` is an HTTP(s) Uniform Resource Locator (URL).

    """

    from urllib.parse import urlparse

    parsed_url = urlparse(string)

    return parsed_url.scheme in _http_url_schemes

read_serialized_data

def read_serialized_data(
    path: str
)

Read seralized data from a local file path or web-address.

If file format extension is not detected in path, assumes JSON. If a URL does not contain the appropriate suffix, one workaround is to hack the fragment like https://example.org#/variables.toml

View Source
def read_serialized_data(path: str):

    """

    Read seralized data from a local file path or web-address.

    If file format extension is not detected in path, assumes JSON.

    If a URL does not contain the appropriate suffix, one workaround

    is to hack the fragment like https://example.org#/variables.toml

    """

    import requests

    path_str = os.fspath(path)

    path_obj = pathlib.Path(path)

    supported_suffixes = {".json", ".yaml", ".yml", ".toml"}

    suffixes = set(path_obj.suffixes)

    if is_http_url(path_str):

        headers = {"User-Agent": get_manubot_user_agent()}

        response = requests.get(path_str, headers=headers)

        if not suffixes & supported_suffixes:

            # if URL has no supported suffixes, evaluate suffixes of final redirect

            suffixes = set(pathlib.Path(response.url).suffixes)

        text = response.text

    else:

        text = path_obj.read_text(encoding="utf-8-sig")

    if {".yaml", ".yml"} & suffixes:

        import yaml

        try:

            return yaml.safe_load(text)

        except yaml.parser.ParserError as error:

            _lint_yaml(path)

            raise error

    if ".toml" in suffixes:

        return import_tomllib().loads(text)

    if ".json" not in suffixes:

        logging.info(

            f"read_serialized_data cannot infer serialization format from the extension of {path_str!r}. "

            f"Supported extensions are {', '.join(supported_suffixes)}. "

            "Assuming JSON."

        )

    return json.loads(text)

read_serialized_dict

def read_serialized_dict(
    path: str
) -> dict

Read serialized data, confirming that the top-level object is a dictionary.

Delegates to read_serialized_data.

View Source
def read_serialized_dict(path: str) -> dict:

    """

    Read serialized data, confirming that the top-level object is a dictionary.

    Delegates to `read_serialized_data`.

    """

    data = read_serialized_data(path)

    if isinstance(data, dict):

        return data

    raise TypeError(

        f"Expected data encoded by {path!r} to be a dictionary at the top-level. "

        f"Received {data.__class__.__name__!r} instead."

    )

shlex_join

def shlex_join(
    split_command
) -> str

Backport shlex.join for Python < 3.8.

Also cast all args to str to increase versatility. https://github.com/python/cpython/pull/7605 https://bugs.python.org/issue22454

View Source
def shlex_join(split_command) -> str:

    """

    Backport shlex.join for Python < 3.8.

    Also cast all args to str to increase versatility.

    https://github.com/python/cpython/pull/7605

    https://bugs.python.org/issue22454

    """

    return " ".join(shlex.quote(str(arg)) for arg in split_command)