Source code for friendly_data.helpers

"""Collection of helper functions"""

from collections import deque
from collections.abc import Sequence
from functools import partial
from importlib import import_module
from logging import getLogger
import re
import sys
from typing import Dict, Iterable, List, Tuple

from glom import Check, Match, SKIP
import pandas as pd

logger = getLogger(__name__)


def import_from(module: str, name: str):
    """Import ``name`` from ``module``; if ``name`` is empty, return the module

    Parameters
    ----------
    module : str
        Name of the module to import (as accepted by
        ``importlib.import_module``)

    name : str
        Attribute to retrieve from the imported module; when empty, the
        module object itself is returned

    Returns
    -------
    The requested attribute, or the module when ``name`` is empty

    Raises
    ------
    ImportError
        If the module cannot be imported; a hint about the missing
        dependency is logged before the original error is re-raised

    """
    try:
        imported = import_module(module)
    except ImportError as err:
        msg = f"Missing optional dependency '{module}', use pip or conda to install"
        logger.error(msg)
        # re-raise the original error without chaining any exception context
        raise err from None

    if not name:
        return imported
    return getattr(imported, name)


def is_windows() -> bool:
    """Return ``True`` when ``sys.platform`` is ``"win32"`` or ``"cygwin"``"""
    windows_like = {"win32", "cygwin"}
    return sys.platform in windows_like


def sanitise(string: str) -> str:
    """Sanitise string for use as group/directory name

    Every run of spaces and the characters ``@ & ( ) /`` is treated as a
    separator; the remaining pieces are joined with underscores.  Separators
    at either end of the string are dropped.

    """
    # each match is a maximal run of characters that are not separators
    allowed_runs = re.compile("[^ @&()/]+").findall(string)
    return "_".join(allowed_runs)


def is_fmtstr(string: str) -> bool:
    """Heuristically check if a string looks like a format string

    The string qualifies when it contains at least one ``{`` and exactly as
    many ``}``.  Only the counts are compared; the braces are not checked for
    nesting or ordering.

    """
    n_open, n_close = string.count("{"), string.count("}")
    # equal counts with n_open > 0 implies n_close > 0 as well
    return n_open > 0 and n_open == n_close


# def from_hints(fn: Callable, arg: str) -> Tuple:
#     """NOTE: Commented out until we drop Python 3.7"""
#     from typing import get_args, get_origin, get_type_hints

#     hint = get_type_hints(fn)[arg]
#     return get_origin(hint), get_args(hint)


def flatten_list(lst: Iterable) -> Iterable:
    """Flatten an arbitrarily nested list (returns a generator)

    Any element that is a ``Sequence`` is descended into recursively, except
    for ``str`` and ``bytes``, which are yielded whole like every other
    non-sequence element.

    """
    for item in lst:
        # leaves: text/bytes, and anything that is not a sequence
        if isinstance(item, (str, bytes)) or not isinstance(item, Sequence):
            yield item
            continue
        yield from flatten_list(item)


def filter_dict(data: Dict, allowed: Iterable) -> Dict:
    """Filter a dictionary based on a set of allowed keys

    Parameters
    ----------
    data : Dict
        Dictionary to filter

    allowed : Iterable
        Keys to retain; may be any container or iterable, including a
        one-shot iterator or generator

    Returns
    -------
    Dict
        A new dictionary with the items of ``data`` whose key is in
        ``allowed``; the order of ``data`` is preserved

    """
    # A bare iterator/generator has no ``__contains__``; testing membership on
    # it with ``in`` consumes it, so later keys would be silently missed.
    # Materialise it once so that every key is tested against the full set.
    if not hasattr(allowed, "__contains__"):
        allowed = tuple(allowed)
    return {key: value for key, value in data.items() if key in allowed}


class noop_map(dict):
    """A noop mapping class

    Behaves like a regular dictionary, except that looking up a missing key
    with ``mapping[key]`` does not raise ``KeyError``: the key itself is
    returned, and the mapping is left unchanged.

    """

    def __missing__(self, key):
        """Return the looked up key itself when it is absent"""
        return key


def idx_lvl_values(idx: pd.MultiIndex, name: str) -> pd.Index:
    """Look up the values of a named level in a ``pandas.MultiIndex``

    Parameters
    ----------
    idx : pandas.MultiIndex
        A multi index

    name : str
        Name of the level to look up

    Returns
    -------
    pandas.Index
        Index with the level values (the entry of ``idx.levels`` at the
        position of ``name`` in ``idx.names``)

    Raises
    ------
    ValueError
        If ``name`` is not one of the level names

    """
    position = idx.names.index(name)
    return idx.levels[position]


def idxslice(lvls: Iterable[str], selection: Dict[str, List]) -> Tuple:
    """Build an index slice tuple from level names and a selection mapping

    NOTE: The order of ``lvls`` should match the order of the levels in the
    index exactly; typically, ``mydf.index.names``.

    Parameters
    ----------
    lvls : Iterable[str]
        Complete set of levels in the index

    selection : Dict[str, List]
        Selection set; the key is a level name, and the value is a list of
        values to select

    Returns
    -------
    Tuple
        One entry per level, in the order of ``lvls``: the selected values
        for levels present in ``selection``, and ``slice(None)`` (matches
        anything) for all other levels

    """
    match_all = slice(None)
    picks = []
    for lvl in lvls:
        picks.append(selection[lvl] if lvl in selection else match_all)
    return tuple(picks)


def select(spec, **kwargs):
    """Wrap ``glom.Check`` with the default action set to ``glom.SKIP``.

    This is very useful to select items inside nested data structures.  All
    keyword arguments are passed on to ``glom.Check`` unchanged.  An example
    use:

    >>> from glom import glom
    >>> cols = [
    ...     {
    ...         "name": "abc",
    ...         "type": "integer"
    ...     },
    ...     {
    ...         "name": "def",
    ...         "type": "string"
    ...     },
    ... ]
    >>> glom(cols, [select("name", equal_to="abc")])
    [{'name': 'abc', 'type': 'integer'}]

    For details see: `glom.Check`_

    .. _glom.Check: https://glom.readthedocs.io/en/latest/matching.html#validation-with-check

    """  # noqa: E501
    checker = Check(spec, default=SKIP, **kwargs)
    return checker


def match(pattern, **kwargs):
    """Wrap ``glom.Match`` with the default action set to ``glom.SKIP``.

    This is very useful to match items inside nested data structures.  All
    keyword arguments are passed on to ``glom.Match`` unchanged.  An example
    use:

    >>> from glom import glom
    >>> cols = [
    ...     {
    ...         "name": "abc",
    ...         "type": "integer",
    ...         "constraints": {"enum": []}
    ...     },
    ...     {
    ...         "name": "def",
    ...         "type": "string"
    ...     },
    ... ]
    >>> glom(cols, [match({"constraints": {"enum": list}, str: str})])
    [{'name': 'abc', 'type': 'integer', 'constraints': {'enum': []}}]

    For details see: `glom.Match`_

    .. _glom.Match: https://glom.readthedocs.io/en/latest/matching.html#validation-with-match

    """  # noqa: E501
    matcher = Match(pattern, default=SKIP, **kwargs)
    return matcher


# ``consume(iterable)`` runs the iterable to exhaustion for its side effects:
# a deque with ``maxlen=0`` discards every item it is fed, so nothing is
# stored.  The call returns the (empty) deque.
consume = partial(deque, maxlen=0)
# partial objects do not inherit a useful docstring, so set one explicitly
consume.__doc__ = "Consume or exhaust an iterator"