""" Module that contains many useful utilities for validating data or function arguments """ from __future__ import annotations from typing import ( Any, Iterable, Sequence, TypeVar, overload, ) import warnings import numpy as np from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( is_bool, is_integer, ) BoolishT = TypeVar("BoolishT", bool, int) BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None) def _check_arg_length(fname, args, max_fname_arg_count, compat_args): """ Checks whether 'args' has length of at most 'compat_args'. Raises a TypeError if that is not the case, similar to in Python when a function is called with too many arguments. """ if max_fname_arg_count < 0: raise ValueError("'max_fname_arg_count' must be non-negative") if len(args) > len(compat_args): max_arg_count = len(compat_args) + max_fname_arg_count actual_arg_count = len(args) + max_fname_arg_count argument = "argument" if max_arg_count == 1 else "arguments" raise TypeError( f"{fname}() takes at most {max_arg_count} {argument} " f"({actual_arg_count} given)" ) def _check_for_default_values(fname, arg_val_dict, compat_args): """ Check that the keys in `arg_val_dict` are mapped to their default values as specified in `compat_args`. Note that this function is to be called only when it has been checked that arg_val_dict.keys() is a subset of compat_args """ for key in arg_val_dict: # try checking equality directly with '=' operator, # as comparison may have been overridden for the left # hand object try: v1 = arg_val_dict[key] v2 = compat_args[key] # check for None-ness otherwise we could end up # comparing a numpy array vs None if (v1 is not None and v2 is None) or (v1 is None and v2 is not None): match = False else: match = v1 == v2 if not is_bool(match): raise ValueError("'match' is not a boolean") # could not compare them directly, so try comparison # using the 'is' operator except ValueError: match = arg_val_dict[key] is compat_args[key] if not match: raise ValueError( f"the '{key}' parameter is not supported in " f"the pandas implementation of {fname}()" ) def validate_args(fname, args, max_fname_arg_count, compat_args) -> None: """ Checks whether the length of the `*args` argument passed into a function has at most `len(compat_args)` arguments and whether or not all of these elements in `args` are set to their default values. Parameters ---------- fname : str The name of the function being passed the `*args` parameter args : tuple The `*args` parameter passed into a function max_fname_arg_count : int The maximum number of arguments that the function `fname` can accept, excluding those in `args`. Used for displaying appropriate error messages. Must be non-negative. compat_args : dict A dictionary of keys and their associated default values. In order to accommodate buggy behaviour in some versions of `numpy`, where a signature displayed keyword arguments but then passed those arguments **positionally** internally when calling downstream implementations, a dict ensures that the original order of the keyword arguments is enforced. Raises ------ TypeError If `args` contains more values than there are `compat_args` ValueError If `args` contains values that do not correspond to those of the default values specified in `compat_args` """ _check_arg_length(fname, args, max_fname_arg_count, compat_args) # We do this so that we can provide a more informative # error message about the parameters that we are not # supporting in the pandas implementation of 'fname' kwargs = dict(zip(compat_args, args)) _check_for_default_values(fname, kwargs, compat_args) def _check_for_invalid_keys(fname, kwargs, compat_args): """ Checks whether 'kwargs' contains any keys that are not in 'compat_args' and raises a TypeError if there is one. """ # set(dict) --> set of the dictionary's keys diff = set(kwargs) - set(compat_args) if diff: bad_arg = list(diff)[0] raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'") def validate_kwargs(fname, kwargs, compat_args) -> None: """ Checks whether parameters passed to the **kwargs argument in a function `fname` are valid parameters as specified in `*compat_args` and whether or not they are set to their default values. Parameters ---------- fname : str The name of the function being passed the `**kwargs` parameter kwargs : dict The `**kwargs` parameter passed into `fname` compat_args: dict A dictionary of keys that `kwargs` is allowed to have and their associated default values Raises ------ TypeError if `kwargs` contains keys not in `compat_args` ValueError if `kwargs` contains keys in `compat_args` that do not map to the default values specified in `compat_args` """ kwds = kwargs.copy() _check_for_invalid_keys(fname, kwargs, compat_args) _check_for_default_values(fname, kwds, compat_args) def validate_args_and_kwargs( fname, args, kwargs, max_fname_arg_count, compat_args ) -> None: """ Checks whether parameters passed to the *args and **kwargs argument in a function `fname` are valid parameters as specified in `*compat_args` and whether or not they are set to their default values. Parameters ---------- fname: str The name of the function being passed the `**kwargs` parameter args: tuple The `*args` parameter passed into a function kwargs: dict The `**kwargs` parameter passed into `fname` max_fname_arg_count: int The minimum number of arguments that the function `fname` requires, excluding those in `args`. Used for displaying appropriate error messages. Must be non-negative. compat_args: dict A dictionary of keys that `kwargs` is allowed to have and their associated default values. Raises ------ TypeError if `args` contains more values than there are `compat_args` OR `kwargs` contains keys not in `compat_args` ValueError if `args` contains values not at the default value (`None`) `kwargs` contains keys in `compat_args` that do not map to the default value as specified in `compat_args` See Also -------- validate_args : Purely args validation. validate_kwargs : Purely kwargs validation. """ # Check that the total number of arguments passed in (i.e. # args and kwargs) does not exceed the length of compat_args _check_arg_length( fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args ) # Check there is no overlap with the positional and keyword # arguments, similar to what is done in actual Python functions args_dict = dict(zip(compat_args, args)) for key in args_dict: if key in kwargs: raise TypeError( f"{fname}() got multiple values for keyword argument '{key}'" ) kwargs.update(args_dict) validate_kwargs(fname, kwargs, compat_args) def validate_bool_kwarg( value: BoolishNoneT, arg_name, none_allowed=True, int_allowed=False ) -> BoolishNoneT: """ Ensure that argument passed in arg_name can be interpreted as boolean. Parameters ---------- value : bool Value to be validated. arg_name : str Name of the argument. To be reflected in the error message. none_allowed : bool, default True Whether to consider None to be a valid boolean. int_allowed : bool, default False Whether to consider integer value to be a valid boolean. Returns ------- value The same value as input. Raises ------ ValueError If the value is not a valid boolean. """ good_value = is_bool(value) if none_allowed: good_value = good_value or value is None if int_allowed: good_value = good_value or isinstance(value, int) if not good_value: raise ValueError( f'For argument "{arg_name}" expected type bool, received ' f"type {type(value).__name__}." ) return value def validate_axis_style_args( data, args, kwargs, arg_name, method_name ) -> dict[str, Any]: """ Argument handler for mixed index, columns / axis functions In an attempt to handle both `.method(index, columns)`, and `.method(arg, axis=.)`, we have to do some bad things to argument parsing. This translates all arguments to `{index=., columns=.}` style. Parameters ---------- data : DataFrame args : tuple All positional arguments from the user kwargs : dict All keyword arguments from the user arg_name, method_name : str Used for better error messages Returns ------- kwargs : dict A dictionary of keyword arguments. Doesn't modify ``kwargs`` inplace, so update them with the return value here. Examples -------- >>> df = pd.DataFrame(range(2)) >>> validate_axis_style_args(df, (str.upper,), {'columns': id}, ... 'mapper', 'rename') {'columns': , 'index': } This emits a warning >>> validate_axis_style_args(df, (str.upper, id), {}, ... 'mapper', 'rename') {'index': , 'columns': } """ # TODO: Change to keyword-only args and remove all this out = {} # Goal: fill 'out' with index/columns-style arguments # like out = {'index': foo, 'columns': bar} # Start by validating for consistency if "axis" in kwargs and any(x in kwargs for x in data._AXIS_TO_AXIS_NUMBER): msg = "Cannot specify both 'axis' and any of 'index' or 'columns'." raise TypeError(msg) # First fill with explicit values provided by the user... if arg_name in kwargs: if args: msg = f"{method_name} got multiple values for argument '{arg_name}'" raise TypeError(msg) axis = data._get_axis_name(kwargs.get("axis", 0)) out[axis] = kwargs[arg_name] # More user-provided arguments, now from kwargs for k, v in kwargs.items(): try: ax = data._get_axis_name(k) except ValueError: pass else: out[ax] = v # All user-provided kwargs have been handled now. # Now we supplement with positional arguments, emitting warnings # when there's ambiguity and raising when there's conflicts if len(args) == 0: pass # It's up to the function to decide if this is valid elif len(args) == 1: axis = data._get_axis_name(kwargs.get("axis", 0)) out[axis] = args[0] elif len(args) == 2: if "axis" in kwargs: # Unambiguously wrong msg = "Cannot specify both 'axis' and any of 'index' or 'columns'" raise TypeError(msg) msg = ( f"Interpreting call\n\t'.{method_name}(a, b)' as " f"\n\t'.{method_name}(index=a, columns=b)'.\nUse named " "arguments to remove any ambiguity. In the future, using " "positional arguments for 'index' or 'columns' will raise " "a 'TypeError'." ) warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) out[data._get_axis_name(0)] = args[0] out[data._get_axis_name(1)] = args[1] else: msg = f"Cannot specify all of '{arg_name}', 'index', 'columns'." raise TypeError(msg) return out def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True): """ Validate the keyword arguments to 'fillna'. This checks that exactly one of 'value' and 'method' is specified. If 'method' is specified, this validates that it's a valid method. Parameters ---------- value, method : object The 'value' and 'method' keyword arguments for 'fillna'. validate_scalar_dict_value : bool, default True Whether to validate that 'value' is a scalar or dict. Specifically, validate that it is not a list or tuple. Returns ------- value, method : object """ from pandas.core.missing import clean_fill_method if value is None and method is None: raise ValueError("Must specify a fill 'value' or 'method'.") elif value is None and method is not None: method = clean_fill_method(method) elif value is not None and method is None: if validate_scalar_dict_value and isinstance(value, (list, tuple)): raise TypeError( '"value" parameter must be a scalar or dict, but ' f'you passed a "{type(value).__name__}"' ) elif value is not None and method is not None: raise ValueError("Cannot specify both 'value' and 'method'.") return value, method def validate_percentile(q: float | Iterable[float]) -> np.ndarray: """ Validate percentiles (used by describe and quantile). This function checks if the given float or iterable of floats is a valid percentile otherwise raises a ValueError. Parameters ---------- q: float or iterable of floats A single percentile or an iterable of percentiles. Returns ------- ndarray An ndarray of the percentiles if valid. Raises ------ ValueError if percentiles are not in given interval([0, 1]). """ q_arr = np.asarray(q) # Don't change this to an f-string. The string formatting # is too expensive for cases where we don't need it. msg = "percentiles should all be in the interval [0, 1]. Try {} instead." if q_arr.ndim == 0: if not 0 <= q_arr <= 1: raise ValueError(msg.format(q_arr / 100.0)) else: if not all(0 <= qs <= 1 for qs in q_arr): raise ValueError(msg.format(q_arr / 100.0)) return q_arr @overload def validate_ascending(ascending: BoolishT) -> BoolishT: ... @overload def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]: ... def validate_ascending( ascending: bool | int | Sequence[BoolishT], ) -> bool | int | list[BoolishT]: """Validate ``ascending`` kwargs for ``sort_index`` method.""" kwargs = {"none_allowed": False, "int_allowed": True} if not isinstance(ascending, Sequence): return validate_bool_kwarg(ascending, "ascending", **kwargs) return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending] def validate_endpoints(closed: str | None) -> tuple[bool, bool]: """ Check that the `closed` argument is among [None, "left", "right"] Parameters ---------- closed : {None, "left", "right"} Returns ------- left_closed : bool right_closed : bool Raises ------ ValueError : if argument is not among valid values """ left_closed = False right_closed = False if closed is None: left_closed = True right_closed = True elif closed == "left": left_closed = True elif closed == "right": right_closed = True else: raise ValueError("Closed has to be either 'left', 'right' or None") return left_closed, right_closed def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]: """ Check that the `inclusive` argument is among {"both", "neither", "left", "right"}. Parameters ---------- inclusive : {"both", "neither", "left", "right"} Returns ------- left_right_inclusive : tuple[bool, bool] Raises ------ ValueError : if argument is not among valid values """ left_right_inclusive: tuple[bool, bool] | None = None if isinstance(inclusive, str): left_right_inclusive = { "both": (True, True), "left": (True, False), "right": (False, True), "neither": (False, False), }.get(inclusive) if left_right_inclusive is None: raise ValueError( "Inclusive has to be either 'both', 'neither', 'left' or 'right'" ) return left_right_inclusive def validate_insert_loc(loc: int, length: int) -> int: """ Check that we have an integer between -length and length, inclusive. Standardize negative loc to within [0, length]. The exceptions we raise on failure match np.insert. """ if not is_integer(loc): raise TypeError(f"loc must be an integer between -{length} and {length}") if loc < 0: loc += length if not 0 <= loc <= length: raise IndexError(f"loc must be an integer between -{length} and {length}") return loc