""" Misc tools for implementing data structures Note: pandas.core.common is *not* part of the public API. """ from __future__ import annotations import builtins from collections import ( abc, defaultdict, ) import contextlib from functools import partial import inspect from typing import ( TYPE_CHECKING, Any, Callable, Collection, Hashable, Iterable, Iterator, Sequence, cast, overload, ) import warnings import numpy as np from pandas._libs import lib from pandas._typing import ( AnyArrayLike, ArrayLike, NpDtype, RandomState, T, ) from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import ( is_array_like, is_bool_dtype, is_extension_array_dtype, is_integer, ) from pandas.core.dtypes.generic import ( ABCExtensionArray, ABCIndex, ABCSeries, ) from pandas.core.dtypes.inference import iterable_not_string from pandas.core.dtypes.missing import isna if TYPE_CHECKING: from pandas import Index def flatten(line): """ Flatten an arbitrarily nested sequence. Parameters ---------- line : sequence The non string sequence to flatten Notes ----- This doesn't consider strings sequences. Returns ------- flattened : generator """ for element in line: if iterable_not_string(element): yield from flatten(element) else: yield element def consensus_name_attr(objs): name = objs[0].name for obj in objs[1:]: try: if obj.name != name: name = None except ValueError: name = None return name def is_bool_indexer(key: Any) -> bool: """ Check whether `key` is a valid boolean indexer. Parameters ---------- key : Any Only list-likes may be considered boolean indexers. All other types are not considered a boolean indexer. For array-like input, boolean ndarrays or ExtensionArrays with ``_is_boolean`` set are considered boolean indexers. Returns ------- bool Whether `key` is a valid boolean indexer. Raises ------ ValueError When the array is an object-dtype ndarray or ExtensionArray and contains missing values. See Also -------- check_array_indexer : Check that `key` is a valid array to index, and convert to an ndarray. """ if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or ( is_array_like(key) and is_extension_array_dtype(key.dtype) ): if key.dtype == np.object_: key_array = np.asarray(key) if not lib.is_bool_array(key_array): na_msg = "Cannot mask with non-boolean array containing NA / NaN values" if lib.infer_dtype(key_array) == "boolean" and isna(key_array).any(): # Don't raise on e.g. ["A", "B", np.nan], see # test_loc_getitem_list_of_labels_categoricalindex_with_na raise ValueError(na_msg) return False return True elif is_bool_dtype(key.dtype): return True elif isinstance(key, list): # check if np.array(key).dtype would be bool if len(key) > 0: if type(key) is not list: # GH#42461 cython will raise TypeError if we pass a subclass key = list(key) return lib.is_bool_list(key) return False def cast_scalar_indexer(val, warn_float: bool = False): """ To avoid numpy DeprecationWarnings, cast float to integer where valid. Parameters ---------- val : scalar warn_float : bool, default False If True, issue deprecation warning for a float indexer. Returns ------- outval : scalar """ # assumes lib.is_scalar(val) if lib.is_float(val) and val.is_integer(): if warn_float: warnings.warn( "Indexing with a float is deprecated, and will raise an IndexError " "in pandas 2.0. You can manually convert to an integer key instead.", FutureWarning, stacklevel=find_stack_level(), ) return int(val) return val def not_none(*args): """ Returns a generator consisting of the arguments that are not None. """ return (arg for arg in args if arg is not None) def any_none(*args) -> bool: """ Returns a boolean indicating if any argument is None. """ return any(arg is None for arg in args) def all_none(*args) -> bool: """ Returns a boolean indicating if all arguments are None. """ return all(arg is None for arg in args) def any_not_none(*args) -> bool: """ Returns a boolean indicating if any argument is not None. """ return any(arg is not None for arg in args) def all_not_none(*args) -> bool: """ Returns a boolean indicating if all arguments are not None. """ return all(arg is not None for arg in args) def count_not_none(*args) -> int: """ Returns the count of arguments that are not None. """ return sum(x is not None for x in args) @overload def asarray_tuplesafe( values: ArrayLike | list | tuple | zip, dtype: NpDtype | None = ... ) -> np.ndarray: # ExtensionArray can only be returned when values is an Index, all other iterables # will return np.ndarray. Unfortunately "all other" cannot be encoded in a type # signature, so instead we special-case some common types. ... @overload def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = ...) -> ArrayLike: ... def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLike: if not (isinstance(values, (list, tuple)) or hasattr(values, "__array__")): values = list(values) elif isinstance(values, ABCIndex): return values._values if isinstance(values, list) and dtype in [np.object_, object]: return construct_1d_object_array_from_listlike(values) result = np.asarray(values, dtype=dtype) if issubclass(result.dtype.type, str): result = np.asarray(values, dtype=object) if result.ndim == 2: # Avoid building an array of arrays: values = [tuple(x) for x in values] result = construct_1d_object_array_from_listlike(values) return result def index_labels_to_array( labels: np.ndarray | Iterable, dtype: NpDtype | None = None ) -> np.ndarray: """ Transform label or iterable of labels to array, for use in Index. Parameters ---------- dtype : dtype If specified, use as dtype of the resulting array, otherwise infer. Returns ------- array """ if isinstance(labels, (str, tuple)): labels = [labels] if not isinstance(labels, (list, np.ndarray)): try: labels = list(labels) except TypeError: # non-iterable labels = [labels] labels = asarray_tuplesafe(labels, dtype=dtype) return labels def maybe_make_list(obj): if obj is not None and not isinstance(obj, (tuple, list)): return [obj] return obj def maybe_iterable_to_list(obj: Iterable[T] | T) -> Collection[T] | T: """ If obj is Iterable but not list-like, consume into list. """ if isinstance(obj, abc.Iterable) and not isinstance(obj, abc.Sized): return list(obj) obj = cast(Collection, obj) return obj def is_null_slice(obj) -> bool: """ We have a null slice. """ return ( isinstance(obj, slice) and obj.start is None and obj.stop is None and obj.step is None ) def is_true_slices(line) -> list[bool]: """ Find non-trivial slices in "line": return a list of booleans with same length. """ return [isinstance(k, slice) and not is_null_slice(k) for k in line] # TODO: used only once in indexing; belongs elsewhere? def is_full_slice(obj, line: int) -> bool: """ We have a full length slice. """ return ( isinstance(obj, slice) and obj.start == 0 and obj.stop == line and obj.step is None ) def get_callable_name(obj): # typical case has name if hasattr(obj, "__name__"): return getattr(obj, "__name__") # some objects don't; could recurse if isinstance(obj, partial): return get_callable_name(obj.func) # fall back to class name if callable(obj): return type(obj).__name__ # everything failed (probably because the argument # wasn't actually callable); we return None # instead of the empty string in this case to allow # distinguishing between no name and a name of '' return None def apply_if_callable(maybe_callable, obj, **kwargs): """ Evaluate possibly callable input using obj and kwargs if it is callable, otherwise return as it is. Parameters ---------- maybe_callable : possibly a callable obj : NDFrame **kwargs """ if callable(maybe_callable): return maybe_callable(obj, **kwargs) return maybe_callable def standardize_mapping(into): """ Helper function to standardize a supplied mapping. Parameters ---------- into : instance or subclass of collections.abc.Mapping Must be a class, an initialized collections.defaultdict, or an instance of a collections.abc.Mapping subclass. Returns ------- mapping : a collections.abc.Mapping subclass or other constructor a callable object that can accept an iterator to create the desired Mapping. See Also -------- DataFrame.to_dict Series.to_dict """ if not inspect.isclass(into): if isinstance(into, defaultdict): return partial(defaultdict, into.default_factory) into = type(into) if not issubclass(into, abc.Mapping): raise TypeError(f"unsupported type: {into}") elif into == defaultdict: raise TypeError("to_dict() only accepts initialized defaultdicts") return into @overload def random_state(state: np.random.Generator) -> np.random.Generator: ... @overload def random_state( state: int | ArrayLike | np.random.BitGenerator | np.random.RandomState | None, ) -> np.random.RandomState: ... def random_state(state: RandomState | None = None): """ Helper function for processing random_state arguments. Parameters ---------- state : int, array-like, BitGenerator, Generator, np.random.RandomState, None. If receives an int, array-like, or BitGenerator, passes to np.random.RandomState() as seed. If receives an np.random RandomState or Generator, just returns that unchanged. If receives `None`, returns np.random. If receives anything else, raises an informative ValueError. .. versionchanged:: 1.1.0 array-like and BitGenerator object now passed to np.random.RandomState() as seed Default None. Returns ------- np.random.RandomState or np.random.Generator. If state is None, returns np.random """ if ( is_integer(state) or is_array_like(state) or isinstance(state, np.random.BitGenerator) ): # error: Argument 1 to "RandomState" has incompatible type "Optional[Union[int, # Union[ExtensionArray, ndarray[Any, Any]], Generator, RandomState]]"; expected # "Union[None, Union[Union[_SupportsArray[dtype[Union[bool_, integer[Any]]]], # Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]], # Sequence[Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]]], # Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_, # integer[Any]]]]]]], # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_, # integer[Any]]]]]]]]], Union[bool, int, Sequence[Union[bool, int]], # Sequence[Sequence[Union[bool, int]]], Sequence[Sequence[Sequence[Union[bool, # int]]]], Sequence[Sequence[Sequence[Sequence[Union[bool, int]]]]]]], # BitGenerator]" return np.random.RandomState(state) # type: ignore[arg-type] elif isinstance(state, np.random.RandomState): return state elif isinstance(state, np.random.Generator): return state elif state is None: return np.random else: raise ValueError( "random_state must be an integer, array-like, a BitGenerator, Generator, " "a numpy RandomState, or None" ) def pipe( obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs ) -> T: """ Apply a function ``func`` to object ``obj`` either by passing obj as the first argument to the function or, in the case that the func is a tuple, interpret the first element of the tuple as a function and pass the obj to that function as a keyword argument whose key is the value of the second element of the tuple. Parameters ---------- func : callable or tuple of (callable, str) Function to apply to this object or, alternatively, a ``(callable, data_keyword)`` tuple where ``data_keyword`` is a string indicating the keyword of ``callable`` that expects the object. *args : iterable, optional Positional arguments passed into ``func``. **kwargs : dict, optional A dictionary of keyword arguments passed into ``func``. Returns ------- object : the return type of ``func``. """ if isinstance(func, tuple): func, target = func if target in kwargs: msg = f"{target} is both the pipe target and a keyword argument" raise ValueError(msg) kwargs[target] = obj return func(*args, **kwargs) else: return func(obj, *args, **kwargs) def get_rename_function(mapper): """ Returns a function that will map names/labels, dependent if mapper is a dict, Series or just a function. """ def f(x): if x in mapper: return mapper[x] else: return x return f if isinstance(mapper, (abc.Mapping, ABCSeries)) else mapper def convert_to_list_like( values: Hashable | Iterable | AnyArrayLike, ) -> list | AnyArrayLike: """ Convert list-like or scalar input to list-like. List, numpy and pandas array-like inputs are returned unmodified whereas others are converted to list. """ if isinstance(values, (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)): return values elif isinstance(values, abc.Iterable) and not isinstance(values, str): return list(values) return [values] @contextlib.contextmanager def temp_setattr(obj, attr: str, value) -> Iterator[None]: """Temporarily set attribute on an object. Args: obj: Object whose attribute will be modified. attr: Attribute to modify. value: Value to temporarily set attribute to. Yields: obj with modified attribute. """ old_value = getattr(obj, attr) setattr(obj, attr, value) try: yield obj finally: setattr(obj, attr, old_value) def require_length_match(data, index: Index) -> None: """ Check the length of data matches the length of the index. """ if len(data) != len(index): raise ValueError( "Length of values " f"({len(data)}) " "does not match length of index " f"({len(index)})" ) # the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0, # whereas np.min and np.max (which directly call obj.min and obj.max) # default to axis=None. _builtin_table = { builtins.sum: np.sum, builtins.max: np.maximum.reduce, builtins.min: np.minimum.reduce, } _cython_table = { builtins.sum: "sum", builtins.max: "max", builtins.min: "min", np.all: "all", np.any: "any", np.sum: "sum", np.nansum: "sum", np.mean: "mean", np.nanmean: "mean", np.prod: "prod", np.nanprod: "prod", np.std: "std", np.nanstd: "std", np.var: "var", np.nanvar: "var", np.median: "median", np.nanmedian: "median", np.max: "max", np.nanmax: "max", np.min: "min", np.nanmin: "min", np.cumprod: "cumprod", np.nancumprod: "cumprod", np.cumsum: "cumsum", np.nancumsum: "cumsum", } def get_cython_func(arg: Callable) -> str | None: """ if we define an internal function for this argument, return it """ return _cython_table.get(arg) def is_builtin_func(arg): """ if we define a builtin function for this argument, return it, otherwise return the arg """ return _builtin_table.get(arg, arg) def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]: """ If a name is missing then replace it by level_n, where n is the count .. versionadded:: 1.4.0 Parameters ---------- names : list-like list of column names or None values. Returns ------- list list of column names with the None values replaced. """ return [f"level_{i}" if name is None else name for i, name in enumerate(names)] def resolve_numeric_only(numeric_only: bool | None | lib.NoDefault) -> bool: """Determine the Boolean value of numeric_only. See GH#46560 for details on the deprecation. Parameters ---------- numeric_only : bool, None, or lib.no_default Value passed to the method. Returns ------- Resolved value of numeric_only. """ if numeric_only is lib.no_default: # Methods that behave like numeric_only=True and only got the numeric_only # arg in 1.5.0 default to lib.no_default result = True elif numeric_only is None: # Methods that had the numeric_only arg prior to 1.5.0 and try all columns # first default to None result = False else: result = numeric_only return result def deprecate_numeric_only_default( cls: type, name: str, deprecate_none: bool = False ) -> None: """Emit FutureWarning message for deprecation of numeric_only. See GH#46560 for details on the deprecation. Parameters ---------- cls : type pandas type that is generating the warning. name : str Name of the method that is generating the warning. deprecate_none : bool, default False Whether to also warn about the deprecation of specifying ``numeric_only=None``. """ if name in ["all", "any"]: arg_name = "bool_only" else: arg_name = "numeric_only" msg = ( f"The default value of {arg_name} in {cls.__name__}.{name} is " "deprecated. In a future version, it will default to False. " ) if deprecate_none: msg += f"In addition, specifying '{arg_name}=None' is deprecated. " msg += ( f"Select only valid columns or specify the value of {arg_name} to silence " "this warning." ) warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())