6266 lines
189 KiB
Python
6266 lines
189 KiB
Python
"""
|
|
Data structure for 1-dimensional cross-sectional and time series data
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from textwrap import dedent
|
|
from typing import (
|
|
IO,
|
|
TYPE_CHECKING,
|
|
Any,
|
|
Callable,
|
|
Hashable,
|
|
Iterable,
|
|
Literal,
|
|
Mapping,
|
|
Sequence,
|
|
Union,
|
|
cast,
|
|
overload,
|
|
)
|
|
import warnings
|
|
import weakref
|
|
|
|
import numpy as np
|
|
|
|
from pandas._config import get_option
|
|
|
|
from pandas._libs import (
|
|
lib,
|
|
properties,
|
|
reshape,
|
|
tslibs,
|
|
)
|
|
from pandas._libs.lib import no_default
|
|
from pandas._typing import (
|
|
AggFuncType,
|
|
AnyArrayLike,
|
|
ArrayLike,
|
|
Axis,
|
|
Dtype,
|
|
DtypeObj,
|
|
FilePath,
|
|
FillnaOptions,
|
|
Frequency,
|
|
IgnoreRaise,
|
|
IndexKeyFunc,
|
|
IndexLabel,
|
|
Level,
|
|
NaPosition,
|
|
QuantileInterpolation,
|
|
Renamer,
|
|
SingleManager,
|
|
SortKind,
|
|
StorageOptions,
|
|
TimedeltaConvertibleTypes,
|
|
TimestampConvertibleTypes,
|
|
ValueKeyFunc,
|
|
WriteBuffer,
|
|
npt,
|
|
)
|
|
from pandas.compat.numpy import function as nv
|
|
from pandas.errors import InvalidIndexError
|
|
from pandas.util._decorators import (
|
|
Appender,
|
|
Substitution,
|
|
deprecate_kwarg,
|
|
deprecate_nonkeyword_arguments,
|
|
doc,
|
|
)
|
|
from pandas.util._exceptions import find_stack_level
|
|
from pandas.util._validators import (
|
|
validate_ascending,
|
|
validate_bool_kwarg,
|
|
validate_percentile,
|
|
)
|
|
|
|
from pandas.core.dtypes.cast import (
|
|
LossySetitemError,
|
|
convert_dtypes,
|
|
maybe_box_native,
|
|
maybe_cast_pointwise_result,
|
|
)
|
|
from pandas.core.dtypes.common import (
|
|
ensure_platform_int,
|
|
is_dict_like,
|
|
is_integer,
|
|
is_iterator,
|
|
is_list_like,
|
|
is_numeric_dtype,
|
|
is_object_dtype,
|
|
is_scalar,
|
|
pandas_dtype,
|
|
validate_all_hashable,
|
|
)
|
|
from pandas.core.dtypes.generic import ABCDataFrame
|
|
from pandas.core.dtypes.inference import is_hashable
|
|
from pandas.core.dtypes.missing import (
|
|
isna,
|
|
na_value_for_dtype,
|
|
notna,
|
|
remove_na_arraylike,
|
|
)
|
|
|
|
from pandas.core import (
|
|
algorithms,
|
|
base,
|
|
common as com,
|
|
missing,
|
|
nanops,
|
|
ops,
|
|
)
|
|
from pandas.core.accessor import CachedAccessor
|
|
from pandas.core.apply import SeriesApply
|
|
from pandas.core.arrays import ExtensionArray
|
|
from pandas.core.arrays.categorical import CategoricalAccessor
|
|
from pandas.core.arrays.sparse import SparseAccessor
|
|
from pandas.core.construction import (
|
|
create_series_with_explicit_dtype,
|
|
extract_array,
|
|
is_empty_data,
|
|
sanitize_array,
|
|
)
|
|
from pandas.core.generic import NDFrame
|
|
from pandas.core.indexers import (
|
|
deprecate_ndim_indexing,
|
|
unpack_1tuple,
|
|
)
|
|
from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
|
|
from pandas.core.indexes.api import (
|
|
CategoricalIndex,
|
|
DatetimeIndex,
|
|
Float64Index,
|
|
Index,
|
|
MultiIndex,
|
|
PeriodIndex,
|
|
TimedeltaIndex,
|
|
default_index,
|
|
ensure_index,
|
|
)
|
|
import pandas.core.indexes.base as ibase
|
|
from pandas.core.indexing import (
|
|
check_bool_indexer,
|
|
check_deprecated_indexers,
|
|
)
|
|
from pandas.core.internals import (
|
|
SingleArrayManager,
|
|
SingleBlockManager,
|
|
)
|
|
from pandas.core.shared_docs import _shared_docs
|
|
from pandas.core.sorting import (
|
|
ensure_key_mapped,
|
|
nargsort,
|
|
)
|
|
from pandas.core.strings import StringMethods
|
|
from pandas.core.tools.datetimes import to_datetime
|
|
|
|
import pandas.io.formats.format as fmt
|
|
from pandas.io.formats.info import (
|
|
INFO_DOCSTRING,
|
|
SeriesInfo,
|
|
series_sub_kwargs,
|
|
)
|
|
import pandas.plotting
|
|
|
|
if TYPE_CHECKING:
|
|
from pandas._typing import (
|
|
NumpySorter,
|
|
NumpyValueArrayLike,
|
|
Suffixes,
|
|
)
|
|
|
|
from pandas.core.frame import DataFrame
|
|
from pandas.core.groupby.generic import SeriesGroupBy
|
|
from pandas.core.resample import Resampler
|
|
|
|
__all__ = ["Series"]
|
|
|
|
_shared_doc_kwargs = {
|
|
"axes": "index",
|
|
"klass": "Series",
|
|
"axes_single_arg": "{0 or 'index'}",
|
|
"axis": """axis : {0 or 'index'}
|
|
Unused. Parameter needed for compatibility with DataFrame.""",
|
|
"inplace": """inplace : bool, default False
|
|
If True, performs operation inplace and returns None.""",
|
|
"unique": "np.ndarray",
|
|
"duplicated": "Series",
|
|
"optional_by": "",
|
|
"optional_mapper": "",
|
|
"optional_labels": "",
|
|
"optional_axis": "",
|
|
"replace_iloc": """
|
|
This differs from updating with ``.loc`` or ``.iloc``, which require
|
|
you to specify a location to update with some value.""",
|
|
}
|
|
|
|
|
|
def _coerce_method(converter):
    """
    Build a dunder method that coerces a one-element Series to a scalar.

    Parameters
    ----------
    converter : callable
        Scalar constructor (e.g. ``int`` or ``float``). Its ``__name__`` is
        used to name the generated method ``__<name>__``.

    Returns
    -------
    function
        A method taking ``self`` that applies `converter` to the only
        element, or raises ``TypeError`` when ``len(self) != 1``.
    """

    def wrapper(self):
        # Anything other than exactly one element has no unambiguous scalar.
        if len(self) != 1:
            raise TypeError(f"cannot convert the series to {converter}")
        return converter(self.iloc[0])

    wrapper.__name__ = "__{}__".format(converter.__name__)
    return wrapper
|
|
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Series class
|
|
|
|
|
|
class Series(base.IndexOpsMixin, NDFrame):
|
|
"""
|
|
One-dimensional ndarray with axis labels (including time series).
|
|
|
|
Labels need not be unique but must be a hashable type. The object
|
|
supports both integer- and label-based indexing and provides a host of
|
|
methods for performing operations involving the index. Statistical
|
|
methods from ndarray have been overridden to automatically exclude
|
|
missing data (currently represented as NaN).
|
|
|
|
Operations between Series (+, -, /, \\*, \\*\\*) align values based on their
|
|
associated index values-- they need not be the same length. The result
|
|
index will be the sorted union of the two indexes.
|
|
|
|
Parameters
|
|
----------
|
|
data : array-like, Iterable, dict, or scalar value
|
|
Contains data stored in Series. If data is a dict, argument order is
|
|
maintained.
|
|
index : array-like or Index (1d)
|
|
Values must be hashable and have the same length as `data`.
|
|
Non-unique index values are allowed. Will default to
|
|
RangeIndex (0, 1, 2, ..., n) if not provided. If data is dict-like
|
|
and index is None, then the keys in the data are used as the index. If the
|
|
index is not None, the resulting Series is reindexed with the index values.
|
|
dtype : str, numpy.dtype, or ExtensionDtype, optional
|
|
Data type for the output Series. If not specified, this will be
|
|
inferred from `data`.
|
|
See the :ref:`user guide <basics.dtypes>` for more usages.
|
|
name : str, optional
|
|
The name to give to the Series.
|
|
copy : bool, default False
|
|
Copy input data. Only affects Series or 1d ndarray input. See examples.
|
|
|
|
Notes
|
|
-----
|
|
Please reference the :ref:`User Guide <basics.series>` for more information.
|
|
|
|
Examples
|
|
--------
|
|
Constructing Series from a dictionary with an Index specified
|
|
|
|
>>> d = {'a': 1, 'b': 2, 'c': 3}
|
|
>>> ser = pd.Series(data=d, index=['a', 'b', 'c'])
|
|
>>> ser
|
|
a 1
|
|
b 2
|
|
c 3
|
|
dtype: int64
|
|
|
|
The keys of the dictionary match with the Index values, hence the Index
|
|
values have no effect.
|
|
|
|
>>> d = {'a': 1, 'b': 2, 'c': 3}
|
|
>>> ser = pd.Series(data=d, index=['x', 'y', 'z'])
|
|
>>> ser
|
|
x NaN
|
|
y NaN
|
|
z NaN
|
|
dtype: float64
|
|
|
|
Note that the Index is first built with the keys from the dictionary.
|
|
After this the Series is reindexed with the given Index values, hence we
|
|
get all NaN as a result.
|
|
|
|
Constructing Series from a list with `copy=False`.
|
|
|
|
>>> r = [1, 2]
|
|
>>> ser = pd.Series(r, copy=False)
|
|
>>> ser.iloc[0] = 999
|
|
>>> r
|
|
[1, 2]
|
|
>>> ser
|
|
0 999
|
|
1 2
|
|
dtype: int64
|
|
|
|
Due to input data type the Series has a `copy` of
|
|
the original data even though `copy=False`, so
|
|
the data is unchanged.
|
|
|
|
Constructing Series from a 1d ndarray with `copy=False`.
|
|
|
|
>>> r = np.array([1, 2])
|
|
>>> ser = pd.Series(r, copy=False)
|
|
>>> ser.iloc[0] = 999
|
|
>>> r
|
|
array([999, 2])
|
|
>>> ser
|
|
0 999
|
|
1 2
|
|
dtype: int64
|
|
|
|
Due to input data type the Series has a `view` on
|
|
the original data, so
|
|
the data is changed as well.
|
|
"""
|
|
|
|
_typ = "series"
|
|
_HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)
|
|
|
|
_name: Hashable
|
|
_metadata: list[str] = ["name"]
|
|
_internal_names_set = {"index"} | NDFrame._internal_names_set
|
|
_accessors = {"dt", "cat", "str", "sparse"}
|
|
_hidden_attrs = (
|
|
base.IndexOpsMixin._hidden_attrs
|
|
| NDFrame._hidden_attrs
|
|
| frozenset(["compress", "ptp"])
|
|
)
|
|
|
|
# Override cache_readonly bc Series is mutable
|
|
# error: Incompatible types in assignment (expression has type "property",
|
|
# base class "IndexOpsMixin" defined the type as "Callable[[IndexOpsMixin], bool]")
|
|
hasnans = property( # type: ignore[assignment]
|
|
# error: "Callable[[IndexOpsMixin], bool]" has no attribute "fget"
|
|
base.IndexOpsMixin.hasnans.fget, # type: ignore[attr-defined]
|
|
doc=base.IndexOpsMixin.hasnans.__doc__,
|
|
)
|
|
_mgr: SingleManager
|
|
div: Callable[[Series, Any], Series]
|
|
rdiv: Callable[[Series, Any], Series]
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Constructors
|
|
|
|
def __init__(
    self,
    data=None,
    index=None,
    dtype: Dtype | None = None,
    name=None,
    copy: bool = False,
    fastpath: bool = False,
) -> None:
    # Build the backing manager from ``data`` / ``index`` / ``dtype`` and
    # attach the name. ``fastpath`` is the internal short-circuit: it skips
    # input normalisation and name validation.
    manager_types = (SingleBlockManager, SingleArrayManager)
    data_is_manager = isinstance(data, manager_types)

    # GH#33357 called with just the SingleBlockManager
    if data_is_manager and index is None and dtype is None and copy is False:
        NDFrame.__init__(self, data)
        if fastpath:
            # e.g. from _box_col_values, skip validation of name
            object.__setattr__(self, "_name", name)
        else:
            self.name = name
        return

    if fastpath:
        # Internal call: data is an ndarray (or already a manager) and the
        # index is already defined, so only wrap/copy as requested.
        if not data_is_manager:
            backend = get_option("mode.data_manager")
            if backend == "block":
                data = SingleBlockManager.from_array(data, index)
            elif backend == "array":
                data = SingleArrayManager.from_array(data, index)
        if copy:
            data = data.copy()
        if index is None:
            index = data.index

    else:
        name = ibase.maybe_extract_name(name, data, type(self))

        if is_empty_data(data) and dtype is None:
            # gh-17261
            warnings.warn(
                "The default dtype for empty Series will be 'object' instead "
                "of 'float64' in a future version. Specify a dtype explicitly "
                "to silence this warning.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            # uncomment the line below when removing the FutureWarning
            # dtype = np.dtype(object)

        if index is not None:
            index = ensure_index(index)

        if data is None:
            data = {}
        if dtype is not None:
            dtype = self._validate_dtype(dtype)

        # Normalise ``data`` according to its type. Branch order matters:
        # MultiIndex is an Index, and Series is dict-like.
        if isinstance(data, MultiIndex):
            raise NotImplementedError(
                "initializing a Series from a MultiIndex is not supported"
            )
        elif isinstance(data, Index):
            if dtype is None:
                # GH#24096 we need to ensure the index remains immutable
                data = data._values.copy()
            else:
                # astype copies
                data = data.astype(dtype)
            # Either path above already produced fresh values.
            copy = False
        elif isinstance(data, np.ndarray):
            if len(data.dtype):
                # GH#13296 we are dealing with a compound dtype, which
                # should be treated as 2D
                raise ValueError(
                    "Cannot construct a Series from an ndarray with "
                    "compound dtype. Use DataFrame instead."
                )
        elif isinstance(data, Series):
            if index is None:
                index = data.index
            else:
                data = data.reindex(index, copy=copy)
                copy = False
            data = data._mgr
        elif is_dict_like(data):
            data, index = self._init_dict(data, index, dtype)
            # _init_dict already applied the dtype and built new data.
            dtype = None
            copy = False
        elif isinstance(data, manager_types):
            if index is None:
                index = data.index
            elif not data.index.equals(index) or copy:
                # GH#19275 SingleBlockManager input should only be called
                # internally
                raise AssertionError(
                    "Cannot pass both SingleBlockManager "
                    "`data` argument and a different "
                    "`index` argument. `copy` must be False."
                )
        elif not isinstance(data, ExtensionArray):
            # ExtensionArrays are taken as-is; anything else is listified
            # when it is a non-sequence iterable.
            data = com.maybe_iterable_to_list(data)

        if index is None:
            if not is_list_like(data):
                data = [data]
            index = default_index(len(data))
        elif is_list_like(data):
            com.require_length_match(data, index)

        # create/copy the manager
        if isinstance(data, manager_types):
            if dtype is not None:
                data = data.astype(dtype=dtype, errors="ignore", copy=copy)
            elif copy:
                data = data.copy()
        else:
            data = sanitize_array(data, index, dtype, copy)

            backend = get_option("mode.data_manager")
            if backend == "block":
                data = SingleBlockManager.from_array(data, index)
            elif backend == "array":
                data = SingleArrayManager.from_array(data, index)

    NDFrame.__init__(self, data)
    if fastpath:
        # skips validation of the name
        object.__setattr__(self, "_name", name)
        return
    self.name = name
    self._set_axis(0, index)
|
|
|
|
def _init_dict(
    self, data, index: Index | None = None, dtype: DtypeObj | None = None
):
    """
    Compute the manager and index of a new Series from dict-like input.

    Parameters
    ----------
    data : dict or dict-like
        Mapping whose keys become labels and whose values become the data.
    index : Index or None, default None
        Requested index. When None, the dict keys are used.
    dtype : np.dtype, ExtensionDtype, or None, default None
        Requested dtype. When None, it is inferred from the values.

    Returns
    -------
    _data : manager of the intermediate Series built from `data`
    index : index of that intermediate Series
    """
    keys: Index | tuple

    # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
    # raises KeyError), so the whole dict is materialised and aligned later.
    has_data = bool(data)
    if has_data:
        # GH:34717 pulling keys and values separately is faster than
        # unzipping the items through a generator.
        keys = tuple(data.keys())
        values = list(data.values())
    elif index is None:
        keys, values = (), []
    else:
        # Empty mapping with an explicit index: broadcast a single NA
        # scalar over the index instead of reindexing.
        values = na_value_for_dtype(pandas_dtype(dtype), compat=False)
        keys = index

    # Input is now list-like (or a scalar), so defer to the standard
    # constructor.
    # TODO: passing np.float64 to not break anything yet. See GH-17261
    built = create_series_with_explicit_dtype(
        # error: Argument "index" to "create_series_with_explicit_dtype" has
        # incompatible type "Tuple[Any, ...]"; expected "Union[ExtensionArray,
        # ndarray, Index, None]"
        values,
        index=keys,  # type: ignore[arg-type]
        dtype=dtype,
        dtype_if_empty=np.float64,
    )

    # Honour the caller's index (ordering and labels) when one was given.
    if has_data and index is not None:
        built = built.reindex(index, copy=False)
    return built._mgr, built.index
|
|
|
|
# ----------------------------------------------------------------------
|
|
|
|
@property
def _constructor(self) -> Callable[..., Series]:
    """
    Return the callable used to build results with the same dimensionality.
    """
    return Series
|
|
|
|
@property
def _constructor_expanddim(self) -> Callable[..., DataFrame]:
    """
    Return the constructor for results with one more dimension than the
    original, e.g. the result of ``Series.to_frame()``.
    """
    # Imported at call time rather than module level
    # (presumably to avoid a circular import - confirm).
    from pandas.core.frame import DataFrame

    expanded_cls = DataFrame
    return expanded_cls
|
|
|
|
# types
|
|
@property
def _can_hold_na(self) -> bool:
    """
    Report the ``_can_hold_na`` flag of the backing manager.
    """
    mgr = self._mgr
    return mgr._can_hold_na
|
|
|
|
def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None:
    """
    Coerce `labels` to an Index and install it on the backing manager.

    All-date labels that are not already a datetime-like index are
    converted to a DatetimeIndex when that conversion succeeds.

    This is called from the cython code when we set the `index` attribute
    directly, e.g. `series.index = [1, 2, 3]`.
    """
    labels = ensure_index(labels)

    # Skip the datetime cast for a plain ``Index`` with a non-numpy dtype,
    # e.g. timestamp[ns][pyarrow].
    if labels._is_all_dates and (
        type(labels) is not Index or isinstance(labels.dtype, np.dtype)
    ):
        # For a categorical index, the categories decide whether the labels
        # are already datetime-like.
        inspected = (
            labels.categories if isinstance(labels, CategoricalIndex) else labels
        )
        datetime_like = isinstance(
            inspected, (DatetimeIndex, PeriodIndex, TimedeltaIndex)
        )
        if not datetime_like:
            try:
                labels = DatetimeIndex(labels)
            except (tslibs.OutOfBoundsDatetime, ValueError):
                # labels may exceed datetime bounds, or not be convertible
                # to a DatetimeIndex; keep them unchanged in that case.
                pass

    # ensure_index above guarantees ``labels`` is an Index here.
    self._mgr.set_axis(axis, labels)
|
|
|
|
# ndarray compatibility
|
|
@property
def dtype(self) -> DtypeObj:
    """
    Return the dtype object of the underlying data.
    """
    mgr = self._mgr
    return mgr.dtype
|
|
|
|
@property
def dtypes(self) -> DtypeObj:
    """
    Return the dtype object of the underlying data.

    Same value as ``dtype``; provided for DataFrame compatibility.
    """
    return self.dtype
|
|
|
|
@property
def name(self) -> Hashable:
    """
    Return the name of the Series.

    When the Series is used to form a DataFrame, its name becomes the
    index or column name. The name is also shown whenever the Series is
    displayed in the interpreter.

    Returns
    -------
    label (hashable object)
        The name of the Series, also the column name if part of a DataFrame.

    See Also
    --------
    Series.rename : Sets the Series name when given a scalar input.
    Index.name : Corresponding Index property.

    Examples
    --------
    The name can be supplied to the constructor and reassigned later.

    >>> s = pd.Series([1, 2, 3], dtype=np.int64, name='Numbers')
    >>> s
    0    1
    1    2
    2    3
    Name: Numbers, dtype: int64
    >>> s.name = "Integers"
    >>> s
    0    1
    1    2
    2    3
    Name: Integers, dtype: int64

    A Series taken from a DataFrame is named after its column.

    >>> df = pd.DataFrame([[1, 2], [3, 4], [5, 6]],
    ...                   columns=["Odd Numbers", "Even Numbers"])
    >>> df
       Odd Numbers  Even Numbers
    0            1             2
    1            3             4
    2            5             6
    >>> df["Even Numbers"].name
    'Even Numbers'
    """
    return self._name


@name.setter
def name(self, value: Hashable) -> None:
    # Validate hashability first; the error is labelled with the concrete
    # class name so that subclasses report their own name.
    owner = type(self).__name__
    validate_all_hashable(value, error_name=f"{owner}.name")
    # Write the attribute directly, bypassing this class's __setattr__.
    object.__setattr__(self, "_name", value)
|
|
|
|
@property
def values(self):
    """
    Return Series as ndarray or ndarray-like depending on the dtype.

    .. warning::

       Prefer :attr:`Series.array` when you need a reference to the
       underlying data, or :meth:`Series.to_numpy` when you need a
       NumPy array.

    Returns
    -------
    numpy.ndarray or ndarray-like

    See Also
    --------
    Series.array : Reference to the underlying data.
    Series.to_numpy : A NumPy array representing the underlying data.

    Examples
    --------
    >>> pd.Series([1, 2, 3]).values
    array([1, 2, 3])

    >>> pd.Series(list('aabc')).values
    array(['a', 'a', 'b', 'c'], dtype=object)

    >>> pd.Series(list('aabc')).astype('category').values
    ['a', 'a', 'b', 'c']
    Categories (3, object): ['a', 'b', 'c']

    Timezone aware datetime data is converted to UTC:

    >>> pd.Series(pd.date_range('20130101', periods=3,
    ...                         tz='US/Eastern')).values
    array(['2013-01-01T05:00:00.000000000',
           '2013-01-02T05:00:00.000000000',
           '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')
    """
    mgr = self._mgr
    return mgr.external_values()
|
|
|
|
@property
def _values(self):
    """
    Return the internal representation of the data, as given by the
    manager's ``internal_values``.

    These are the values as stored in the Block (ndarray or ExtensionArray
    depending on the Block class), with datetime64[ns] and timedelta64[ns]
    wrapped in ExtensionArrays to match Index._values behavior.

    The public ``.values`` differs for some dtypes for historical
    backwards-compatibility reasons: for period data it returns an object
    ndarray and for datetimetz a datetime64[ns] ndarray, whereas
    ``._values`` returns an ExtensionArray in those cases.

    ``.array`` differs in that it always returns an ExtensionArray, while
    ``._values`` still returns the numpy array when the Block is backed by
    one (except for datetime64 and timedelta64 dtypes).

    Overview:

    dtype       | values        | _values       | array         |
    ----------- | ------------- | ------------- | ------------- |
    Numeric     | ndarray       | ndarray       | PandasArray   |
    Category    | Categorical   | Categorical   | Categorical   |
    dt64[ns]    | ndarray[M8ns] | DatetimeArray | DatetimeArray |
    dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray |
    td64[ns]    | ndarray[m8ns] | TimedeltaArray| TimedeltaArray|
    Period      | ndarray[obj]  | PeriodArray   | PeriodArray   |
    Nullable    | EA            | EA            | EA            |

    """
    mgr = self._mgr
    return mgr.internal_values()
|
|
|
|
# error: Decorated property not supported
|
|
@Appender(base.IndexOpsMixin.array.__doc__)  # type: ignore[misc]
@property
def array(self) -> ExtensionArray:
    # No docstring here on purpose: the decorator supplies the shared one
    # from IndexOpsMixin.array.
    mgr = self._mgr
    return mgr.array_values()
|
|
|
|
# ops
|
|
def ravel(self, order: str = "C") -> np.ndarray:
    """
    Return the flattened underlying data as an ndarray.

    Parameters
    ----------
    order : str, default "C"
        Forwarded as ``order`` to the ``ravel`` method of the underlying
        values.

    Returns
    -------
    numpy.ndarray or ndarray-like
        Flattened data of the Series.

    See Also
    --------
    numpy.ndarray.ravel : Return a flattened array.
    """
    underlying = self._values
    return underlying.ravel(order=order)
|
|
|
|
def __len__(self) -> int:
    """
    Return the length of the Series.
    """
    # The length is that of the backing manager.
    mgr = self._mgr
    return len(mgr)
|
|
|
|
def view(self, dtype: Dtype | None = None) -> Series:
    """
    Create a new view of the Series.

    The returned Series is a view of the same underlying values in
    memory, optionally reinterpreted with a new data type. The new data
    type must have the same size in bytes as the original so that values
    stay aligned with the index.

    Parameters
    ----------
    dtype : data type
        Data type object or one of their string representations.

    Returns
    -------
    Series
        A new Series object as a view of the same data in memory.

    See Also
    --------
    numpy.ndarray.view : Equivalent numpy function to create a new view of
        the same data in memory.

    Notes
    -----
    Series are instantiated with ``dtype=float64`` by default. Whereas
    ``numpy.ndarray.view()`` returns a view with the same data type as the
    original array, ``Series.view()`` (without specified dtype) will try
    using ``float64`` and may fail if the original data type size in bytes
    is not the same.

    Examples
    --------
    >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8')
    >>> s
    0   -2
    1   -1
    2    0
    3    1
    4    2
    dtype: int8

    The 8 bit signed integer representation of `-1` is `0b11111111`, but
    the same bytes represent 255 if read as an 8 bit unsigned integer:

    >>> us = s.view('uint8')
    >>> us
    0    254
    1    255
    2      0
    3      1
    4      2
    dtype: uint8

    The views share the same underlying values:

    >>> us[0] = 128
    >>> s
    0   -128
    1     -1
    2      0
    3      1
    4      2
    dtype: int8
    """
    # Go through self.array rather than self._values so the view logic of
    # the array wrapper (PandasArray) is reused.
    viewed = self.array.view(dtype)
    out = self._constructor(viewed, index=self.index)
    return out.__finalize__(self, method="view")
|
|
|
|
# ----------------------------------------------------------------------
|
|
# NDArray Compat
|
|
_HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)
|
|
|
|
def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
    """
    Return the values as a NumPy array.

    This is not meant to be called directly; :func:`numpy.array` and
    :func:`numpy.asarray` invoke it.

    Parameters
    ----------
    dtype : str or numpy.dtype, optional
        The dtype to use for the resulting NumPy array. By default,
        the dtype is inferred from the data.

    Returns
    -------
    numpy.ndarray
        The values in the series converted to a :class:`numpy.ndarray`
        with the specified `dtype`.

    See Also
    --------
    array : Create a new array from data.
    Series.array : Zero-copy view to the array backing the Series.
    Series.to_numpy : Series method for similar behavior.

    Examples
    --------
    >>> ser = pd.Series([1, 2, 3])
    >>> np.asarray(ser)
    array([1, 2, 3])

    For timezone-aware data, the timezones may be retained with
    ``dtype='object'``

    >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
    >>> np.asarray(tzser, dtype="object")
    array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
           Timestamp('2000-01-02 00:00:00+0100', tz='CET')],
          dtype=object)

    Or the values may be localized to UTC and the tzinfo discarded with
    ``dtype='datetime64[ns]'``

    >>> np.asarray(tzser, dtype="datetime64[ns]")  # doctest: +ELLIPSIS
    array(['1999-12-31T23:00:00.000000000', ...],
          dtype='datetime64[ns]')
    """
    internal = self._values
    return np.asarray(internal, dtype)
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Unary Methods
|
|
|
|
# coercion
|
|
__float__ = _coerce_method(float)
|
|
__long__ = _coerce_method(int)
|
|
__int__ = _coerce_method(int)
|
|
|
|
# ----------------------------------------------------------------------
|
|
|
|
# indexers
|
|
@property
def axes(self) -> list[Index]:
    """
    Return a list of the row axis labels.

    The list has a single entry, the index.
    """
    row_labels = self.index
    return [row_labels]
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Indexing Methods
|
|
|
|
@Appender(NDFrame.take.__doc__)
def take(
    self, indices, axis: Axis = 0, is_copy: bool | None = None, **kwargs
) -> Series:
    # No docstring here on purpose: the decorator supplies NDFrame.take's.
    # ``axis`` is accepted but not used in this body.
    if is_copy is not None:
        # Any explicit value triggers the deprecation warning; the value
        # itself is otherwise ignored.
        warnings.warn(
            "is_copy is deprecated and will be removed in a future version. "
            "'take' always returns a copy, so there is no need to specify this.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    # Validate the extra keyword arguments via the numpy-compat validator.
    nv.validate_take((), kwargs)

    positions = ensure_platform_int(indices)
    taken_index = self.index.take(positions)
    taken_values = self._values.take(positions)

    out = self._constructor(taken_values, index=taken_index, fastpath=True)
    return out.__finalize__(self, method="take")
|
|
|
|
def _take_with_is_copy(self, indices, axis=0) -> Series:
    """
    Internal counterpart of `take` that, on the parent dataframe, sets the
    `_is_copy` attribute used in indexing for the SettingWithCopyWarning.

    For Series this simply delegates to the public `take`, which never
    sets `_is_copy`.

    See the docstring of `take` for full explanation of the parameters.
    """
    taken = self.take(indices=indices, axis=axis)
    return taken
|
|
|
|
def _ixs(self, i: int, axis: int = 0) -> Any:
    """
    Return the i-th value or values in the Series by location.

    Parameters
    ----------
    i : int
        Position used to index the internal values.
    axis : int, default 0
        Not used in this body.

    Returns
    -------
    scalar (int) or Series (slice, sequence)
    """
    internal = self._values
    return internal[i]
|
|
|
|
def _slice(self, slobj: slice, axis: int = 0) -> Series:
    """
    Return the positional slice `slobj` of this Series.

    The `axis` argument exists only for compatibility with the NDFrame
    method; slicing here is always positional.
    """
    sliced = self._get_values(slobj)
    return sliced
|
|
|
|
def __getitem__(self, key):
    """
    Look up `key`, dispatching on its kind.

    Checks run in this order: Ellipsis, positional integer, scalar label,
    hashable label (e.g. tuple), boolean mask, then everything else via
    ``_get_with``.
    """
    check_deprecated_indexers(key)
    key = com.apply_if_callable(key, self)

    if key is Ellipsis:
        return self

    # Scalar-ness is decided before 1-tuples are unpacked below.
    key_is_scalar = is_scalar(key)
    if isinstance(key, (list, tuple)):
        key = unpack_1tuple(key)

    if is_integer(key) and self.index._should_fallback_to_positional:
        return self._values[key]

    if key_is_scalar:
        return self._get_value(key)

    if is_hashable(key):
        # Otherwise index.get_value will raise InvalidIndexError
        try:
            # For labels that don't resolve as scalars like tuples and frozensets
            return self._get_value(key)
        except (KeyError, TypeError, InvalidIndexError):
            # InvalidIndexError for e.g. generator
            # see test_series_getitem_corner_generator
            tuple_on_multiindex = isinstance(key, tuple) and isinstance(
                self.index, MultiIndex
            )
            if tuple_on_multiindex:
                # We still have the corner case where a tuple is a key
                # in the first level of our MultiIndex
                return self._get_values_tuple(key)
            # Any other failure falls through to the generic paths below.

    if is_iterator(key):
        key = list(key)

    if com.is_bool_indexer(key):
        mask = check_bool_indexer(self.index, key)
        mask = np.asarray(mask, dtype=bool)
        return self._get_values(mask)

    return self._get_with(key)
|
|
|
|
def _get_with(self, key):
    """
    Resolve keys not handled by the earlier checks in ``__getitem__``:
    slices, tuples, non-list-like objects and list-likes.
    """
    # other: fancy integer or otherwise
    if isinstance(key, slice):
        # _convert_slice_indexer to determine if this slice is positional
        # or label based, and if the latter, convert to positional
        positional = self.index._convert_slice_indexer(key, kind="getitem")
        return self._slice(positional)

    if isinstance(key, ABCDataFrame):
        raise TypeError(
            "Indexing a Series with DataFrame is not "
            "supported, use the appropriate DataFrame column"
        )

    if isinstance(key, tuple):
        return self._get_values_tuple(key)

    if not is_list_like(key):
        # e.g. scalars that aren't recognized by lib.is_scalar, GH#32684
        return self.loc[key]

    # Materialise list-likes other than the recognised container types.
    if not isinstance(key, (list, np.ndarray, ExtensionArray, Series, Index)):
        key = list(key)

    key_type = (
        key.inferred_type
        if isinstance(key, Index)
        else lib.infer_dtype(key, skipna=False)
    )

    # Note: The key_type == "boolean" case should be caught by the
    #  com.is_bool_indexer check in __getitem__
    # Integer keys are positional (iloc) only when the index says so;
    # everything else, including the duplicate-label case GH#4246, is
    # label-based (loc).
    if key_type == "integer" and self.index._should_fallback_to_positional:
        return self.iloc[key]
    return self.loc[key]
|
|
|
|
def _get_values_tuple(self, key: tuple):
    """
    Look up a tuple key.

    A tuple containing ``None`` indexes the raw values directly (matplotlib
    compatibility). Otherwise the key is resolved against a MultiIndex.

    Raises
    ------
    KeyError
        If the tuple contains no ``None`` and the index is not a MultiIndex.
    """
    # mpl hackaround
    if com.any_none(*key):
        # mpl compat if we look up e.g. ser[:, np.newaxis];
        #  see tests.series.timeseries.test_mpl_compat_hack
        # the asarray is needed to avoid returning a 2D DatetimeArray
        raw = np.asarray(self._values[key])
        deprecate_ndim_indexing(raw, stacklevel=find_stack_level())
        return raw

    index = self.index
    if not isinstance(index, MultiIndex):
        raise KeyError("key of type tuple not found and not a MultiIndex")

    # If key is contained, would have returned by now
    indexer, new_index = index.get_loc_level(key)
    subset = self._constructor(self._values[indexer], index=new_index)
    return subset.__finalize__(self)
|
|
|
|
def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series:
    """
    Return a new Series from the manager's ``getitem_mgr(indexer)`` result,
    finalized from this Series.
    """
    sub_mgr = self._mgr.getitem_mgr(indexer)
    out = self._constructor(sub_mgr)
    return out.__finalize__(self)
|
|
|
|
def _get_value(self, label, takeable: bool = False):
    """
    Quickly retrieve single value at passed index label.

    Parameters
    ----------
    label : object
        Index label, or a position when `takeable` is True.
    takeable : bool, default False
        If True, use `label` directly to index the internal values.

    Returns
    -------
    scalar value
    """
    if not takeable:
        # Similar to Index.get_value, but we do not fall back to positional
        index = self.index
        position = index.get_loc(label)
        return index._get_values_for_loc(self, position, label)

    return self._values[label]
|
|
|
|
def __setitem__(self, key, value) -> None:
    """
    Assign ``value`` at ``key``.

    The key is tried, in order, as: a slice (``Ellipsis`` is treated as
    ``slice(None)``), a single label found through the index engine, a
    missing scalar key (positional fallback or enlargement via ``.loc``),
    a boolean mask, and finally a list-like handled by ``_set_with``.
    """
    check_deprecated_indexers(key)
    key = com.apply_if_callable(key, self)
    needs_cache_refresh = self._check_is_chained_assignment_possible()

    if key is Ellipsis:
        key = slice(None)

    if isinstance(key, slice):
        positions = self.index._convert_slice_indexer(key, kind="getitem")
        return self._set_values(positions, value)

    try:
        self._set_with_engine(key, value)
    except KeyError:
        # A scalar (or, for MultiIndex / object dtype, scalar-like) key
        #  that does not exist in self.index.
        positional = is_integer(key) and self.index.inferred_type != "integer"
        if not positional:
            # GH#12862 adding a new key to the Series
            self.loc[key] = value
        else:
            # positional setter
            if not self.index._should_fallback_to_positional:
                # GH#33469
                warnings.warn(
                    "Treating integers as positional in Series.__setitem__ "
                    "with a Float64Index is deprecated. In a future version, "
                    "`series[an_int] = val` will insert a new key into the "
                    "Series. Use `series.iloc[an_int] = val` to treat the "
                    "key as positional.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            # _mgr.setitem_inplace is not usable here yet: we could hit *both*
            #  KeyError and then ValueError, xref GH#45070
            self._set_values(key, value)

    except (TypeError, ValueError, LossySetitemError):
        # The key itself is fine; the value cannot be stored losslessly
        positions = self.index.get_loc(key)
        self._set_values(positions, value)

    except InvalidIndexError as err:
        if isinstance(key, tuple) and not isinstance(self.index, MultiIndex):
            # MultiIndex cases never arrive here because they raise KeyError
            #  e.g. test_basic_getitem_setitem_corner
            raise KeyError(
                "key of type tuple not found and not a MultiIndex"
            ) from err

        if not com.is_bool_indexer(key):
            self._set_with(key, value)
        else:
            mask = check_bool_indexer(self.index, key)
            mask = np.asarray(mask, dtype=bool)

            length_mismatch = (
                is_list_like(value)
                and len(value) != len(self)
                and not isinstance(value, Series)
                and not is_object_dtype(self.dtype)
            )
            if length_mismatch:
                # A Series value would be reindexed to a matching length
                #  inside the _where call below
                # GH#44265
                self._set_values(mask.nonzero()[0], value)
                return

            # otherwise, for a listlike other, series[mask] = other is read
            #  as series[mask] = other[mask]
            try:
                self._where(~mask, value, inplace=True)
            except InvalidIndexError:
                # test_where_dups
                self.iloc[mask] = value
            return

    if needs_cache_refresh:
        self._maybe_update_cacher(inplace=True)
|
|
|
|
def _set_with_engine(self, key, value) -> None:
    """
    Set ``value`` in place at the location the index reports for ``key``.

    Any exception raised by ``self.index.get_loc`` or by the manager's
    ``setitem_inplace`` propagates to the caller.
    """
    # equivalent to self._values[key] = value
    self._mgr.setitem_inplace(self.index.get_loc(key), value)
|
|
|
|
def _set_with(self, key, value):
    """
    Set ``value`` for a list-like ``key``, choosing labels or positions.

    Positions are used only when the index falls back to positional
    indexing and the key is inferred as "integer"; otherwise the key is
    treated as labels.
    """
    # We arrive here through the InvalidIndexError handler, so the key is
    #  expected to be listlike.
    assert not isinstance(key, tuple)

    if is_iterator(key):
        # Materialize first; infer_dtype would otherwise exhaust the generator
        key = list(key)

    # Note: an inferred type of "boolean" should not show up here, since the
    #  is_bool_indexer check in __setitem__ handles masks beforehand.
    use_positions = (
        self.index._should_fallback_to_positional
        and lib.infer_dtype(key, skipna=False) == "integer"
    )
    setter = self._set_values if use_positions else self._set_labels
    setter(key, value)
|
|
|
|
def _set_labels(self, key, value) -> None:
    """
    Set ``value`` at the positions of the labels in ``key``.

    Raises
    ------
    KeyError
        If any label is reported as missing (-1) by ``index.get_indexer``.
    """
    labels = com.asarray_tuplesafe(key)
    positions: np.ndarray = self.index.get_indexer(labels)

    missing = positions == -1
    if missing.any():
        raise KeyError(f"{labels[missing]} not in index")

    self._set_values(positions, value)
|
|
|
|
def _set_values(self, key, value) -> None:
    """
    Set ``value`` at ``key`` through the manager and refresh the cacher.

    An Index or Series key is replaced by its underlying ``_values``. The
    manager returned by ``setitem`` replaces ``self._mgr``.
    """
    indexer = key._values if isinstance(key, (Index, Series)) else key

    self._mgr = self._mgr.setitem(indexer=indexer, value=value)
    self._maybe_update_cacher()
|
|
|
|
def _set_value(self, label, value, takeable: bool = False):
    """
    Quickly set a single value at the given label.

    A label that is missing from the index is added through ``self.loc``.

    Parameters
    ----------
    label : object
        Partial indexing with MultiIndex not allowed.
    value : object
        Scalar value.
    takeable : bool, default False
        When True, ``label`` is used as the location as-is, without an
        index lookup.
    """
    if takeable:
        loc = label
    else:
        try:
            loc = self.index.get_loc(label)
        except KeyError:
            # enlarge via .loc, a non-recursive setter
            self.loc[label] = value
            return

    self._set_values(loc, value)
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Lookup Caching
|
|
|
|
@property
def _is_cached(self) -> bool:
    """Return True when a ``_cacher`` attribute is set and is not None."""
    cacher = getattr(self, "_cacher", None)
    return cacher is not None
|
|
|
|
def _get_cacher(self):
    """
    Return the object referenced by ``_cacher``, or None.

    ``_cacher`` stores a callable reference as its second element; calling
    it yields the referent (or None once the referent is gone).
    """
    entry = getattr(self, "_cacher", None)
    if entry is None:
        return None
    return entry[1]()
|
|
|
|
def _reset_cacher(self) -> None:
    """
    Drop the ``_cacher`` attribute when it is present; otherwise do nothing.
    """
    if not hasattr(self, "_cacher"):
        return
    delattr(self, "_cacher")
|
|
|
|
def _set_as_cached(self, item, cacher) -> None:
    """
    Record ``item`` together with a weak reference to ``cacher`` in the
    ``_cacher`` attribute of the calling object.
    """
    parent_ref = weakref.ref(cacher)
    self._cacher = (item, parent_ref)
|
|
|
|
def _clear_item_cache(self) -> None:
    """Do nothing: this is a no-op for Series."""
    return None
|
|
|
|
def _check_is_chained_assignment_possible(self) -> bool:
    """
    See NDFrame._check_is_chained_assignment_possible.__doc__
    """
    if not (self._is_view and self._is_cached):
        return super()._check_is_chained_assignment_possible()

    parent = self._get_cacher()
    if parent is not None and parent._is_mixed_type:
        self._check_setitem_copy(t="referent", force=True)
    return True
|
|
|
|
def _maybe_update_cacher(
    self,
    clear: bool = False,
    verify_is_copy: bool = True,
    inplace: bool = False,
) -> None:
    """
    See NDFrame._maybe_update_cacher.__doc__
    """
    cacher = getattr(self, "_cacher", None)
    if cacher is not None:
        assert self.ndim == 1
        ref: DataFrame = cacher[1]()

        if ref is None:
            # the referent is dead, so we are effectively a copy
            del self._cacher
        else:
            # under CoW the parent DataFrame cache is never updated when the
            #  Series changes; the cached item is always popped instead
            using_cow = (
                get_option("mode.copy_on_write")
                and get_option("mode.data_manager") == "block"
            )
            if not using_cow and len(self) == len(ref) and self.name in ref.columns:
                # GH#42530 self.name has to be in ref.columns so that we
                #  know the column is still part of the dataframe;
                #  otherwise either self or ref has swapped in new arrays
                ref._maybe_cache_changed(cacher[0], self, inplace=inplace)
            else:
                # GH#33675 a new array was swapped in, which invalidates
                #  the parent's reference to self
                ref._item_cache.pop(cacher[0], None)

    super()._maybe_update_cacher(
        clear=clear, verify_is_copy=verify_is_copy, inplace=inplace
    )
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Unsorted
|
|
|
|
@property
def _is_mixed_type(self):
    """Always False for this object."""
    return False
|
|
|
|
def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series:
    """
    Repeat elements of a Series.

    Produces a new Series in which every element of the current Series
    appears consecutively the requested number of times.

    Parameters
    ----------
    repeats : int or array of ints
        The number of repetitions for each element. This should be a
        non-negative integer. Repeating 0 times will return an empty
        Series.
    axis : None
        Unused. Parameter needed for compatibility with DataFrame.

    Returns
    -------
    Series
        Newly created Series with repeated elements.

    See Also
    --------
    Index.repeat : Equivalent function for Index.
    numpy.repeat : Similar method for :class:`numpy.ndarray`.

    Examples
    --------
    >>> s = pd.Series(['a', 'b', 'c'])
    >>> s
    0    a
    1    b
    2    c
    dtype: object
    >>> s.repeat(2)
    0    a
    0    a
    1    b
    1    b
    2    c
    2    c
    dtype: object
    >>> s.repeat([1, 2, 3])
    0    a
    1    b
    1    b
    2    c
    2    c
    2    c
    dtype: object
    """
    nv.validate_repeat((), {"axis": axis})

    # index first, then values, each repeated with the same ``repeats``
    repeated_index = self.index.repeat(repeats)
    repeated_values = self._values.repeat(repeats)

    result = self._constructor(repeated_values, index=repeated_index)
    return result.__finalize__(self, method="repeat")
|
|
|
|
@overload
def reset_index(
    self,
    level: IndexLabel = ...,
    *,
    drop: Literal[False] = ...,
    name: Level = ...,
    inplace: Literal[False] = ...,
    allow_duplicates: bool = ...,
) -> DataFrame:
    ...


@overload
def reset_index(
    self,
    level: IndexLabel = ...,
    *,
    drop: Literal[True],
    name: Level = ...,
    inplace: Literal[False] = ...,
    allow_duplicates: bool = ...,
) -> Series:
    ...


@overload
def reset_index(
    self,
    level: IndexLabel = ...,
    *,
    drop: bool = ...,
    name: Level = ...,
    inplace: Literal[True],
    allow_duplicates: bool = ...,
) -> None:
    ...


@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"])
def reset_index(
    self,
    level: IndexLabel = None,
    drop: bool = False,
    name: Level = lib.no_default,
    inplace: bool = False,
    allow_duplicates: bool = False,
) -> DataFrame | Series | None:
    """
    Generate a new DataFrame or Series with the index reset.

    Useful when the index should become a regular column, or when the
    index carries no meaning and should be replaced by the default one
    before another operation.

    Parameters
    ----------
    level : int, str, tuple, or list, optional
        For a Series with a MultiIndex, only remove the specified levels
        from the index. Removes all levels by default.
    drop : bool, default False
        Just reset the index, without inserting it as a column in
        the new DataFrame.
    name : object, optional
        The name to use for the column containing the original Series
        values. Uses ``self.name`` by default. This argument is ignored
        when `drop` is True.
    inplace : bool, default False
        Modify the Series in place (do not create a new object).
    allow_duplicates : bool, default False
        Allow duplicate column labels to be created.

        .. versionadded:: 1.5.0

    Returns
    -------
    Series or DataFrame or None
        When `drop` is False (the default), a DataFrame is returned.
        The newly created columns will come first in the DataFrame,
        followed by the original Series values.
        When `drop` is True, a `Series` is returned.
        In either case, if ``inplace=True``, no value is returned.

    Raises
    ------
    TypeError
        If ``inplace=True`` is combined with ``drop=False``.

    See Also
    --------
    DataFrame.reset_index: Analogous function for DataFrame.

    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4], name='foo',
    ...               index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))

    Generate a DataFrame with default index.

    >>> s.reset_index()
      idx  foo
    0   a    1
    1   b    2
    2   c    3
    3   d    4

    To specify the name of the new column use `name`.

    >>> s.reset_index(name='values')
      idx  values
    0   a       1
    1   b       2
    2   c       3
    3   d       4

    To generate a new Series with the default set `drop` to True.

    >>> s.reset_index(drop=True)
    0    1
    1    2
    2    3
    3    4
    Name: foo, dtype: int64

    To update the Series in place, without generating a new one
    set `inplace` to True. Note that it also requires ``drop=True``.

    >>> s.reset_index(inplace=True, drop=True)
    >>> s
    0    1
    1    2
    2    3
    3    4
    Name: foo, dtype: int64

    The `level` parameter is interesting for Series with a multi-level
    index.

    >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']),
    ...           np.array(['one', 'two', 'one', 'two'])]
    >>> s2 = pd.Series(
    ...     range(4), name='foo',
    ...     index=pd.MultiIndex.from_arrays(arrays,
    ...                                     names=['a', 'b']))

    To remove a specific level from the Index, use `level`.

    >>> s2.reset_index(level='a')
           a  foo
    b
    one  bar    0
    two  bar    1
    one  baz    2
    two  baz    3

    If `level` is not set, all levels are removed from the Index.

    >>> s2.reset_index()
         a    b  foo
    0  bar  one    0
    1  bar  two    1
    2  baz  one    2
    3  baz  two    3
    """
    inplace = validate_bool_kwarg(inplace, "inplace")

    if not drop:
        if inplace:
            raise TypeError(
                "Cannot reset_index inplace on a Series to create a DataFrame"
            )
        if name is lib.no_default:
            # For backwards compatibility the column stays [0] rather than
            #  [None] when self.name is None
            name = 0 if self.name is None else self.name

        frame = self.to_frame(name)
        return frame.reset_index(
            level=level, drop=drop, allow_duplicates=allow_duplicates
        )

    new_index = default_index(len(self))
    if level is not None:
        level_list: Sequence[Hashable]
        level_list = level if isinstance(level, (tuple, list)) else [level]
        level_list = [self.index._get_level_number(lev) for lev in level_list]
        # only a strict subset of the levels leaves a remaining index
        if len(level_list) < self.index.nlevels:
            new_index = self.index.droplevel(level_list)

    if inplace:
        self.index = new_index
        return None

    result = self._constructor(self._values.copy(), index=new_index)
    return result.__finalize__(self, method="reset_index")
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Rendering Methods
|
|
|
|
def __repr__(self) -> str:
    """
    Return a string representation for a particular Series.

    The rendering parameters come from ``fmt.get_series_repr_params()`` and
    are forwarded to ``to_string``.
    """
    return self.to_string(**fmt.get_series_repr_params())
|
|
|
|
@overload
def to_string(
    self,
    buf: None = ...,
    na_rep: str = ...,
    float_format: str | None = ...,
    header: bool = ...,
    index: bool = ...,
    length=...,
    dtype=...,
    name=...,
    max_rows: int | None = ...,
    min_rows: int | None = ...,
) -> str:
    ...


@overload
def to_string(
    self,
    buf: FilePath | WriteBuffer[str],
    na_rep: str = ...,
    float_format: str | None = ...,
    header: bool = ...,
    index: bool = ...,
    length=...,
    dtype=...,
    name=...,
    max_rows: int | None = ...,
    min_rows: int | None = ...,
) -> None:
    ...


def to_string(
    self,
    buf: FilePath | WriteBuffer[str] | None = None,
    na_rep: str = "NaN",
    float_format: str | None = None,
    header: bool = True,
    index: bool = True,
    length=False,
    dtype=False,
    name=False,
    max_rows: int | None = None,
    min_rows: int | None = None,
) -> str | None:
    """
    Render a string representation of the Series.

    Parameters
    ----------
    buf : StringIO-like or path, optional
        Buffer to write to. An object with a ``write`` attribute is written
        to directly; anything else is treated as a path and opened for
        writing as UTF-8 text.
    na_rep : str, optional
        String representation of NaN to use, default 'NaN'.
    float_format : one-parameter function, optional
        Formatter function to apply to columns' elements if they are
        floats, default None.
    header : bool, default True
        Add the Series header (index name).
    index : bool, optional
        Add index (row) labels, default True.
    length : bool, default False
        Add the Series length.
    dtype : bool, default False
        Add the Series dtype.
    name : bool, default False
        Add the Series name if not None.
    max_rows : int, optional
        Maximum number of rows to show before truncating. If None, show
        all.
    min_rows : int, optional
        The number of rows to display in a truncated repr (when number
        of rows is above `max_rows`).

    Returns
    -------
    str or None
        String representation of Series if ``buf=None``, otherwise None.

    Raises
    ------
    AssertionError
        If the formatter returns something that is not a ``str``.
    """
    formatter = fmt.SeriesFormatter(
        self,
        name=name,
        length=length,
        header=header,
        index=index,
        dtype=dtype,
        na_rep=na_rep,
        float_format=float_format,
        min_rows=min_rows,
        max_rows=max_rows,
    )
    result = formatter.to_string()

    # catch contract violations
    if not isinstance(result, str):
        raise AssertionError(
            "result must be of type str, type "
            f"of result is {repr(type(result).__name__)}"
        )

    if buf is None:
        return result

    if hasattr(buf, "write"):
        # error: Item "str" of "Union[str, PathLike[str], WriteBuffer
        # [str]]" has no attribute "write"
        buf.write(result)  # type: ignore[union-attr]
    else:
        # Pin the encoding: without it open() uses the locale's preferred
        # encoding, so the bytes written depend on the platform and
        # non-ASCII output can fail with UnicodeEncodeError.
        # error: Argument 1 to "open" has incompatible type "Union[str,
        # PathLike[str], WriteBuffer[str]]"; expected "Union[Union[str,
        # bytes, PathLike[str], PathLike[bytes]], int]"
        with open(buf, "w", encoding="utf-8") as f:  # type: ignore[arg-type]
            f.write(result)
    return None
|
|
|
|
@doc(
|
|
klass=_shared_doc_kwargs["klass"],
|
|
storage_options=_shared_docs["storage_options"],
|
|
examples=dedent(
|
|
"""Examples
|
|
--------
|
|
>>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal")
|
|
>>> print(s.to_markdown())
|
|
| | animal |
|
|
|---:|:---------|
|
|
| 0 | elk |
|
|
| 1 | pig |
|
|
| 2 | dog |
|
|
| 3 | quetzal |
|
|
|
|
Output markdown with a tabulate option.
|
|
|
|
>>> print(s.to_markdown(tablefmt="grid"))
|
|
+----+----------+
|
|
| | animal |
|
|
+====+==========+
|
|
| 0 | elk |
|
|
+----+----------+
|
|
| 1 | pig |
|
|
+----+----------+
|
|
| 2 | dog |
|
|
+----+----------+
|
|
| 3 | quetzal |
|
|
+----+----------+"""
|
|
),
|
|
)
|
|
def to_markdown(
    self,
    buf: IO[str] | None = None,
    mode: str = "wt",
    index: bool = True,
    storage_options: StorageOptions = None,
    **kwargs,
) -> str | None:
    """
    Print {klass} in Markdown-friendly format.

    .. versionadded:: 1.0.0

    Parameters
    ----------
    buf : str, Path or StringIO-like, optional, default None
        Buffer to write to. If None, the output is returned as a string.
    mode : str, optional
        Mode in which file is opened, "wt" by default.
    index : bool, optional, default True
        Add index (row) labels.

        .. versionadded:: 1.1.0
    {storage_options}

        .. versionadded:: 1.2.0

    **kwargs
        These parameters will be passed to `tabulate \
            <https://pypi.org/project/tabulate>`_.

    Returns
    -------
    str
        {klass} in Markdown-friendly format.

    Notes
    -----
    Requires the `tabulate <https://pypi.org/project/tabulate>`_ package.

    {examples}
    """
    # The Series is converted to a one-column frame, which does the rendering
    frame = self.to_frame()
    return frame.to_markdown(
        buf=buf,
        mode=mode,
        index=index,
        storage_options=storage_options,
        **kwargs,
    )
|
|
|
|
# ----------------------------------------------------------------------
|
|
|
|
def items(self) -> Iterable[tuple[Hashable, Any]]:
    """
    Lazily iterate over (index, value) tuples.

    The index labels and the values are paired up one by one, which is
    handy when a lazy iterator over both is wanted.

    Returns
    -------
    iterable
        Iterable of tuples containing the (index, value) pairs from a
        Series.

    See Also
    --------
    DataFrame.items : Iterate over (column name, Series) pairs.
    DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.

    Examples
    --------
    >>> s = pd.Series(['A', 'B', 'C'])
    >>> for index, value in s.items():
    ...     print(f"Index : {index}, Value : {value}")
    Index : 0, Value : A
    Index : 1, Value : B
    Index : 2, Value : C
    """
    labels = iter(self.index)
    values = iter(self)
    return zip(labels, values)
|
|
|
|
def iteritems(self) -> Iterable[tuple[Hashable, Any]]:
    """
    Lazily iterate over (index, value) tuples.

    .. deprecated:: 1.5.0
        iteritems is deprecated and will be removed in a future version.
        Use .items instead.

    Emits a FutureWarning and then returns exactly what ``items`` returns.

    Returns
    -------
    iterable
        Iterable of tuples containing the (index, value) pairs from a
        Series.

    See Also
    --------
    Series.items : Recommended alternative.
    DataFrame.items : Iterate over (column name, Series) pairs.
    DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.
    """
    message = (
        "iteritems is deprecated and will be removed in a future version. "
        "Use .items instead."
    )
    warnings.warn(message, FutureWarning, stacklevel=find_stack_level())
    return self.items()
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Misc public methods
|
|
|
|
def keys(self) -> Index:
    """
    Return alias for index.

    Returns
    -------
    Index
        Index of the Series.
    """
    index = self.index
    return index
|
|
|
|
def to_dict(self, into: type[dict] = dict) -> dict:
    """
    Convert Series to {label -> value} dict or dict-like object.

    Parameters
    ----------
    into : class, default dict
        The collections.abc.Mapping subclass to use as the return
        object. Can be the actual class or an empty
        instance of the mapping type you want.  If you want a
        collections.defaultdict, you must pass it initialized.

    Returns
    -------
    collections.abc.Mapping
        Key-value representation of Series.

    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4])
    >>> s.to_dict()
    {0: 1, 1: 2, 2: 3, 3: 4}
    >>> from collections import OrderedDict, defaultdict
    >>> s.to_dict(OrderedDict)
    OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
    >>> dd = defaultdict(list)
    >>> s.to_dict(dd)
    defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
    """
    # GH16122
    mapping_factory = com.standardize_mapping(into)
    # each value goes through maybe_box_native before it is stored
    boxed_pairs = ((label, maybe_box_native(val)) for label, val in self.items())
    return mapping_factory(boxed_pairs)
|
|
|
|
def to_frame(self, name: Hashable = lib.no_default) -> DataFrame:
    """
    Convert Series to DataFrame.

    Parameters
    ----------
    name : object, optional
        The passed name should substitute for the series name (if it has
        one).

    Returns
    -------
    DataFrame
        DataFrame representation of Series.

    Examples
    --------
    >>> s = pd.Series(["a", "b", "c"],
    ...               name="vals")
    >>> s.to_frame()
      vals
    0    a
    1    b
    2    c
    """
    if name is None:
        warnings.warn(
            "Explicitly passing `name=None` currently preserves the Series' name "
            "or uses a default name of 0. This behaviour is deprecated, and in "
            "the future `None` will be used as the name of the resulting "
            "DataFrame column.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        name = lib.no_default

    if name is lib.no_default:
        name = self.name
        # an unnamed Series gets the default [0] column,
        #  same as we would get with DataFrame(self)
        use_default_columns = name is None
    else:
        use_default_columns = False

    columns: Index
    columns = default_index(1) if use_default_columns else Index([name])

    mgr = self._mgr.to_2d_mgr(columns)
    frame = self._constructor_expanddim(mgr)
    return frame.__finalize__(self, method="to_frame")
|
|
|
|
def _set_name(self, name, inplace=False) -> Series:
    """
    Set the Series name.

    Parameters
    ----------
    name : str
    inplace : bool
        Whether to modify `self` directly or return a copy.

    Returns
    -------
    Series
        ``self`` when ``inplace`` is True, otherwise the renamed copy.
    """
    inplace = validate_bool_kwarg(inplace, "inplace")
    if inplace:
        target = self
    else:
        target = self.copy()
    target.name = name
    return target
|
|
|
|
@Appender(
|
|
"""
|
|
Examples
|
|
--------
|
|
>>> ser = pd.Series([390., 350., 30., 20.],
|
|
... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
|
|
>>> ser
|
|
Falcon 390.0
|
|
Falcon 350.0
|
|
Parrot 30.0
|
|
Parrot 20.0
|
|
Name: Max Speed, dtype: float64
|
|
>>> ser.groupby(["a", "b", "a", "b"]).mean()
|
|
a 210.0
|
|
b 185.0
|
|
Name: Max Speed, dtype: float64
|
|
>>> ser.groupby(level=0).mean()
|
|
Falcon 370.0
|
|
Parrot 25.0
|
|
Name: Max Speed, dtype: float64
|
|
>>> ser.groupby(ser > 100).mean()
|
|
Max Speed
|
|
False 25.0
|
|
True 370.0
|
|
Name: Max Speed, dtype: float64
|
|
|
|
**Grouping by Indexes**
|
|
|
|
We can groupby different levels of a hierarchical index
|
|
using the `level` parameter:
|
|
|
|
>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
|
|
... ['Captive', 'Wild', 'Captive', 'Wild']]
|
|
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
|
|
>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
|
|
>>> ser
|
|
Animal Type
|
|
Falcon Captive 390.0
|
|
Wild 350.0
|
|
Parrot Captive 30.0
|
|
Wild 20.0
|
|
Name: Max Speed, dtype: float64
|
|
>>> ser.groupby(level=0).mean()
|
|
Animal
|
|
Falcon 370.0
|
|
Parrot 25.0
|
|
Name: Max Speed, dtype: float64
|
|
>>> ser.groupby(level="Type").mean()
|
|
Type
|
|
Captive 210.0
|
|
Wild 185.0
|
|
Name: Max Speed, dtype: float64
|
|
|
|
We can also choose to include `NA` in group keys or not by defining
|
|
`dropna` parameter, the default setting is `True`.
|
|
|
|
>>> ser = pd.Series([1, 2, 3, 3], index=["a", 'a', 'b', np.nan])
|
|
>>> ser.groupby(level=0).sum()
|
|
a 3
|
|
b 3
|
|
dtype: int64
|
|
|
|
>>> ser.groupby(level=0, dropna=False).sum()
|
|
a 3
|
|
b 3
|
|
NaN 3
|
|
dtype: int64
|
|
|
|
>>> arrays = ['Falcon', 'Falcon', 'Parrot', 'Parrot']
|
|
>>> ser = pd.Series([390., 350., 30., 20.], index=arrays, name="Max Speed")
|
|
>>> ser.groupby(["a", "b", "a", np.nan]).mean()
|
|
a 210.0
|
|
b 350.0
|
|
Name: Max Speed, dtype: float64
|
|
|
|
>>> ser.groupby(["a", "b", "a", np.nan], dropna=False).mean()
|
|
a 210.0
|
|
b 350.0
|
|
NaN 20.0
|
|
Name: Max Speed, dtype: float64
|
|
"""
|
|
)
|
|
@Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
|
|
def groupby(
    self,
    by=None,
    axis: Axis = 0,
    level: Level = None,
    as_index: bool = True,
    sort: bool = True,
    group_keys: bool | lib.NoDefault = no_default,
    squeeze: bool | lib.NoDefault = no_default,
    observed: bool = False,
    dropna: bool = True,
) -> SeriesGroupBy:
    # No docstring here on purpose: the decorators stacked on this method
    # assemble ``__doc__``.
    from pandas.core.groupby.generic import SeriesGroupBy

    if squeeze is no_default:
        squeeze = False
    else:
        # any explicitly passed value for ``squeeze`` triggers the warning
        warnings.warn(
            (
                "The `squeeze` parameter is deprecated and "
                "will be removed in a future version."
            ),
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    if by is None and level is None:
        raise TypeError("You have to supply one of 'by' and 'level'")
    axis_number = self._get_axis_number(axis)

    return SeriesGroupBy(
        obj=self,
        keys=by,
        axis=axis_number,
        level=level,
        as_index=as_index,
        sort=sort,
        group_keys=group_keys,
        squeeze=squeeze,
        observed=observed,
        dropna=dropna,
    )
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Statistics, overridden ndarray methods
|
|
|
|
# TODO: integrate bottleneck
|
|
def count(self, level: Level = None):
    """
    Return number of non-NA/null observations in the Series.

    Parameters
    ----------
    level : int or level name, default None
        If the axis is a MultiIndex (hierarchical), count along a
        particular level, collapsing into a smaller Series.

    Returns
    -------
    int or Series (if level specified)
        Number of non-null values in the Series.

    Raises
    ------
    ValueError
        If ``level`` is given and the index is not a MultiIndex.

    See Also
    --------
    DataFrame.count : Count non-NA cells for each column or row.

    Examples
    --------
    >>> s = pd.Series([0.0, 1.0, np.nan])
    >>> s.count()
    2
    """
    if level is None:
        return notna(self._values).sum().astype("int64")

    warnings.warn(
        "Using the level keyword in DataFrame and Series aggregations is "
        "deprecated and will be removed in a future version. Use groupby "
        "instead. ser.count(level=1) should use ser.groupby(level=1).count().",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    if not isinstance(self.index, MultiIndex):
        raise ValueError("Series.count level is only valid with a MultiIndex")

    index = self.index
    assert isinstance(index, MultiIndex)  # for mypy

    if isinstance(level, str):
        level = index._get_level_number(level)

    group_labels = index.levels[level]
    codes = np.array(index.codes[level], subok=False, copy=True)

    # Codes of -1 are moved to an extra trailing slot that is labelled
    # with the level's NA value.
    na_positions = codes == -1
    if na_positions.any():
        codes[na_positions] = na_slot = len(group_labels)
        group_labels = group_labels.insert(na_slot, group_labels._na_value)

    observed_codes = codes[notna(self._values)]
    # error: Argument "minlength" to "bincount" has incompatible type
    # "Optional[int]"; expected "SupportsIndex"
    counts = np.bincount(
        observed_codes, minlength=len(group_labels) or None  # type: ignore[arg-type]
    )
    result = self._constructor(counts, index=group_labels, dtype="int64")
    return result.__finalize__(self, method="count")
|
|
|
|
def mode(self, dropna: bool = True) -> Series:
    """
    Return the mode(s) of the Series.

    The mode is the value that appears most often. There can be multiple modes.

    Always returns Series even if only one value is returned.

    Parameters
    ----------
    dropna : bool, default True
        Don't consider counts of NaN/NaT.

    Returns
    -------
    Series
        Modes of the Series in sorted order.
    """
    # TODO: Add option for bins like value_counts()
    values = self._values
    if isinstance(values, np.ndarray):
        res_values = algorithms.mode(values, dropna=dropna)
    else:
        res_values = values._mode(dropna=dropna)

    # Ensure index is type stable (should always use int index)
    result = self._constructor(
        res_values, index=range(len(res_values)), name=self.name
    )
    # Finalize against ``self`` so the result is handled like the results
    # of the other Series-returning methods (repeat, reset_index, count).
    return result.__finalize__(self, method="mode")
|
|
|
|
def unique(self) -> ArrayLike:
    """
    Return unique values of Series object.

    Uniques are returned in order of appearance. Hash table-based unique,
    therefore does NOT sort.

    Returns
    -------
    ndarray or ExtensionArray
        The unique values returned as a NumPy array. See Notes.

    See Also
    --------
    Series.drop_duplicates : Return Series with duplicate values removed.
    unique : Top-level unique method for any 1-d array-like object.
    Index.unique : Return Index with unique values from an Index object.

    Notes
    -----
    Returns the unique values as a NumPy array. In case of an
    extension-array backed Series, a new
    :class:`~api.extensions.ExtensionArray` of that type with just
    the unique values is returned. This includes

        * Categorical
        * Period
        * Datetime with Timezone
        * Interval
        * Sparse
        * IntegerNA

    See Examples section.

    Examples
    --------
    >>> pd.Series([2, 1, 3, 3], name='A').unique()
    array([2, 1, 3])

    >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
    array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')

    >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern')
    ...            for _ in range(3)]).unique()
    <DatetimeArray>
    ['2016-01-01 00:00:00-05:00']
    Length: 1, dtype: datetime64[ns, US/Eastern]

    A Categorical will return categories in the order of
    appearance and with the same dtype.

    >>> pd.Series(pd.Categorical(list('baabc'))).unique()
    ['b', 'a', 'c']
    Categories (3, object): ['a', 'b', 'c']
    >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'),
    ...                          ordered=True)).unique()
    ['b', 'a', 'c']
    Categories (3, object): ['a' < 'b' < 'c']
    """
    # the parent class implementation does all the work
    uniques = super().unique()
    return uniques
|
|
|
|
@overload
def drop_duplicates(
    self,
    keep: Literal["first", "last", False] = ...,
    *,
    inplace: Literal[False] = ...,
) -> Series:
    ...

@overload
def drop_duplicates(
    self, keep: Literal["first", "last", False] = ..., *, inplace: Literal[True]
) -> None:
    ...

@overload
def drop_duplicates(
    self, keep: Literal["first", "last", False] = ..., *, inplace: bool = ...
) -> Series | None:
    ...

@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
def drop_duplicates(
    self, keep: Literal["first", "last", False] = "first", inplace: bool = False
) -> Series | None:
    """
    Return Series with duplicate values removed.

    Parameters
    ----------
    keep : {'first', 'last', ``False``}, default 'first'
        Method to handle dropping duplicates:

        - 'first' : Drop duplicates except for the first occurrence.
        - 'last' : Drop duplicates except for the last occurrence.
        - ``False`` : Drop all duplicates.

    inplace : bool, default ``False``
        If ``True``, performs operation inplace and returns None.

    Returns
    -------
    Series or None
        Series with duplicates dropped or None if ``inplace=True``.

    See Also
    --------
    Index.drop_duplicates : Equivalent method on Index.
    DataFrame.drop_duplicates : Equivalent method on DataFrame.
    Series.duplicated : Related method on Series, indicating duplicate
        Series values.
    Series.unique : Return unique values as an array.

    Examples
    --------
    Generate a Series with duplicated entries.

    >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'],
    ...               name='animal')
    >>> s
    0      lama
    1       cow
    2      lama
    3    beetle
    4      lama
    5     hippo
    Name: animal, dtype: object

    With the 'keep' parameter, the selection behaviour of duplicated values
    can be changed. The value 'first' keeps the first occurrence for each
    set of duplicated entries. The default value of keep is 'first'.

    >>> s.drop_duplicates()
    0      lama
    1       cow
    3    beetle
    5     hippo
    Name: animal, dtype: object

    The value 'last' for parameter 'keep' keeps the last occurrence for
    each set of duplicated entries.

    >>> s.drop_duplicates(keep='last')
    1       cow
    3    beetle
    4      lama
    5     hippo
    Name: animal, dtype: object

    The value ``False`` for parameter 'keep' discards all sets of
    duplicated entries. Setting the value of 'inplace' to ``True`` performs
    the operation inplace and returns ``None``.

    >>> s.drop_duplicates(keep=False, inplace=True)
    >>> s
    1       cow
    3    beetle
    5     hippo
    Name: animal, dtype: object
    """
    # Reject non-bool ``inplace`` values up front.
    inplace = validate_bool_kwarg(inplace, "inplace")
    deduplicated = super().drop_duplicates(keep=keep)
    if not inplace:
        return deduplicated
    # In-place: swap our data for the de-duplicated result and signal it
    # by returning None.
    self._update_inplace(deduplicated)
    return None
|
|
|
|
def duplicated(self, keep: Literal["first", "last", False] = "first") -> Series:
    """
    Indicate duplicate Series values.

    Duplicated values are indicated as ``True`` values in the resulting
    Series. Either all duplicates, all except the first or all except the
    last occurrence of duplicates can be indicated.

    Parameters
    ----------
    keep : {'first', 'last', False}, default 'first'
        Method used to mark duplicates:

        - 'first' : Mark duplicates as ``True`` except for the first
          occurrence.
        - 'last' : Mark duplicates as ``True`` except for the last
          occurrence.
        - ``False`` : Mark all duplicates as ``True``.

    Returns
    -------
    Series[bool]
        Series indicating whether each value has occurred in the
        preceding values.

    See Also
    --------
    Index.duplicated : Equivalent method on pandas.Index.
    DataFrame.duplicated : Equivalent method on pandas.DataFrame.
    Series.drop_duplicates : Remove duplicate values from Series.

    Examples
    --------
    By default, for each set of duplicated values, the first occurrence is
    set to False and all others to True:

    >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama'])
    >>> animals.duplicated()
    0    False
    1    False
    2     True
    3    False
    4     True
    dtype: bool

    which is equivalent to

    >>> animals.duplicated(keep='first')
    0    False
    1    False
    2     True
    3    False
    4     True
    dtype: bool

    By using 'last', the last occurrence of each set of duplicated values
    is set to False and all others to True:

    >>> animals.duplicated(keep='last')
    0     True
    1    False
    2     True
    3    False
    4    False
    dtype: bool

    By setting keep to ``False``, all duplicates are True:

    >>> animals.duplicated(keep=False)
    0     True
    1    False
    2     True
    3    False
    4     True
    dtype: bool
    """
    flags = self._duplicated(keep=keep)
    # Re-attach the original index so the flags line up with the caller's
    # labels.
    marked = self._constructor(flags, index=self.index)
    return marked.__finalize__(self, method="duplicated")
|
|
|
|
def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable:
    """
    Return the row label of the minimum value.

    If multiple values equal the minimum, the first row label with that
    value is returned.

    Parameters
    ----------
    axis : {0 or 'index'}
        Unused. Parameter needed for compatibility with DataFrame.
    skipna : bool, default True
        Exclude NA/null values. If the entire Series is NA, the result
        will be NA.
    *args, **kwargs
        Additional arguments and keywords have no effect but might be
        accepted for compatibility with NumPy.

    Returns
    -------
    scalar
        Index label of the minimum value, or ``nan`` when no position
        could be determined.

    Raises
    ------
    ValueError
        If the Series is empty.

    See Also
    --------
    numpy.argmin : Return indices of the minimum values
        along the given axis.
    DataFrame.idxmin : Return index of first occurrence of minimum
        over requested axis.
    Series.idxmax : Return index *label* of the first occurrence
        of maximum of values.

    Notes
    -----
    This method is the Series version of ``ndarray.argmin``. This method
    returns the label of the minimum, while ``ndarray.argmin`` returns
    the position. To get the position, use ``series.values.argmin()``.

    Examples
    --------
    >>> s = pd.Series(data=[1, None, 4, 1],
    ...               index=['A', 'B', 'C', 'D'])
    >>> s
    A    1.0
    B    NaN
    C    4.0
    D    1.0
    dtype: float64

    >>> s.idxmin()
    'A'

    If `skipna` is False and there is an NA value in the data,
    the function returns ``nan``.

    >>> s.idxmin(skipna=False)
    nan
    """
    position = self.argmin(axis, skipna, *args, **kwargs)
    # argmin reports -1 when it cannot give a position; surface that as NaN
    # instead of indexing from the end.
    if position == -1:
        return np.nan
    return self.index[position]
|
|
|
|
def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable:
    """
    Return the row label of the maximum value.

    If multiple values equal the maximum, the first row label with that
    value is returned.

    Parameters
    ----------
    axis : {0 or 'index'}
        Unused. Parameter needed for compatibility with DataFrame.
    skipna : bool, default True
        Exclude NA/null values. If the entire Series is NA, the result
        will be NA.
    *args, **kwargs
        Additional arguments and keywords have no effect but might be
        accepted for compatibility with NumPy.

    Returns
    -------
    scalar
        Index label of the maximum value, or ``nan`` when no position
        could be determined.

    Raises
    ------
    ValueError
        If the Series is empty.

    See Also
    --------
    numpy.argmax : Return indices of the maximum values
        along the given axis.
    DataFrame.idxmax : Return index of first occurrence of maximum
        over requested axis.
    Series.idxmin : Return index *label* of the first occurrence
        of minimum of values.

    Notes
    -----
    This method is the Series version of ``ndarray.argmax``. This method
    returns the label of the maximum, while ``ndarray.argmax`` returns
    the position. To get the position, use ``series.values.argmax()``.

    Examples
    --------
    >>> s = pd.Series(data=[1, None, 4, 3, 4],
    ...               index=['A', 'B', 'C', 'D', 'E'])
    >>> s
    A    1.0
    B    NaN
    C    4.0
    D    3.0
    E    4.0
    dtype: float64

    >>> s.idxmax()
    'C'

    If `skipna` is False and there is an NA value in the data,
    the function returns ``nan``.

    >>> s.idxmax(skipna=False)
    nan
    """
    position = self.argmax(axis, skipna, *args, **kwargs)
    # argmax reports -1 when it cannot give a position; surface that as NaN
    # instead of indexing from the end.
    if position == -1:
        return np.nan
    return self.index[position]
|
|
|
|
def round(self, decimals: int = 0, *args, **kwargs) -> Series:
    """
    Round each value in a Series to the given number of decimals.

    Parameters
    ----------
    decimals : int, default 0
        Number of decimal places to round to. If decimals is negative,
        it specifies the number of positions to the left of the decimal point.
    *args, **kwargs
        Additional arguments and keywords have no effect but might be
        accepted for compatibility with NumPy.

    Returns
    -------
    Series
        Rounded values of the Series.

    See Also
    --------
    numpy.around : Round values of an np.array.
    DataFrame.round : Round values of a DataFrame.

    Examples
    --------
    >>> s = pd.Series([0.1, 1.3, 2.7])
    >>> s.round()
    0    0.0
    1    1.0
    2    3.0
    dtype: float64
    """
    # Validate the NumPy-compatibility arguments before doing any work.
    nv.validate_round(args, kwargs)
    rounded = self._values.round(decimals)
    return self._constructor(rounded, index=self.index).__finalize__(
        self, method="round"
    )
|
|
|
|
@overload
def quantile(
    self, q: float = ..., interpolation: QuantileInterpolation = ...
) -> float:
    ...

@overload
def quantile(
    self,
    q: Sequence[float] | AnyArrayLike,
    interpolation: QuantileInterpolation = ...,
) -> Series:
    ...

@overload
def quantile(
    self,
    q: float | Sequence[float] | AnyArrayLike = ...,
    interpolation: QuantileInterpolation = ...,
) -> float | Series:
    ...

def quantile(
    self,
    q: float | Sequence[float] | AnyArrayLike = 0.5,
    interpolation: QuantileInterpolation = "linear",
) -> float | Series:
    """
    Return value at the given quantile.

    Parameters
    ----------
    q : float or array-like, default 0.5 (50% quantile)
        The quantile(s) to compute, which can lie in range: 0 <= q <= 1.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        This optional parameter specifies the interpolation method to use,
        when the desired quantile lies between two data points `i` and `j`:

            * linear: `i + (j - i) * fraction`, where `fraction` is the
              fractional part of the index surrounded by `i` and `j`.
            * lower: `i`.
            * higher: `j`.
            * nearest: `i` or `j` whichever is nearest.
            * midpoint: (`i` + `j`) / 2.

    Returns
    -------
    float or Series
        If ``q`` is an array, a Series will be returned where the
        index is ``q`` and the values are the quantiles, otherwise
        a float will be returned.

    See Also
    --------
    core.window.Rolling.quantile : Calculate the rolling quantile.
    numpy.percentile : Returns the q-th percentile(s) of the array elements.

    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4])
    >>> s.quantile(.5)
    2.5
    >>> s.quantile([.25, .5, .75])
    0.25    1.75
    0.50    2.50
    0.75    3.25
    dtype: float64
    """
    validate_percentile(q)

    # We dispatch to DataFrame so that core.internals only has to worry
    #  about 2D cases.
    frame = self.to_frame()

    result = frame.quantile(q=q, interpolation=interpolation, numeric_only=False)
    if result.ndim == 2:
        # Several quantiles of a one-column frame: keep just that column.
        result = result.iloc[:, 0]

    if not is_list_like(q):
        # scalar
        return result.iloc[0]

    result.name = self.name
    return self._constructor(result, index=Float64Index(q), name=self.name)
|
|
|
|
def corr(
    self,
    other: Series,
    method: Literal["pearson", "kendall", "spearman"]
    | Callable[[np.ndarray, np.ndarray], float] = "pearson",
    min_periods: int | None = None,
) -> float:
    """
    Compute correlation with `other` Series, excluding missing values.

    The two `Series` objects are not required to be the same length and will be
    aligned internally before the correlation function is applied.

    Parameters
    ----------
    other : Series
        Series with which to compute the correlation.
    method : {'pearson', 'kendall', 'spearman'} or callable
        Method used to compute correlation:

        - pearson : Standard correlation coefficient
        - kendall : Kendall Tau correlation coefficient
        - spearman : Spearman rank correlation
        - callable: Callable with input two 1d ndarrays and returning a float.

        .. warning::
            Note that the returned matrix from corr will have 1 along the
            diagonals and will be symmetric regardless of the callable's
            behavior.
    min_periods : int, optional
        Minimum number of observations needed to have a valid result.

    Returns
    -------
    float
        Correlation with other. ``nan`` if the two Series share no index
        labels.

    Raises
    ------
    ValueError
        If `method` is neither one of the supported names nor a callable.

    See Also
    --------
    DataFrame.corr : Compute pairwise correlation between columns.
    DataFrame.corrwith : Compute pairwise correlation with another
        DataFrame or Series.

    Notes
    -----
    Pearson, Kendall and Spearman correlation are currently computed using pairwise complete observations.

    * `Pearson correlation coefficient <https://en.wikipedia.org/wiki/Pearson_correlation_coefficient>`_
    * `Kendall rank correlation coefficient <https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient>`_
    * `Spearman's rank correlation coefficient <https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_

    Examples
    --------
    >>> def histogram_intersection(a, b):
    ...     v = np.minimum(a, b).sum().round(decimals=1)
    ...     return v
    >>> s1 = pd.Series([.2, .0, .6, .2])
    >>> s2 = pd.Series([.3, .6, .0, .1])
    >>> s1.corr(s2, method=histogram_intersection)
    0.3
    """  # noqa:E501
    # Only labels present in both Series take part in the computation.
    this, other = self.align(other, join="inner", copy=False)
    if len(this) == 0:
        return np.nan

    is_supported = method in ["pearson", "spearman", "kendall"] or callable(method)
    if not is_supported:
        raise ValueError(
            "method must be either 'pearson', "
            "'spearman', 'kendall', or a callable, "
            f"'{method}' was supplied"
        )

    return nanops.nancorr(
        this.values, other.values, method=method, min_periods=min_periods
    )
|
|
|
|
def cov(
    self,
    other: Series,
    min_periods: int | None = None,
    ddof: int | None = 1,
) -> float:
    """
    Compute covariance with Series, excluding missing values.

    The two `Series` objects are not required to be the same length and
    will be aligned internally before the covariance is calculated.

    Parameters
    ----------
    other : Series
        Series with which to compute the covariance.
    min_periods : int, optional
        Minimum number of observations needed to have a valid result.
    ddof : int, default 1
        Delta degrees of freedom.  The divisor used in calculations
        is ``N - ddof``, where ``N`` represents the number of elements.

        .. versionadded:: 1.1.0

    Returns
    -------
    float
        Covariance between Series and other normalized by ``N - ddof``
        (with the default ``ddof=1`` this is the unbiased estimator).
        ``nan`` if the two Series share no index labels.

    See Also
    --------
    DataFrame.cov : Compute pairwise covariance of columns.

    Examples
    --------
    >>> s1 = pd.Series([0.90010907, 0.13484424, 0.62036035])
    >>> s2 = pd.Series([0.12528585, 0.26962463, 0.51111198])
    >>> s1.cov(s2)
    -0.01685762652715874
    """
    # Only labels present in both Series take part in the computation.
    this, other = self.align(other, join="inner", copy=False)
    if len(this) == 0:
        return np.nan
    return nanops.nancov(
        this.values, other.values, min_periods=min_periods, ddof=ddof
    )
|
|
|
|
@doc(
    klass="Series",
    extra_params="",
    other_klass="DataFrame",
    examples=dedent(
        """
    Difference with previous row

    >>> s = pd.Series([1, 1, 2, 3, 5, 8])
    >>> s.diff()
    0    NaN
    1    0.0
    2    1.0
    3    1.0
    4    2.0
    5    3.0
    dtype: float64

    Difference with 3rd previous row

    >>> s.diff(periods=3)
    0    NaN
    1    NaN
    2    NaN
    3    2.0
    4    4.0
    5    6.0
    dtype: float64

    Difference with following row

    >>> s.diff(periods=-1)
    0    0.0
    1   -1.0
    2   -1.0
    3   -2.0
    4   -3.0
    5    NaN
    dtype: float64

    Overflow in input dtype

    >>> s = pd.Series([1, 0], dtype=np.uint8)
    >>> s.diff()
    0      NaN
    1    255.0
    dtype: float64"""
    ),
)
def diff(self, periods: int = 1) -> Series:
    """
    First discrete difference of element.

    Calculates the difference of a {klass} element compared with another
    element in the {klass} (default is element in previous row).

    Parameters
    ----------
    periods : int, default 1
        Periods to shift for calculating difference, accepts negative
        values.
    {extra_params}
    Returns
    -------
    {klass}
        First differences of the Series.

    See Also
    --------
    {klass}.pct_change: Percent change over given number of periods.
    {klass}.shift: Shift index by desired number of periods with an
        optional time freq.
    {other_klass}.diff: First discrete difference of object.

    Notes
    -----
    For boolean dtypes, this uses :meth:`operator.xor` rather than
    :meth:`operator.sub`.
    The result is calculated according to current dtype in {klass},
    however dtype of the result is always float64.

    Examples
    --------
    {examples}
    """
    # NOTE: the docstring above is a template filled in by ``@doc``; its
    # ``{...}`` placeholders must stay in sync with the decorator keywords.
    differences = algorithms.diff(self._values, periods)
    return self._constructor(differences, index=self.index).__finalize__(
        self, method="diff"
    )
|
|
|
|
def autocorr(self, lag: int = 1) -> float:
    """
    Compute the lag-N autocorrelation.

    This method computes the Pearson correlation between
    the Series and its shifted self.

    Parameters
    ----------
    lag : int, default 1
        Number of lags to apply before performing autocorrelation.

    Returns
    -------
    float
        The Pearson correlation between self and self.shift(lag).

    See Also
    --------
    Series.corr : Compute the correlation between two Series.
    Series.shift : Shift index by desired number of periods.
    DataFrame.corr : Compute pairwise correlation of columns.
    DataFrame.corrwith : Compute pairwise correlation between rows or
        columns of two DataFrame objects.

    Notes
    -----
    If the Pearson correlation is not well defined return 'NaN'.

    Examples
    --------
    >>> s = pd.Series([0.25, 0.5, 0.2, -0.05])
    >>> s.autocorr()  # doctest: +ELLIPSIS
    0.10355...
    >>> s.autocorr(lag=2)  # doctest: +ELLIPSIS
    -0.99999...

    If the Pearson correlation is not well defined, then 'NaN' is returned.

    >>> s = pd.Series([1, 0, 0, 0])
    >>> s.autocorr()
    nan
    """
    lagged = self.shift(lag)
    return self.corr(lagged)
|
|
|
|
def dot(self, other: AnyArrayLike) -> Series | np.ndarray:
    """
    Compute the dot product between the Series and the columns of other.

    This method computes the dot product between the Series and another
    one, or the Series and each column of a DataFrame, or the Series and
    each column of an array.

    It can also be called using `self @ other` in Python >= 3.5.

    Parameters
    ----------
    other : Series, DataFrame or array-like
        The other object to compute the dot product with its columns.

    Returns
    -------
    scalar, Series or numpy.ndarray
        Return the dot product of the Series and other if other is a
        Series, the Series of the dot product of Series and each column of
        other if other is a DataFrame or a numpy.ndarray between the Series
        and each column of the numpy array.

    Raises
    ------
    ValueError
        If `other` is a Series or DataFrame whose index does not hold the
        same labels as this Series, or if `other` is array-like and its
        first dimension does not match the length of this Series.

    See Also
    --------
    DataFrame.dot: Compute the matrix product with the DataFrame.
    Series.mul: Multiplication of series and other, element-wise.

    Notes
    -----
    The Series and other have to share the same index if other is a Series
    or a DataFrame.

    Examples
    --------
    >>> s = pd.Series([0, 1, 2, 3])
    >>> other = pd.Series([-1, 2, -3, 4])
    >>> s.dot(other)
    8
    >>> s @ other
    8
    >>> df = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]])
    >>> s.dot(df)
    0    24
    1    14
    dtype: int64
    >>> arr = np.array([[0, 1], [-2, 3], [4, -5], [6, 7]])
    >>> s.dot(arr)
    array([24, 14])
    """
    if isinstance(other, (Series, ABCDataFrame)):
        common = self.index.union(other.index)
        # If the union is larger than either index, one side has labels
        # the other lacks, so the operands cannot be aligned.
        if len(common) > len(self.index) or len(common) > len(other.index):
            raise ValueError("matrices are not aligned")

        # Bring both operands into the same label order before multiplying.
        left = self.reindex(index=common, copy=False)
        right = other.reindex(index=common, copy=False)
        lvals = left.values
        rvals = right.values
    else:
        lvals = self.values
        rvals = np.asarray(other)
        if lvals.shape[0] != rvals.shape[0]:
            # ValueError is still caught by callers handling ``Exception``.
            raise ValueError(
                f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}"
            )

    product = np.dot(lvals, rvals)
    if isinstance(other, ABCDataFrame):
        # One value per column of ``other``: label the result with them.
        return self._constructor(product, index=other.columns).__finalize__(
            self, method="dot"
        )
    # Series operand -> scalar; array-like operand -> ndarray.
    return product
|
|
|
|
def __matmul__(self, other):
    """
    Matrix multiplication using binary `@` operator in Python>=3.5.

    Evaluates ``self @ other`` by delegating to :meth:`dot`.
    """
    return self.dot(other)
|
|
|
|
def __rmatmul__(self, other):
    """
    Matrix multiplication using binary `@` operator in Python>=3.5.

    Evaluates ``other @ self`` (the Series is the right-hand operand) by
    calling :meth:`dot` with the transpose of `other`.
    """
    transposed = np.transpose(other)
    return self.dot(transposed)
|
|
|
|
@doc(base.IndexOpsMixin.searchsorted, klass="Series")
# Signature of "searchsorted" incompatible with supertype "IndexOpsMixin"
def searchsorted(  # type: ignore[override]
    self,
    value: NumpyValueArrayLike | ExtensionArray,
    side: Literal["left", "right"] = "left",
    sorter: NumpySorter = None,
) -> npt.NDArray[np.intp] | np.intp:
    # No docstring here on purpose: ``@doc`` builds it from the
    # ``IndexOpsMixin.searchsorted`` template with ``klass="Series"``.
    return base.IndexOpsMixin.searchsorted(self, value, side=side, sorter=sorter)
|
|
|
|
# -------------------------------------------------------------------
|
|
# Combination
|
|
|
|
def append(
    self, to_append, ignore_index: bool = False, verify_integrity: bool = False
) -> Series:
    """
    Concatenate two or more Series.

    .. deprecated:: 1.4.0
        Use :func:`concat` instead. For further details see
        :ref:`whatsnew_140.deprecations.frame_series_append`

    Parameters
    ----------
    to_append : Series or list/tuple of Series
        Series to append with self.
    ignore_index : bool, default False
        If True, the resulting axis will be labeled 0, 1, …, n - 1.
    verify_integrity : bool, default False
        If True, raise Exception on creating index with duplicates.

    Returns
    -------
    Series
        Concatenated Series.

    See Also
    --------
    concat : General function to concatenate DataFrame or Series objects.

    Notes
    -----
    Iteratively appending to a Series can be more computationally intensive
    than a single concatenate. A better solution is to append values to a
    list and then concatenate the list with the original Series all at
    once.

    Examples
    --------
    >>> s1 = pd.Series([1, 2, 3])
    >>> s2 = pd.Series([4, 5, 6])
    >>> s3 = pd.Series([4, 5, 6], index=[3, 4, 5])
    >>> s1.append(s2)
    0    1
    1    2
    2    3
    0    4
    1    5
    2    6
    dtype: int64

    >>> s1.append(s3)
    0    1
    1    2
    2    3
    3    4
    4    5
    5    6
    dtype: int64

    With `ignore_index` set to True:

    >>> s1.append(s2, ignore_index=True)
    0    1
    1    2
    2    3
    3    4
    4    5
    5    6
    dtype: int64

    With `verify_integrity` set to True:

    >>> s1.append(s2, verify_integrity=True)
    Traceback (most recent call last):
    ...
    ValueError: Indexes have overlapping values: [0, 1, 2]
    """
    # Public entry point only adds the deprecation warning; the work is
    # done by the private ``_append``.
    warnings.warn(
        "The series.append method is deprecated "
        "and will be removed from pandas in a future version. "
        "Use pandas.concat instead.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )

    return self._append(to_append, ignore_index, verify_integrity)
|
|
|
|
def _append(
    self, to_append, ignore_index: bool = False, verify_integrity: bool = False
):
    """
    Concatenate ``to_append`` after this Series without emitting a warning.

    Parameters
    ----------
    to_append : Series or list/tuple of Series
        Object(s) to place after ``self``.
    ignore_index : bool, default False
        Forwarded to ``concat``.
    verify_integrity : bool, default False
        Forwarded to ``concat``.

    Returns
    -------
    The result of ``concat`` over ``self`` followed by ``to_append``.

    Raises
    ------
    TypeError
        If any object to append is a DataFrame.
    """
    # Imported locally, as in the rest of this file for reshape helpers.
    from pandas.core.reshape.concat import concat

    if isinstance(to_append, (list, tuple)):
        to_concat = [self, *to_append]
    else:
        to_concat = [self, to_append]

    # Only the appended objects are checked; ``self`` is known to be a Series.
    if any(isinstance(obj, (ABCDataFrame,)) for obj in to_concat[1:]):
        msg = "to_append should be a Series or list/tuple of Series, got DataFrame"
        raise TypeError(msg)

    return concat(
        to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity
    )
|
|
|
|
def _binop(self, other: Series, func, level=None, fill_value=None):
    """
    Perform generic binary operation with optional fill value.

    Parameters
    ----------
    other : Series
    func : binary operator
    level : int or level name, default None
        Broadcast across a level, matching Index values on the
        passed MultiIndex level.
    fill_value : float or object
        Value to substitute for NA/null values. If both Series are NA in a
        location, the result will be NA regardless of the passed fill value.

    Returns
    -------
    Series
    """
    if not isinstance(other, Series):
        raise AssertionError("Other operand must be Series")

    this = self

    # Alignment is only needed when the indexes differ.
    if not self.index.equals(other.index):
        this, other = self.align(other, level=level, join="outer", copy=False)

    this_vals, other_vals = ops.fill_binop(this._values, other._values, fill_value)

    # Silence floating-point warnings raised while applying ``func``.
    with np.errstate(all="ignore"):
        result = func(this_vals, other_vals)

    name = ops.get_op_result_name(self, other)
    return this._construct_result(result, name)
|
|
|
|
def _construct_result(
    self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable
) -> Series | tuple[Series, Series]:
    """
    Construct an appropriately-labelled Series from the result of an op.

    Parameters
    ----------
    result : ndarray or ExtensionArray
    name : Label

    Returns
    -------
    Series
        In the case of __divmod__ or __rdivmod__, a 2-tuple of Series.
    """
    if isinstance(result, tuple):
        # produced by divmod or rdivmod

        res1 = self._construct_result(result[0], name=name)
        res2 = self._construct_result(result[1], name=name)

        # GH#33427 assertions to keep mypy happy
        assert isinstance(res1, Series)
        assert isinstance(res2, Series)
        return (res1, res2)

    # We do not pass dtype to ensure that the Series constructor
    #  does inference in the case where `result` has object-dtype.
    out = self._constructor(result, index=self.index)
    out = out.__finalize__(self)

    # Set the result's name after __finalize__ is called because __finalize__
    #  would set it back to self.name
    out.name = name
    return out
|
|
|
|
@doc(
    _shared_docs["compare"],
    """
Returns
-------
Series or DataFrame
    If axis is 0 or 'index' the result will be a Series.
    The resulting index will be a MultiIndex with 'self' and 'other'
    stacked alternately at the inner level.

    If axis is 1 or 'columns' the result will be a DataFrame.
    It will have two columns namely 'self' and 'other'.

See Also
--------
DataFrame.compare : Compare with another DataFrame and show differences.

Notes
-----
Matching NaNs will not appear as a difference.

Examples
--------
>>> s1 = pd.Series(["a", "b", "c", "d", "e"])
>>> s2 = pd.Series(["a", "a", "c", "b", "e"])

Align the differences on columns

>>> s1.compare(s2)
  self other
1    b     a
3    d     b

Stack the differences on indices

>>> s1.compare(s2, align_axis=0)
1  self     b
   other    a
3  self     d
   other    b
dtype: object

Keep all original rows

>>> s1.compare(s2, keep_shape=True)
  self other
0  NaN   NaN
1    b     a
2  NaN   NaN
3    d     b
4  NaN   NaN

Keep all original rows and also all original values

>>> s1.compare(s2, keep_shape=True, keep_equal=True)
  self other
0    a     a
1    b     a
2    c     c
3    d     b
4    e     e
""",
    klass=_shared_doc_kwargs["klass"],
)
def compare(
    self,
    other: Series,
    align_axis: Axis = 1,
    keep_shape: bool = False,
    keep_equal: bool = False,
    result_names: Suffixes = ("self", "other"),
) -> DataFrame | Series:
    # No docstring here on purpose: ``@doc`` assembles it from the shared
    # "compare" template plus the Series-specific text passed above.
    # Every argument is forwarded unchanged to the parent implementation.
    return super().compare(
        other=other,
        align_axis=align_axis,
        keep_shape=keep_shape,
        keep_equal=keep_equal,
        result_names=result_names,
    )
|
|
|
|
def combine(
    self,
    other: Series | Hashable,
    func: Callable[[Hashable, Hashable], Hashable],
    fill_value: Hashable = None,
) -> Series:
    """
    Combine the Series with a Series or scalar according to `func`.

    Combine the Series and `other` using `func` to perform elementwise
    selection for combined Series.
    `fill_value` is assumed when value is missing at some index
    from one of the two objects being combined.

    Parameters
    ----------
    other : Series or scalar
        The value(s) to be combined with the `Series`.
    func : function
        Function that takes two scalars as inputs and returns an element.
    fill_value : scalar, optional
        The value to assume when an index is missing from
        one Series or the other. The default specifies to use the
        appropriate NaN value for the underlying dtype of the Series.

    Returns
    -------
    Series
        The result of combining the Series with the other object.

    See Also
    --------
    Series.combine_first : Combine Series values, choosing the calling
        Series' values first.

    Examples
    --------
    Consider 2 Datasets ``s1`` and ``s2`` containing
    highest clocked speeds of different birds.

    >>> s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0})
    >>> s1
    falcon    330.0
    eagle     160.0
    dtype: float64
    >>> s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
    >>> s2
    falcon    345.0
    eagle     200.0
    duck       30.0
    dtype: float64

    Now, to combine the two datasets and view the highest speeds
    of the birds across the two datasets

    >>> s1.combine(s2, max)
    duck        NaN
    eagle     200.0
    falcon    345.0
    dtype: float64

    In the previous example, the resulting value for duck is missing,
    because the maximum of a NaN and a float is a NaN.
    So, in the example, we set ``fill_value=0``,
    so the maximum value returned will be the value from some dataset.

    >>> s1.combine(s2, max, fill_value=0)
    duck       30.0
    eagle     200.0
    falcon    345.0
    dtype: float64
    """
    if fill_value is None:
        fill_value = na_value_for_dtype(self.dtype, compat=False)

    if isinstance(other, Series):
        # If other is a Series, result is based on union of Series,
        # so do this element by element
        new_index = self.index.union(other.index)
        new_name = ops.get_op_result_name(self, other)
        new_values = np.empty(len(new_index), dtype=object)
        # One errstate context for the whole loop instead of one per label.
        with np.errstate(all="ignore"):
            for pos, label in enumerate(new_index):
                left = self.get(label, fill_value)
                right = other.get(label, fill_value)
                new_values[pos] = func(left, right)
    else:
        # Assume that other is a scalar, so apply the function for
        # each element in the Series
        new_index = self.index
        new_name = self.name
        new_values = np.empty(len(new_index), dtype=object)
        with np.errstate(all="ignore"):
            new_values[:] = [func(left, other) for left in self._values]

    # try_float=False is to match agg_series
    npvalues = lib.maybe_convert_objects(new_values, try_float=False)
    res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False)
    return self._constructor(res_values, index=new_index, name=new_name)
|
|
|
|
def combine_first(self, other) -> Series:
    """
    Update null elements with value in the same location in 'other'.

    Combine two Series objects by filling null values in one Series with
    non-null values from the other Series. Result index will be the union
    of the two indexes.

    Parameters
    ----------
    other : Series
        The value(s) to be used for filling null values.

    Returns
    -------
    Series
        The result of combining the provided Series with the other object.

    See Also
    --------
    Series.combine : Perform element-wise operation on two Series
        using a given function.

    Examples
    --------
    >>> s1 = pd.Series([1, np.nan])
    >>> s2 = pd.Series([3, 4, 5])
    >>> s1.combine_first(s2)
    0    1.0
    1    4.0
    2    5.0
    dtype: float64

    Null values still persist if the location of that null value
    does not exist in `other`

    >>> s1 = pd.Series({'falcon': np.nan, 'eagle': 160.0})
    >>> s2 = pd.Series({'eagle': 200.0, 'duck': 30.0})
    >>> s1.combine_first(s2)
    duck       30.0
    eagle     160.0
    falcon      NaN
    dtype: float64
    """
    # Put both operands on the union of their labels.
    new_index = self.index.union(other.index)
    this = self.reindex(new_index, copy=False)
    other = other.reindex(new_index, copy=False)

    # Convert ``other`` to datetimes when only ``self`` is datetime-like.
    if this.dtype.kind == "M" and other.dtype.kind != "M":
        other = to_datetime(other)

    # Keep our own non-null values; take ``other`` everywhere else.
    return this.where(notna(this), other)
|
|
|
|
def update(self, other: Series | Sequence | Mapping) -> None:
    """
    Overwrite values of this Series in place with non-NA values of `other`.

    `other` is aligned to this Series' index first; only labels present
    in this Series are affected, and NA entries of `other` leave the
    existing value untouched.

    Parameters
    ----------
    other : Series, or object coercible into Series

    Examples
    --------
    >>> s = pd.Series([1, 2, 3])
    >>> s.update(pd.Series([4, 5, 6]))
    >>> s
    0    4
    1    5
    2    6
    dtype: int64

    >>> s = pd.Series(['a', 'b', 'c'])
    >>> s.update(pd.Series(['d', 'e'], index=[0, 2]))
    >>> s
    0    d
    1    b
    2    e
    dtype: object

    >>> s = pd.Series([1, 2, 3])
    >>> s.update(pd.Series([4, 5, 6, 7, 8]))
    >>> s
    0    4
    1    5
    2    6
    dtype: int64

    If ``other`` contains NaNs the corresponding values are not updated
    in the original Series.

    >>> s = pd.Series([1, 2, 3])
    >>> s.update(pd.Series([4, np.nan, 6]))
    >>> s
    0    4
    1    2
    2    6
    dtype: int64

    ``other`` can also be a non-Series object type
    that is coercible into a Series

    >>> s = pd.Series([1, 2, 3])
    >>> s.update([4, np.nan, 6])
    >>> s
    0    4
    1    2
    2    6
    dtype: int64

    >>> s = pd.Series([1, 2, 3])
    >>> s.update({1: 9})
    >>> s
    0    1
    1    9
    2    3
    dtype: int64
    """
    # Accept anything the Series constructor understands.
    source = other if isinstance(other, Series) else Series(other)

    # Conform to our own index so the mask lines up position by position.
    aligned = source.reindex_like(self)
    has_value = notna(aligned)

    # Replace the manager with one where the masked positions hold the
    # new values, then notify any cacher of the change.
    self._mgr = self._mgr.putmask(mask=has_value, new=aligned)
    self._maybe_update_cacher()
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Reindexing, sorting
|
|
|
|
# error: Signature of "sort_values" incompatible with supertype "NDFrame"
# Typing overload: ``inplace=False`` (the default) is annotated to return a Series.
@overload  # type: ignore[override]
def sort_values(
    self,
    *,
    axis: Axis = ...,
    ascending: bool | int | Sequence[bool] | Sequence[int] = ...,
    inplace: Literal[False] = ...,
    kind: str = ...,
    na_position: str = ...,
    ignore_index: bool = ...,
    key: ValueKeyFunc = ...,
) -> Series:
    ...


# Typing overload: ``inplace=True`` is annotated to return None.
@overload
def sort_values(
    self,
    *,
    axis: Axis = ...,
    ascending: bool | int | Sequence[bool] | Sequence[int] = ...,
    inplace: Literal[True],
    kind: str = ...,
    na_position: str = ...,
    ignore_index: bool = ...,
    key: ValueKeyFunc = ...,
) -> None:
    ...


# error: Signature of "sort_values" incompatible with supertype "NDFrame"
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
def sort_values(  # type: ignore[override]
    self,
    axis: Axis = 0,
    ascending: bool | int | Sequence[bool] | Sequence[int] = True,
    inplace: bool = False,
    kind: str = "quicksort",
    na_position: str = "last",
    ignore_index: bool = False,
    key: ValueKeyFunc = None,
) -> Series | None:
    """
    Sort by the values.

    Order the Series by its values, ascending or descending, optionally
    after transforming the values with a vectorized `key` function.

    Parameters
    ----------
    axis : {0 or 'index'}
        Unused. Parameter needed for compatibility with DataFrame.
    ascending : bool or list of bools, default True
        If True, sort values in ascending order, otherwise descending.
        A list-like is accepted only when it has exactly one element.
    inplace : bool, default False
        If True, perform operation in-place.
    kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
        Choice of sorting algorithm. See also :func:`numpy.sort` for more
        information. 'mergesort' and 'stable' are the only stable algorithms.
    na_position : {'first', 'last'}, default 'last'
        Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
        the end.
    ignore_index : bool, default False
        If True, the resulting axis will be labeled 0, 1, …, n - 1.

        .. versionadded:: 1.0.0

    key : callable, optional
        If not None, apply the key function to the series values
        before sorting. This is similar to the `key` argument in the
        builtin :meth:`sorted` function, with the notable difference that
        this `key` function should be *vectorized*. It should expect a
        ``Series`` and return an array-like.

        .. versionadded:: 1.1.0

    Returns
    -------
    Series or None
        Series ordered by values or None if ``inplace=True``.

    Raises
    ------
    ValueError
        If ``inplace=True`` on a cached Series, if a list-like `ascending`
        does not have length 1, or if `na_position` is neither 'first'
        nor 'last'.

    See Also
    --------
    Series.sort_index : Sort by the Series indices.
    DataFrame.sort_values : Sort DataFrame by the values along either axis.
    DataFrame.sort_index : Sort DataFrame by indices.

    Examples
    --------
    >>> s = pd.Series([np.nan, 1, 3, 10, 5])
    >>> s
    0     NaN
    1     1.0
    2     3.0
    3    10.0
    4     5.0
    dtype: float64

    Sort values ascending order (default behaviour)

    >>> s.sort_values(ascending=True)
    1     1.0
    2     3.0
    4     5.0
    3    10.0
    0     NaN
    dtype: float64

    Sort values descending order

    >>> s.sort_values(ascending=False)
    3    10.0
    4     5.0
    2     3.0
    1     1.0
    0     NaN
    dtype: float64

    Sort values inplace

    >>> s.sort_values(ascending=False, inplace=True)
    >>> s
    3    10.0
    4     5.0
    2     3.0
    1     1.0
    0     NaN
    dtype: float64

    Sort values putting NAs first

    >>> s.sort_values(na_position='first')
    0     NaN
    1     1.0
    2     3.0
    4     5.0
    3    10.0
    dtype: float64

    Sort a series of strings

    >>> s = pd.Series(['z', 'b', 'd', 'a', 'c'])
    >>> s
    0    z
    1    b
    2    d
    3    a
    4    c
    dtype: object

    >>> s.sort_values()
    3    a
    1    b
    4    c
    2    d
    0    z
    dtype: object

    Sort using a key function. Your `key` function will be
    given the ``Series`` of values and should return an array-like.

    >>> s = pd.Series(['a', 'B', 'c', 'D', 'e'])
    >>> s.sort_values()
    1    B
    3    D
    0    a
    2    c
    4    e
    dtype: object
    >>> s.sort_values(key=lambda x: x.str.lower())
    0    a
    1    B
    2    c
    3    D
    4    e
    dtype: object

    NumPy ufuncs work well here. For example, we can
    sort by the ``sin`` of the value

    >>> s = pd.Series([-4, -2, 0, 2, 4])
    >>> s.sort_values(key=np.sin)
    1   -2
    4    4
    2    0
    0   -4
    3    2
    dtype: int64

    More complicated user-defined functions can be used,
    as long as they expect a Series and return an array-like

    >>> s.sort_values(key=lambda x: (np.tan(x.cumsum())))
    0   -4
    3    2
    4    4
    1   -2
    2    0
    dtype: int64
    """
    inplace = validate_bool_kwarg(inplace, "inplace")
    # The return value is discarded: the call is made only to validate
    # the axis parameter.
    self._get_axis_number(axis)

    # GH 5856/5853
    if inplace and self._is_cached:
        raise ValueError(
            "This Series is a view of some other array, to "
            "sort in-place you must create a copy"
        )

    # A list-like ``ascending`` is tolerated for DataFrame parity, but a
    # Series has a single sort direction, so unwrap the one element.
    if is_list_like(ascending):
        ascending = cast(Sequence[Union[bool, int]], ascending)
        n_flags = len(ascending)
        if n_flags != 1:
            raise ValueError(
                f"Length of ascending ({n_flags}) must be 1 for Series"
            )
        ascending = ascending[0]

    ascending = validate_ascending(ascending)

    if na_position not in ("first", "last"):
        raise ValueError(f"invalid na_position: {na_position}")

    # GH 35922. Make sorting stable by leveraging nargsort
    if key:
        sort_basis = ensure_key_mapped(self, key)._values
    else:
        sort_basis = self._values
    order = nargsort(sort_basis, kind, bool(ascending), na_position)

    # The key only decides the order; the result carries the original
    # values and index labels, rearranged.
    result = self._constructor(self._values[order], index=self.index[order])

    if ignore_index:
        result.index = default_index(len(order))

    if inplace:
        self._update_inplace(result)
        return None
    return result.__finalize__(self, method="sort_values")
|
|
|
|
# Typing overload: ``inplace=True`` is annotated to return None.
@overload
def sort_index(
    self,
    *,
    axis: Axis = ...,
    level: IndexLabel = ...,
    ascending: bool | Sequence[bool] = ...,
    inplace: Literal[True],
    kind: SortKind = ...,
    na_position: NaPosition = ...,
    sort_remaining: bool = ...,
    ignore_index: bool = ...,
    key: IndexKeyFunc = ...,
) -> None:
    ...


# Typing overload: ``inplace=False`` (the default) is annotated to return a Series.
@overload
def sort_index(
    self,
    *,
    axis: Axis = ...,
    level: IndexLabel = ...,
    ascending: bool | Sequence[bool] = ...,
    inplace: Literal[False] = ...,
    kind: SortKind = ...,
    na_position: NaPosition = ...,
    sort_remaining: bool = ...,
    ignore_index: bool = ...,
    key: IndexKeyFunc = ...,
) -> Series:
    ...


# Typing overload: a plain ``bool`` for ``inplace`` may yield either result.
@overload
def sort_index(
    self,
    *,
    axis: Axis = ...,
    level: IndexLabel = ...,
    ascending: bool | Sequence[bool] = ...,
    inplace: bool = ...,
    kind: SortKind = ...,
    na_position: NaPosition = ...,
    sort_remaining: bool = ...,
    ignore_index: bool = ...,
    key: IndexKeyFunc = ...,
) -> Series | None:
    ...


# error: Signature of "sort_index" incompatible with supertype "NDFrame"
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
def sort_index(  # type: ignore[override]
    self,
    axis: Axis = 0,
    level: IndexLabel = None,
    ascending: bool | Sequence[bool] = True,
    inplace: bool = False,
    kind: SortKind = "quicksort",
    na_position: NaPosition = "last",
    sort_remaining: bool = True,
    ignore_index: bool = False,
    key: IndexKeyFunc = None,
) -> Series | None:
    """
    Sort Series by index labels.

    Returns a new Series sorted by label if `inplace` argument is
    ``False``, otherwise updates the original series and returns None.

    Parameters
    ----------
    axis : {0 or 'index'}
        Unused. Parameter needed for compatibility with DataFrame.
    level : int, optional
        If not None, sort on values in specified index level(s).
    ascending : bool or list-like of bools, default True
        Sort ascending vs. descending. When the index is a MultiIndex the
        sort direction can be controlled for each level individually.
    inplace : bool, default False
        If True, perform operation in-place.
    kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
        Choice of sorting algorithm. See also :func:`numpy.sort` for more
        information. 'mergesort' and 'stable' are the only stable algorithms. For
        DataFrames, this option is only applied when sorting on a single
        column or label.
    na_position : {'first', 'last'}, default 'last'
        If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
        Not implemented for MultiIndex.
    sort_remaining : bool, default True
        If True and sorting by level and index is multilevel, sort by other
        levels too (in order) after sorting by specified level.
    ignore_index : bool, default False
        If True, the resulting axis will be labeled 0, 1, …, n - 1.

        .. versionadded:: 1.0.0

    key : callable, optional
        If not None, apply the key function to the index values
        before sorting. This is similar to the `key` argument in the
        builtin :meth:`sorted` function, with the notable difference that
        this `key` function should be *vectorized*. It should expect an
        ``Index`` and return an ``Index`` of the same shape.

        .. versionadded:: 1.1.0

    Returns
    -------
    Series or None
        The original Series sorted by the labels or None if ``inplace=True``.

    See Also
    --------
    DataFrame.sort_index: Sort DataFrame by the index.
    DataFrame.sort_values: Sort DataFrame by the value.
    Series.sort_values : Sort Series by the value.

    Examples
    --------
    >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4])
    >>> s.sort_index()
    1    c
    2    b
    3    a
    4    d
    dtype: object

    Sort Descending

    >>> s.sort_index(ascending=False)
    4    d
    3    a
    2    b
    1    c
    dtype: object

    Sort Inplace

    >>> s.sort_index(inplace=True)
    >>> s
    1    c
    2    b
    3    a
    4    d
    dtype: object

    By default NaNs are put at the end, but use `na_position` to place
    them at the beginning

    >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan])
    >>> s.sort_index(na_position='first')
    NaN     d
     1.0    c
     2.0    b
     3.0    a
    dtype: object

    Specify index level to sort

    >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo',
    ...                     'baz', 'baz', 'bar', 'bar']),
    ...           np.array(['two', 'one', 'two', 'one',
    ...                     'two', 'one', 'two', 'one'])]
    >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays)
    >>> s.sort_index(level=1)
    bar  one    8
    baz  one    6
    foo  one    4
    qux  one    2
    bar  two    7
    baz  two    5
    foo  two    3
    qux  two    1
    dtype: int64

    Does not sort by remaining levels when sorting by levels

    >>> s.sort_index(level=1, sort_remaining=False)
    qux  one    2
    foo  one    4
    baz  one    6
    bar  one    8
    qux  two    1
    foo  two    3
    baz  two    5
    bar  two    7
    dtype: int64

    Apply a key function before sorting

    >>> s = pd.Series([1, 2, 3, 4], index=['A', 'b', 'C', 'd'])
    >>> s.sort_index(key=lambda x : x.str.lower())
    A    1
    b    2
    C    3
    d    4
    dtype: int64
    """
    # This override adds no logic of its own: every argument is handed to
    # the parent class's sort_index unchanged, by keyword.
    forwarded = {
        "axis": axis,
        "level": level,
        "ascending": ascending,
        "inplace": inplace,
        "kind": kind,
        "na_position": na_position,
        "sort_remaining": sort_remaining,
        "ignore_index": ignore_index,
        "key": key,
    }
    return super().sort_index(**forwarded)
|
|
|
|
def argsort(
    self,
    axis: Axis = 0,
    kind: SortKind = "quicksort",
    order: None = None,
) -> Series:
    """
    Return the integer indices that would sort the Series values.

    Override ndarray.argsort. Only the non-NA values are argsorted; the
    resulting positions are written back at the locations of those
    non-NA values, and every NA location receives -1.

    Parameters
    ----------
    axis : {0 or 'index'}
        Unused. Parameter needed for compatibility with DataFrame.
    kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort'
        Choice of sorting algorithm. See :func:`numpy.sort` for more
        information. 'mergesort' and 'stable' are the only stable algorithms.
    order : None
        Has no effect but is accepted for compatibility with numpy.

    Returns
    -------
    Series[np.intp]
        Positions of values within the sort order with -1 indicating
        nan values.

    See Also
    --------
    numpy.ndarray.argsort : Returns the indices that would sort this array.
    """
    data = self._values
    missing = isna(data)

    if not missing.any():
        positions = np.argsort(data, kind=kind)
    else:
        # Start from all -1 and fill in only the non-NA slots. The
        # positions are relative to the non-NA subset that was sorted.
        present = ~missing
        positions = np.full(len(self), -1, dtype=np.intp)
        positions[present] = np.argsort(data[present], kind=kind)

    wrapped = self._constructor(
        positions, index=self.index, name=self.name, dtype=np.intp
    )
    return wrapped.__finalize__(self, method="argsort")
|
|
|
|
def nlargest(
    self, n: int = 5, keep: Literal["first", "last", "all"] = "first"
) -> Series:
    """
    Return the largest `n` elements.

    Parameters
    ----------
    n : int, default 5
        Return this many descending sorted values.
    keep : {'first', 'last', 'all'}, default 'first'
        When there are duplicate values that cannot all fit in a
        Series of `n` elements:

        - ``first`` : return the first `n` occurrences in order
          of appearance.
        - ``last`` : return the last `n` occurrences in reverse
          order of appearance.
        - ``all`` : keep all occurrences. This can result in a Series of
          size larger than `n`.

    Returns
    -------
    Series
        The `n` largest values in the Series, sorted in decreasing order.

    See Also
    --------
    Series.nsmallest: Get the `n` smallest elements.
    Series.sort_values: Sort Series by values.
    Series.head: Return the first `n` rows.

    Notes
    -----
    Faster than ``.sort_values(ascending=False).head(n)`` for small `n`
    relative to the size of the ``Series`` object.

    Examples
    --------
    >>> countries_population = {"Italy": 59000000, "France": 65000000,
    ...                         "Malta": 434000, "Maldives": 434000,
    ...                         "Brunei": 434000, "Iceland": 337000,
    ...                         "Nauru": 11300, "Tuvalu": 11300,
    ...                         "Anguilla": 11300, "Montserrat": 5200}
    >>> s = pd.Series(countries_population)
    >>> s
    Italy         59000000
    France        65000000
    Malta           434000
    Maldives        434000
    Brunei          434000
    Iceland         337000
    Nauru            11300
    Tuvalu           11300
    Anguilla         11300
    Montserrat        5200
    dtype: int64

    The `n` largest elements where ``n=5`` by default.

    >>> s.nlargest()
    France      65000000
    Italy       59000000
    Malta         434000
    Maldives      434000
    Brunei        434000
    dtype: int64

    The `n` largest elements where ``n=3``. Default `keep` value is 'first'
    so Malta will be kept.

    >>> s.nlargest(3)
    France    65000000
    Italy     59000000
    Malta       434000
    dtype: int64

    The `n` largest elements where ``n=3`` and keeping the last duplicates.
    Brunei will be kept since it is the last with value 434000 based on
    the index order.

    >>> s.nlargest(3, keep='last')
    France    65000000
    Italy     59000000
    Brunei      434000
    dtype: int64

    The `n` largest elements where ``n=3`` with all duplicates kept. Note
    that the returned Series has five elements due to the three duplicates.

    >>> s.nlargest(3, keep='all')
    France      65000000
    Italy       59000000
    Malta         434000
    Maldives      434000
    Brunei        434000
    dtype: int64
    """
    # The selection itself is done by the SelectNSeries helper.
    selector = algorithms.SelectNSeries(self, n=n, keep=keep)
    return selector.nlargest()
|
|
|
|
def nsmallest(
    self, n: int = 5, keep: Literal["first", "last", "all"] = "first"
) -> Series:
    """
    Return the smallest `n` elements.

    Parameters
    ----------
    n : int, default 5
        Return this many ascending sorted values.
    keep : {'first', 'last', 'all'}, default 'first'
        When there are duplicate values that cannot all fit in a
        Series of `n` elements:

        - ``first`` : return the first `n` occurrences in order
          of appearance.
        - ``last`` : return the last `n` occurrences in reverse
          order of appearance.
        - ``all`` : keep all occurrences. This can result in a Series of
          size larger than `n`.

    Returns
    -------
    Series
        The `n` smallest values in the Series, sorted in increasing order.

    See Also
    --------
    Series.nlargest: Get the `n` largest elements.
    Series.sort_values: Sort Series by values.
    Series.head: Return the first `n` rows.

    Notes
    -----
    Faster than ``.sort_values().head(n)`` for small `n` relative to
    the size of the ``Series`` object.

    Examples
    --------
    >>> countries_population = {"Italy": 59000000, "France": 65000000,
    ...                         "Brunei": 434000, "Malta": 434000,
    ...                         "Maldives": 434000, "Iceland": 337000,
    ...                         "Nauru": 11300, "Tuvalu": 11300,
    ...                         "Anguilla": 11300, "Montserrat": 5200}
    >>> s = pd.Series(countries_population)
    >>> s
    Italy         59000000
    France        65000000
    Brunei          434000
    Malta           434000
    Maldives        434000
    Iceland         337000
    Nauru            11300
    Tuvalu           11300
    Anguilla         11300
    Montserrat        5200
    dtype: int64

    The `n` smallest elements where ``n=5`` by default.

    >>> s.nsmallest()
    Montserrat      5200
    Nauru          11300
    Tuvalu         11300
    Anguilla       11300
    Iceland       337000
    dtype: int64

    The `n` smallest elements where ``n=3``. Default `keep` value is
    'first' so Nauru and Tuvalu will be kept.

    >>> s.nsmallest(3)
    Montserrat     5200
    Nauru         11300
    Tuvalu        11300
    dtype: int64

    The `n` smallest elements where ``n=3`` and keeping the last
    duplicates. Anguilla and Tuvalu will be kept since they are the last
    with value 11300 based on the index order.

    >>> s.nsmallest(3, keep='last')
    Montserrat     5200
    Anguilla      11300
    Tuvalu        11300
    dtype: int64

    The `n` smallest elements where ``n=3`` with all duplicates kept. Note
    that the returned Series has four elements due to the three duplicates.

    >>> s.nsmallest(3, keep='all')
    Montserrat     5200
    Nauru         11300
    Tuvalu        11300
    Anguilla      11300
    dtype: int64
    """
    # `keep` is annotated with the same Literal as nlargest so type
    # checkers reject values outside the documented set. Runtime
    # behaviour is unchanged: the selection is done by SelectNSeries.
    return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest()
|
|
|
|
# The docstring below is a format template: the ``extra_params``, ``klass``
# and ``examples`` placeholders are supplied through the ``@doc`` arguments.
@doc(
    klass=_shared_doc_kwargs["klass"],
    extra_params=dedent(
        """copy : bool, default True
            Whether to copy underlying data."""
    ),
    examples=dedent(
        """\
    Examples
    --------
    >>> s = pd.Series(
    ...     ["A", "B", "A", "C"],
    ...     index=[
    ...         ["Final exam", "Final exam", "Coursework", "Coursework"],
    ...         ["History", "Geography", "History", "Geography"],
    ...         ["January", "February", "March", "April"],
    ...     ],
    ... )
    >>> s
    Final exam  History     January      A
                Geography   February     B
    Coursework  History     March        A
                Geography   April        C
    dtype: object

    In the following example, we will swap the levels of the indices.
    Here, we will swap the levels column-wise, but levels can be swapped row-wise
    in a similar manner. Note that column-wise is the default behaviour.
    By not supplying any arguments for i and j, we swap the last and second to
    last indices.

    >>> s.swaplevel()
    Final exam  January     History         A
                February    Geography       B
    Coursework  March       History         A
                April       Geography       C
    dtype: object

    By supplying one argument, we can choose which index to swap the last
    index with. We can for example swap the first index with the last one as
    follows.

    >>> s.swaplevel(0)
    January     History     Final exam      A
    February    Geography   Final exam      B
    March       History     Coursework      A
    April       Geography   Coursework      C
    dtype: object

    We can also define explicitly which indices we want to swap by supplying values
    for both i and j. Here, we for example swap the first and second indices.

    >>> s.swaplevel(0, 1)
    History     Final exam  January         A
    Geography   Final exam  February        B
    History     Coursework  March           A
    Geography   Coursework  April           C
    dtype: object"""
    ),
)
def swaplevel(self, i: Level = -2, j: Level = -1, copy: bool = True) -> Series:
    """
    Swap levels i and j in a :class:`MultiIndex`.

    Default is to swap the two innermost levels of the index.

    Parameters
    ----------
    i, j : int or str
        Levels of the indices to be swapped. Can pass level name as string.
    {extra_params}

    Returns
    -------
    {klass}
        {klass} with levels swapped in MultiIndex.

    {examples}
    """
    # Only a MultiIndex has levels to exchange.
    assert isinstance(self.index, MultiIndex)
    swapped_index = self.index.swaplevel(i, j)
    # The values are reused as-is; `copy` is passed on to the constructor.
    swapped = self._constructor(self._values, index=swapped_index, copy=copy)
    return swapped.__finalize__(self, method="swaplevel")
|
|
|
|
def reorder_levels(self, order: Sequence[Level]) -> Series:
    """
    Rearrange index levels using input order.

    May not drop or duplicate levels.

    Parameters
    ----------
    order : list of int or str
        New level order. Reference level by number or key.

    Returns
    -------
    type of caller (new object)
        A copy of the caller whose index levels follow `order`; the
        caller itself is not modified.

    Raises
    ------
    TypeError
        If the index is not a :class:`MultiIndex`.
    """
    # TypeError (a subclass of Exception, so existing handlers still match)
    # states the actual problem: the index is of the wrong kind.
    if not isinstance(self.index, MultiIndex):  # pragma: no cover
        raise TypeError("Can only reorder levels on a hierarchical axis.")

    # Work on a copy so the caller's index is left untouched.
    result = self.copy()
    # Narrows the type for static checkers; already guaranteed by the guard.
    assert isinstance(result.index, MultiIndex)
    result.index = result.index.reorder_levels(order)
    return result
|
|
|
|
def explode(self, ignore_index: bool = False) -> Series:
    """
    Transform each element of a list-like to a row.

    .. versionadded:: 0.25.0

    Parameters
    ----------
    ignore_index : bool, default False
        If True, the resulting index will be labeled 0, 1, …, n - 1.

        .. versionadded:: 1.1.0

    Returns
    -------
    Series
        Exploded lists to rows; index will be duplicated for these rows.

    See Also
    --------
    Series.str.split : Split string values on specified separator.
    Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex
        to produce DataFrame.
    DataFrame.melt : Unpivot a DataFrame from wide format to long format.
    DataFrame.explode : Explode a DataFrame from list-like
        columns to long format.

    Notes
    -----
    This routine will explode list-likes including lists, tuples, sets,
    Series, and np.ndarray. The result dtype of the subset rows will
    be object. Scalars will be returned unchanged, and empty list-likes will
    result in a np.nan for that row. In addition, the ordering of elements in
    the output will be non-deterministic when exploding sets.

    Reference :ref:`the user guide <reshaping.explode>` for more examples.

    Examples
    --------
    >>> s = pd.Series([[1, 2, 3], 'foo', [], [3, 4]])
    >>> s
    0    [1, 2, 3]
    1          foo
    2           []
    3       [3, 4]
    dtype: object

    >>> s.explode()
    0      1
    0      2
    0      3
    1    foo
    2    NaN
    3      3
    3      4
    dtype: object
    """
    # An empty Series or a non-object dtype is returned as a plain copy,
    # renumbered when requested.
    nothing_to_explode = len(self) == 0 or not is_object_dtype(self)
    if nothing_to_explode:
        duplicate = self.copy()
        if ignore_index:
            return duplicate.reset_index(drop=True)
        return duplicate

    # `counts` says how many output rows each input element produced.
    values, counts = reshape.explode(np.asarray(self._values))

    if ignore_index:
        index = default_index(len(values))
    else:
        # Repeat each original label once per produced row.
        index = self.index.repeat(counts)

    return self._constructor(values, index=index, name=self.name)
|
|
|
|
def unstack(self, level: IndexLabel = -1, fill_value: Hashable = None) -> DataFrame:
    """
    Unstack, also known as pivot, Series with MultiIndex to produce DataFrame.

    Parameters
    ----------
    level : int, str, or list of these, default last level
        Level(s) to unstack, can pass level name.
    fill_value : scalar value, default None
        Value to use when replacing NaN values.

    Returns
    -------
    DataFrame
        Unstacked Series.

    Notes
    -----
    Reference :ref:`the user guide <reshaping.stacking>` for more examples.

    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4],
    ...               index=pd.MultiIndex.from_product([['one', 'two'],
    ...                                                 ['a', 'b']]))
    >>> s
    one  a    1
         b    2
    two  a    3
         b    4
    dtype: int64

    >>> s.unstack(level=-1)
         a  b
    one  1  2
    two  3  4

    >>> s.unstack(level=0)
       one  two
    a    1    3
    b    2    4
    """
    # The helper is imported at call time rather than at the top of the file.
    from pandas.core.reshape.reshape import unstack as _unstack_impl

    return _unstack_impl(self, level, fill_value)
|
|
|
|
# ----------------------------------------------------------------------
|
|
# function application
|
|
|
|
def map(
    self,
    arg: Callable | Mapping | Series,
    na_action: Literal["ignore"] | None = None,
) -> Series:
    """
    Map values of Series according to an input mapping or function.

    Used for substituting each value in a Series with another value,
    that may be derived from a function, a ``dict`` or
    a :class:`Series`.

    Parameters
    ----------
    arg : function, collections.abc.Mapping subclass or Series
        Mapping correspondence.
    na_action : {None, 'ignore'}, default None
        If 'ignore', propagate NaN values, without passing them to the
        mapping correspondence.

    Returns
    -------
    Series
        Same index as caller.

    See Also
    --------
    Series.apply : For applying more complex functions on a Series.
    DataFrame.apply : Apply a function row-/column-wise.
    DataFrame.applymap : Apply a function elementwise on a whole DataFrame.

    Notes
    -----
    When ``arg`` is a dictionary, values in Series that are not in the
    dictionary (as keys) are converted to ``NaN``. However, if the
    dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.
    provides a method for default values), then this default is used
    rather than ``NaN``.

    Examples
    --------
    >>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
    >>> s
    0       cat
    1       dog
    2       NaN
    3    rabbit
    dtype: object

    ``map`` accepts a ``dict`` or a ``Series``. Values that are not found
    in the ``dict`` are converted to ``NaN``, unless the dict has a default
    value (e.g. ``defaultdict``):

    >>> s.map({'cat': 'kitten', 'dog': 'puppy'})
    0    kitten
    1     puppy
    2       NaN
    3       NaN
    dtype: object

    It also accepts a function:

    >>> s.map('I am a {}'.format)
    0       I am a cat
    1       I am a dog
    2       I am a nan
    3    I am a rabbit
    dtype: object

    To avoid applying the function to missing values (and keep them as
    ``NaN``) ``na_action='ignore'`` can be used:

    >>> s.map('I am a {}'.format, na_action='ignore')
    0       I am a cat
    1       I am a dog
    2              NaN
    3    I am a rabbit
    dtype: object
    """
    # The element-wise mapping is done by `_map_values`; this method only
    # wraps the mapped values with the caller's index and metadata.
    mapped = self._map_values(arg, na_action=na_action)
    result = self._constructor(mapped, index=self.index)
    return result.__finalize__(self, method="map")
|
|
|
|
def _gotitem(self, key, ndim, subset=None) -> Series:
|
|
"""
|
|
Sub-classes to define. Return a sliced object.
|
|
|
|
Parameters
|
|
----------
|
|
key : string / list of selections
|
|
ndim : {1, 2}
|
|
Requested ndim of result.
|
|
subset : object, default None
|
|
Subset to act on.
|
|
"""
|
|
return self
|
|
|
|
# "See Also" section text; passed as ``see_also`` to the ``@doc`` decorator
# on ``aggregate``.
_agg_see_also_doc = dedent(
    """
See Also
--------
Series.apply : Invoke function on a Series.
Series.transform : Transform function producing a Series with like indexes.
"""
)
|
|
|
|
# "Examples" section text; passed as ``examples`` to the ``@doc`` decorator
# on ``aggregate``.
_agg_examples_doc = dedent(
    """
Examples
--------
>>> s = pd.Series([1, 2, 3, 4])
>>> s
0 1
1 2
2 3
3 4
dtype: int64

>>> s.agg('min')
1

>>> s.agg(['min', 'max'])
min 1
max 4
dtype: int64
"""
)
|
|
|
|
# NOTE(review): no docstring is written on the function itself; it is
# presumably assembled by ``@doc`` from the shared "aggregate" template and
# the two section strings passed below — confirm before adding one.
@doc(
    _shared_docs["aggregate"],
    klass=_shared_doc_kwargs["klass"],
    axis=_shared_doc_kwargs["axis"],
    see_also=_agg_see_also_doc,
    examples=_agg_examples_doc,
)
def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs):
    # The return value is discarded: the call is made only to validate
    # the axis parameter.
    self._get_axis_number(axis)

    # if func is None, will switch to user-provided "named aggregation" kwargs
    if func is None:
        func = dict(kwargs)

    # The aggregation itself is carried out by SeriesApply.
    return SeriesApply(
        self, func, convert_dtype=False, args=args, kwargs=kwargs
    ).agg()


# Short alias for ``aggregate``.
agg = aggregate
|
|
|
|
# error: Signature of "any" incompatible with supertype "NDFrame" [override]
# Typing overload: with ``level`` left as None the result is annotated as bool.
@overload  # type: ignore[override]
def any(
    self,
    *,
    axis: Axis = ...,
    bool_only: bool | None = ...,
    skipna: bool = ...,
    level: None = ...,
    **kwargs,
) -> bool:
    ...


# Typing overload: with an explicit ``level`` the result is annotated as
# either a Series or a bool.
@overload
def any(
    self,
    *,
    axis: Axis = ...,
    bool_only: bool | None = ...,
    skipna: bool = ...,
    level: Level,
    **kwargs,
) -> Series | bool:
    ...


# NOTE(review): the body here is only ``...``; presumably the working
# implementation is attached to the class elsewhere and this definition
# exists for the signature and the ``@doc``-generated docstring — confirm.
# No docstring is written on the function itself for the same reason.
@doc(NDFrame.any, **_shared_doc_kwargs)
def any(
    self,
    axis: Axis = 0,
    bool_only: bool | None = None,
    skipna: bool = True,
    level: Level | None = None,
    **kwargs,
) -> Series | bool:
    ...
|
|
|
|
# NOTE(review): no docstring is written on the function itself; it is
# presumably assembled by ``@doc`` from the shared "transform" template —
# confirm before adding one.
@doc(
    _shared_docs["transform"],
    klass=_shared_doc_kwargs["klass"],
    axis=_shared_doc_kwargs["axis"],
)
def transform(
    self, func: AggFuncType, axis: Axis = 0, *args, **kwargs
) -> DataFrame | Series:
    # The return value is discarded: the call is made only to validate
    # the axis argument.
    self._get_axis_number(axis)

    # The transformation itself is carried out by SeriesApply.
    applier = SeriesApply(
        self, func=func, convert_dtype=True, args=args, kwargs=kwargs
    )
    return applier.transform()
|
|
|
|
def apply(
    self,
    func: AggFuncType,
    convert_dtype: bool = True,
    args: tuple[Any, ...] = (),
    **kwargs,
) -> DataFrame | Series:
    """
    Invoke function on values of Series.

    Can be ufunc (a NumPy function that applies to the entire Series)
    or a Python function that only works on single values.

    Parameters
    ----------
    func : function
        Python function or NumPy ufunc to apply.
    convert_dtype : bool, default True
        Try to find better dtype for elementwise function results. If
        False, leave as dtype=object. Note that the dtype is always
        preserved for some extension array dtypes, such as Categorical.
    args : tuple
        Positional arguments passed to func after the series value.
    **kwargs
        Additional keyword arguments passed to func.

    Returns
    -------
    Series or DataFrame
        If func returns a Series object the result will be a DataFrame.

    See Also
    --------
    Series.map: For element-wise operations.
    Series.agg: Only perform aggregating type operations.
    Series.transform: Only perform transforming type operations.

    Notes
    -----
    Functions that mutate the passed object can produce unexpected
    behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
    for more details.

    Examples
    --------
    Create a series with typical summer temperatures for each city.

    >>> s = pd.Series([20, 21, 12],
    ...               index=['London', 'New York', 'Helsinki'])
    >>> s
    London      20
    New York    21
    Helsinki    12
    dtype: int64

    Square the values by defining a function and passing it as an
    argument to ``apply()``.

    >>> def square(x):
    ...     return x ** 2
    >>> s.apply(square)
    London      400
    New York    441
    Helsinki    144
    dtype: int64

    Square the values by passing an anonymous function as an
    argument to ``apply()``.

    >>> s.apply(lambda x: x ** 2)
    London      400
    New York    441
    Helsinki    144
    dtype: int64

    Define a custom function that needs additional positional
    arguments and pass these additional arguments using the
    ``args`` keyword.

    >>> def subtract_custom_value(x, custom_value):
    ...     return x - custom_value

    >>> s.apply(subtract_custom_value, args=(5,))
    London      15
    New York    16
    Helsinki     7
    dtype: int64

    Define a custom function that takes keyword arguments
    and pass these arguments to ``apply``.

    >>> def add_custom_values(x, **kwargs):
    ...     for month in kwargs:
    ...         x += kwargs[month]
    ...     return x

    >>> s.apply(add_custom_values, june=30, july=20, august=25)
    London      95
    New York    96
    Helsinki    87
    dtype: int64

    Use a function from the Numpy library.

    >>> s.apply(np.log)
    London      2.995732
    New York    3.044522
    Helsinki    2.484907
    dtype: float64
    """
    return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
|
|
|
|
def _reduce(
|
|
self,
|
|
op,
|
|
name: str,
|
|
*,
|
|
axis=0,
|
|
skipna=True,
|
|
numeric_only=None,
|
|
filter_type=None,
|
|
**kwds,
|
|
):
|
|
"""
|
|
Perform a reduction operation.
|
|
|
|
If we have an ndarray as a value, then simply perform the operation,
|
|
otherwise delegate to the object.
|
|
"""
|
|
delegate = self._values
|
|
|
|
if axis is not None:
|
|
self._get_axis_number(axis)
|
|
|
|
if isinstance(delegate, ExtensionArray):
|
|
# dispatch to ExtensionArray interface
|
|
return delegate._reduce(name, skipna=skipna, **kwds)
|
|
|
|
else:
|
|
# dispatch to numpy arrays
|
|
if numeric_only and not is_numeric_dtype(self.dtype):
|
|
kwd_name = "numeric_only"
|
|
if name in ["any", "all"]:
|
|
kwd_name = "bool_only"
|
|
# GH#47500 - change to TypeError to match other methods
|
|
warnings.warn(
|
|
f"Calling Series.{name} with {kwd_name}={numeric_only} and "
|
|
f"dtype {self.dtype} will raise a TypeError in the future",
|
|
FutureWarning,
|
|
stacklevel=find_stack_level(),
|
|
)
|
|
raise NotImplementedError(
|
|
f"Series.{name} does not implement {kwd_name}."
|
|
)
|
|
with np.errstate(all="ignore"):
|
|
return op(delegate, skipna=skipna, **kwds)
|
|
|
|
def _reindex_indexer(
|
|
self, new_index: Index | None, indexer: npt.NDArray[np.intp] | None, copy: bool
|
|
) -> Series:
|
|
# Note: new_index is None iff indexer is None
|
|
# if not None, indexer is np.intp
|
|
if indexer is None and (
|
|
new_index is None or new_index.names == self.index.names
|
|
):
|
|
if copy:
|
|
return self.copy()
|
|
return self
|
|
|
|
new_values = algorithms.take_nd(
|
|
self._values, indexer, allow_fill=True, fill_value=None
|
|
)
|
|
return self._constructor(new_values, index=new_index)
|
|
|
|
def _needs_reindex_multi(self, axes, method, level) -> bool:
|
|
"""
|
|
Check if we do need a multi reindex; this is for compat with
|
|
higher dims.
|
|
"""
|
|
return False
|
|
|
|
# error: Cannot determine type of 'align'
|
|
@doc(
|
|
NDFrame.align, # type: ignore[has-type]
|
|
klass=_shared_doc_kwargs["klass"],
|
|
axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
|
|
)
|
|
def align(
    self,
    other: Series,
    join: Literal["outer", "inner", "left", "right"] = "outer",
    axis: Axis | None = None,
    level: Level = None,
    copy: bool = True,
    fill_value: Hashable = None,
    method: FillnaOptions | None = None,
    limit: int | None = None,
    fill_axis: Axis = 0,
    broadcast_axis: Axis | None = None,
) -> Series:
    # Thin wrapper: forwards every argument unchanged to the parent
    # implementation; it exists to narrow the annotations for Series.
    return super().align(
        other,
        join=join,
        axis=axis,
        level=level,
        copy=copy,
        fill_value=fill_value,
        method=method,
        limit=limit,
        fill_axis=fill_axis,
        broadcast_axis=broadcast_axis,
    )
|
|
|
|
@overload
|
|
def rename(
|
|
self,
|
|
index: Renamer | Hashable | None = ...,
|
|
*,
|
|
axis: Axis | None = ...,
|
|
copy: bool = ...,
|
|
inplace: Literal[True],
|
|
level: Level | None = ...,
|
|
errors: IgnoreRaise = ...,
|
|
) -> None:
|
|
...
|
|
|
|
@overload
|
|
def rename(
|
|
self,
|
|
index: Renamer | Hashable | None = ...,
|
|
*,
|
|
axis: Axis | None = ...,
|
|
copy: bool = ...,
|
|
inplace: Literal[False] = ...,
|
|
level: Level | None = ...,
|
|
errors: IgnoreRaise = ...,
|
|
) -> Series:
|
|
...
|
|
|
|
@overload
|
|
def rename(
|
|
self,
|
|
index: Renamer | Hashable | None = ...,
|
|
*,
|
|
axis: Axis | None = ...,
|
|
copy: bool = ...,
|
|
inplace: bool = ...,
|
|
level: Level | None = ...,
|
|
errors: IgnoreRaise = ...,
|
|
) -> Series | None:
|
|
...
|
|
|
|
def rename(
|
|
self,
|
|
index: Renamer | Hashable | None = None,
|
|
*,
|
|
axis: Axis | None = None,
|
|
copy: bool = True,
|
|
inplace: bool = False,
|
|
level: Level | None = None,
|
|
errors: IgnoreRaise = "ignore",
|
|
) -> Series | None:
|
|
"""
|
|
Alter Series index labels or name.
|
|
|
|
Function / dict values must be unique (1-to-1). Labels not contained in
|
|
a dict / Series will be left as-is. Extra labels listed don't throw an
|
|
error.
|
|
|
|
Alternatively, change ``Series.name`` with a scalar value.
|
|
|
|
See the :ref:`user guide <basics.rename>` for more.
|
|
|
|
Parameters
|
|
----------
|
|
index : scalar, hashable sequence, dict-like or function optional
|
|
Functions or dict-like are transformations to apply to
|
|
the index.
|
|
Scalar or hashable sequence-like will alter the ``Series.name``
|
|
attribute.
|
|
axis : {0 or 'index'}
|
|
Unused. Parameter needed for compatibility with DataFrame.
|
|
copy : bool, default True
|
|
Also copy underlying data.
|
|
inplace : bool, default False
|
|
Whether to return a new Series. If True the value of copy is ignored.
|
|
level : int or level name, default None
|
|
In case of MultiIndex, only rename labels in the specified level.
|
|
errors : {'ignore', 'raise'}, default 'ignore'
|
|
If 'raise', raise `KeyError` when a `dict-like mapper` or
|
|
`index` contains labels that are not present in the index being transformed.
|
|
If 'ignore', existing keys will be renamed and extra keys will be ignored.
|
|
|
|
Returns
|
|
-------
|
|
Series or None
|
|
Series with index labels or name altered or None if ``inplace=True``.
|
|
|
|
See Also
|
|
--------
|
|
DataFrame.rename : Corresponding DataFrame method.
|
|
Series.rename_axis : Set the name of the axis.
|
|
|
|
Examples
|
|
--------
|
|
>>> s = pd.Series([1, 2, 3])
|
|
>>> s
|
|
0 1
|
|
1 2
|
|
2 3
|
|
dtype: int64
|
|
>>> s.rename("my_name") # scalar, changes Series.name
|
|
0 1
|
|
1 2
|
|
2 3
|
|
Name: my_name, dtype: int64
|
|
>>> s.rename(lambda x: x ** 2) # function, changes labels
|
|
0 1
|
|
1 2
|
|
4 3
|
|
dtype: int64
|
|
>>> s.rename({1: 3, 2: 5}) # mapping, changes labels
|
|
0 1
|
|
3 2
|
|
5 3
|
|
dtype: int64
|
|
"""
|
|
if axis is not None:
|
|
# Make sure we raise if an invalid 'axis' is passed.
|
|
axis = self._get_axis_number(axis)
|
|
|
|
if callable(index) or is_dict_like(index):
|
|
# error: Argument 1 to "_rename" of "NDFrame" has incompatible
|
|
# type "Union[Union[Mapping[Any, Hashable], Callable[[Any],
|
|
# Hashable]], Hashable, None]"; expected "Union[Mapping[Any,
|
|
# Hashable], Callable[[Any], Hashable], None]"
|
|
return super()._rename(
|
|
index, # type: ignore[arg-type]
|
|
copy=copy,
|
|
inplace=inplace,
|
|
level=level,
|
|
errors=errors,
|
|
)
|
|
else:
|
|
return self._set_name(index, inplace=inplace)
|
|
|
|
@overload
def set_axis(
    self,
    labels,
    *,
    axis: Axis = ...,
    inplace: Literal[False] | lib.NoDefault = ...,
    copy: bool | lib.NoDefault = ...,
) -> Series:
    ...

@overload
def set_axis(
    self,
    labels,
    *,
    axis: Axis = ...,
    inplace: Literal[True],
    copy: bool | lib.NoDefault = ...,
) -> None:
    ...

@overload
def set_axis(
    self,
    labels,
    *,
    axis: Axis = ...,
    inplace: bool | lib.NoDefault = ...,
    copy: bool | lib.NoDefault = ...,
) -> Series | None:
    ...

# error: Signature of "set_axis" incompatible with supertype "NDFrame"
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
@Appender(
    """
    Examples
    --------
    >>> s = pd.Series([1, 2, 3])
    >>> s
    0    1
    1    2
    2    3
    dtype: int64

    >>> s.set_axis(['a', 'b', 'c'], axis=0)
    a    1
    b    2
    c    3
    dtype: int64
"""
)
@Substitution(
    **_shared_doc_kwargs,
    extended_summary_sub="",
    axis_description_sub="",
    see_also_sub="",
)
@Appender(NDFrame.set_axis.__doc__)
def set_axis(  # type: ignore[override]
    self,
    labels,
    axis: Axis = 0,
    inplace: bool | lib.NoDefault = lib.no_default,
    copy: bool | lib.NoDefault = lib.no_default,
) -> Series | None:
    # Thin wrapper around the parent implementation; the docstring is
    # assembled by the decorators above.
    return super().set_axis(labels, axis=axis, inplace=inplace, copy=copy)
|
|
|
|
# error: Cannot determine type of 'reindex'
|
|
@doc(
|
|
NDFrame.reindex, # type: ignore[has-type]
|
|
klass=_shared_doc_kwargs["klass"],
|
|
axes=_shared_doc_kwargs["axes"],
|
|
optional_labels=_shared_doc_kwargs["optional_labels"],
|
|
optional_axis=_shared_doc_kwargs["optional_axis"],
|
|
)
|
|
def reindex(self, *args, **kwargs) -> Series:
    # Accept ``index`` either as the single positional argument or as a
    # keyword (never both), then forward everything by keyword to the parent.
    if len(args) > 1:
        raise TypeError("Only one positional argument ('index') is allowed")
    if args:
        (index,) = args
        if "index" in kwargs:
            raise TypeError(
                "'index' passed as both positional and keyword argument"
            )
        kwargs["index"] = index
    return super().reindex(**kwargs)
|
|
|
|
@overload
def drop(
    self,
    labels: IndexLabel = ...,
    *,
    axis: Axis = ...,
    index: IndexLabel = ...,
    columns: IndexLabel = ...,
    level: Level | None = ...,
    inplace: Literal[True],
    errors: IgnoreRaise = ...,
) -> None:
    ...

@overload
def drop(
    self,
    labels: IndexLabel = ...,
    *,
    axis: Axis = ...,
    index: IndexLabel = ...,
    columns: IndexLabel = ...,
    level: Level | None = ...,
    inplace: Literal[False] = ...,
    errors: IgnoreRaise = ...,
) -> Series:
    ...

@overload
def drop(
    self,
    labels: IndexLabel = ...,
    *,
    axis: Axis = ...,
    index: IndexLabel = ...,
    columns: IndexLabel = ...,
    level: Level | None = ...,
    inplace: bool = ...,
    errors: IgnoreRaise = ...,
) -> Series | None:
    ...

# error: Signature of "drop" incompatible with supertype "NDFrame"
# github.com/python/mypy/issues/12387
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])
def drop(  # type: ignore[override]
    self,
    labels: IndexLabel = None,
    axis: Axis = 0,
    index: IndexLabel = None,
    columns: IndexLabel = None,
    level: Level | None = None,
    inplace: bool = False,
    errors: IgnoreRaise = "raise",
) -> Series | None:
    """
    Return Series with specified index labels removed.

    Remove elements of a Series based on specifying the index labels.
    When using a multi-index, labels on different levels can be removed
    by specifying the level.

    Parameters
    ----------
    labels : single label or list-like
        Index labels to drop.
    axis : {0 or 'index'}
        Unused. Parameter needed for compatibility with DataFrame.
    index : single label or list-like
        Redundant for application on Series, but 'index' can be used instead
        of 'labels'.
    columns : single label or list-like
        No change is made to the Series; use 'index' or 'labels' instead.
    level : int or level name, optional
        For MultiIndex, level for which the labels will be removed.
    inplace : bool, default False
        If True, do operation inplace and return None.
    errors : {'ignore', 'raise'}, default 'raise'
        If 'ignore', suppress error and only existing labels are dropped.

    Returns
    -------
    Series or None
        Series with specified index labels removed or None if ``inplace=True``.

    Raises
    ------
    KeyError
        If none of the labels are found in the index.

    See Also
    --------
    Series.reindex : Return only specified index labels of Series.
    Series.dropna : Return series without null values.
    Series.drop_duplicates : Return Series with duplicate values removed.
    DataFrame.drop : Drop specified labels from rows or columns.

    Examples
    --------
    >>> s = pd.Series(data=np.arange(3), index=['A', 'B', 'C'])
    >>> s
    A    0
    B    1
    C    2
    dtype: int64

    Drop labels B and C

    >>> s.drop(labels=['B', 'C'])
    A    0
    dtype: int64

    Drop 2nd level label in MultiIndex Series

    >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'],
    ...                              ['speed', 'weight', 'length']],
    ...                      codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
    ...                             [0, 1, 2, 0, 1, 2, 0, 1, 2]])
    >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],
    ...               index=midx)
    >>> s
    lama    speed      45.0
            weight    200.0
            length      1.2
    cow     speed      30.0
            weight    250.0
            length      1.5
    falcon  speed     320.0
            weight      1.0
            length      0.3
    dtype: float64

    >>> s.drop(labels='weight', level=1)
    lama    speed      45.0
            length      1.2
    cow     speed      30.0
            length      1.5
    falcon  speed     320.0
            length      0.3
    dtype: float64
    """
    return super().drop(
        labels=labels,
        axis=axis,
        index=index,
        columns=columns,
        level=level,
        inplace=inplace,
        errors=errors,
    )
|
|
|
|
@overload
def fillna(
    self,
    value: Hashable | Mapping | Series | DataFrame = ...,
    *,
    method: FillnaOptions | None = ...,
    axis: Axis | None = ...,
    inplace: Literal[False] = ...,
    limit: int | None = ...,
    downcast: dict | None = ...,
) -> Series:
    ...

@overload
def fillna(
    self,
    value: Hashable | Mapping | Series | DataFrame = ...,
    *,
    method: FillnaOptions | None = ...,
    axis: Axis | None = ...,
    inplace: Literal[True],
    limit: int | None = ...,
    downcast: dict | None = ...,
) -> None:
    ...

@overload
def fillna(
    self,
    value: Hashable | Mapping | Series | DataFrame = ...,
    *,
    method: FillnaOptions | None = ...,
    axis: Axis | None = ...,
    inplace: bool = ...,
    limit: int | None = ...,
    downcast: dict | None = ...,
) -> Series | None:
    ...

# error: Signature of "fillna" incompatible with supertype "NDFrame"
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"])
@doc(NDFrame.fillna, **_shared_doc_kwargs)
def fillna(  # type: ignore[override]
    self,
    value: Hashable | Mapping | Series | DataFrame = None,
    method: FillnaOptions | None = None,
    axis: Axis | None = None,
    inplace: bool = False,
    limit: int | None = None,
    downcast: dict | None = None,
) -> Series | None:
    # Thin wrapper: every argument is forwarded by keyword to the parent.
    return super().fillna(
        value=value,
        method=method,
        axis=axis,
        inplace=inplace,
        limit=limit,
        downcast=downcast,
    )
|
|
|
|
def pop(self, item: Hashable) -> Any:
    """
    Return item and drop it from the Series. Raise KeyError if not found.

    Parameters
    ----------
    item : label
        Index of the element that needs to be removed.

    Returns
    -------
    Value that is popped from series.

    Examples
    --------
    >>> ser = pd.Series([1,2,3])

    >>> ser.pop(0)
    1

    >>> ser
    1    2
    2    3
    dtype: int64
    """
    return super().pop(item=item)
|
|
|
|
# error: Signature of "replace" incompatible with supertype "NDFrame"
|
|
@overload  # type: ignore[override]
def replace(
    self,
    to_replace=...,
    value=...,
    *,
    inplace: Literal[False] = ...,
    limit: int | None = ...,
    regex: bool = ...,
    method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,
) -> Series:
    ...

@overload
def replace(
    self,
    to_replace=...,
    value=...,
    *,
    inplace: Literal[True],
    limit: int | None = ...,
    regex: bool = ...,
    method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,
) -> None:
    ...

# error: Signature of "replace" incompatible with supertype "NDFrame"
@deprecate_nonkeyword_arguments(
    version=None, allowed_args=["self", "to_replace", "value"]
)
@doc(
    NDFrame.replace,
    klass=_shared_doc_kwargs["klass"],
    inplace=_shared_doc_kwargs["inplace"],
    replace_iloc=_shared_doc_kwargs["replace_iloc"],
)
def replace(  # type: ignore[override]
    self,
    to_replace=None,
    value=lib.no_default,
    inplace: bool = False,
    limit: int | None = None,
    regex: bool = False,
    method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default,
) -> Series | None:
    # Thin wrapper: every argument is forwarded by keyword to the parent.
    return super().replace(
        to_replace=to_replace,
        value=value,
        inplace=inplace,
        limit=limit,
        regex=regex,
        method=method,
    )
|
|
|
|
@doc(INFO_DOCSTRING, **series_sub_kwargs)
|
|
def info(
    self,
    verbose: bool | None = None,
    buf: IO[str] | None = None,
    max_cols: int | None = None,
    memory_usage: bool | str | None = None,
    show_counts: bool = True,
) -> None:
    # Build the info helper for this Series and render it into ``buf``.
    series_info = SeriesInfo(self, memory_usage)
    return series_info.render(
        buf=buf,
        max_cols=max_cols,
        verbose=verbose,
        show_counts=show_counts,
    )
|
|
|
|
def _replace_single(self, to_replace, method: str, inplace: bool, limit):
|
|
"""
|
|
Replaces values in a Series using the fill method specified when no
|
|
replacement value is given in the replace method
|
|
"""
|
|
|
|
result = self if inplace else self.copy()
|
|
|
|
values = result._values
|
|
mask = missing.mask_missing(values, to_replace)
|
|
|
|
if isinstance(values, ExtensionArray):
|
|
# dispatch to the EA's _pad_mask_inplace method
|
|
values._fill_mask_inplace(method, limit, mask)
|
|
else:
|
|
fill_f = missing.get_fill_func(method)
|
|
fill_f(values, limit=limit, mask=mask)
|
|
|
|
if inplace:
|
|
return
|
|
return result
|
|
|
|
# error: Cannot determine type of 'shift'
|
|
@doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
|
|
def shift(
    self, periods: int = 1, freq=None, axis: Axis = 0, fill_value: Hashable = None
) -> Series:
    # Thin wrapper: forwards all arguments by keyword to the parent.
    return super().shift(
        periods=periods, freq=freq, axis=axis, fill_value=fill_value
    )
|
|
|
|
def memory_usage(self, index: bool = True, deep: bool = False) -> int:
    """
    Return the memory usage of the Series.

    The memory usage can optionally include the contribution of
    the index and of elements of `object` dtype.

    Parameters
    ----------
    index : bool, default True
        Specifies whether to include the memory usage of the Series index.
    deep : bool, default False
        If True, introspect the data deeply by interrogating
        `object` dtypes for system-level memory consumption, and include
        it in the returned value.

    Returns
    -------
    int
        Bytes of memory consumed.

    See Also
    --------
    numpy.ndarray.nbytes : Total bytes consumed by the elements of the
        array.
    DataFrame.memory_usage : Bytes consumed by a DataFrame.

    Examples
    --------
    >>> s = pd.Series(range(3))
    >>> s.memory_usage()
    152

    Not including the index gives the size of the rest of the data, which
    is necessarily smaller:

    >>> s.memory_usage(index=False)
    24

    The memory footprint of `object` values is ignored by default:

    >>> s = pd.Series(["a", "b"])
    >>> s.values
    array(['a', 'b'], dtype=object)
    >>> s.memory_usage()
    144
    >>> s.memory_usage(deep=True)
    244
    """
    total = self._memory_usage(deep=deep)
    if index:
        total += self.index.memory_usage(deep=deep)
    return total
|
|
|
|
def isin(self, values) -> Series:
    """
    Whether elements in Series are contained in `values`.

    Return a boolean Series showing whether each element in the Series
    matches an element in the passed sequence of `values` exactly.

    Parameters
    ----------
    values : set or list-like
        The sequence of values to test. Passing in a single string will
        raise a ``TypeError``. Instead, turn a single string into a
        list of one element.

    Returns
    -------
    Series
        Series of booleans indicating if each element is in values.

    Raises
    ------
    TypeError
      * If `values` is a string

    See Also
    --------
    DataFrame.isin : Equivalent method on DataFrame.

    Examples
    --------
    >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama',
    ...                'hippo'], name='animal')
    >>> s.isin(['cow', 'lama'])
    0     True
    1     True
    2     True
    3    False
    4     True
    5    False
    Name: animal, dtype: bool

    To invert the boolean values, use the ``~`` operator:

    >>> ~s.isin(['cow', 'lama'])
    0    False
    1    False
    2    False
    3     True
    4    False
    5     True
    Name: animal, dtype: bool

    Passing a single string as ``s.isin('lama')`` will raise an error. Use
    a list of one element instead:

    >>> s.isin(['lama'])
    0     True
    1    False
    2     True
    3    False
    4     True
    5    False
    Name: animal, dtype: bool

    Strings and integers are distinct and are therefore not comparable:

    >>> pd.Series([1]).isin(['1'])
    0    False
    dtype: bool
    >>> pd.Series([1.1]).isin(['1.1'])
    0    False
    dtype: bool
    """
    membership = algorithms.isin(self._values, values)
    out = self._constructor(membership, index=self.index)
    return out.__finalize__(self, method="isin")
|
|
|
|
def between(
    self,
    left,
    right,
    inclusive: Literal["both", "neither", "left", "right"] = "both",
) -> Series:
    """
    Return boolean Series equivalent to left <= series <= right.

    This function returns a boolean vector containing `True` wherever the
    corresponding Series element is between the boundary values `left` and
    `right`. NA values are treated as `False`.

    Parameters
    ----------
    left : scalar or list-like
        Left boundary.
    right : scalar or list-like
        Right boundary.
    inclusive : {"both", "neither", "left", "right"}
        Include boundaries. Whether to set each bound as closed or open.

        .. versionchanged:: 1.3.0

    Returns
    -------
    Series
        Series representing whether each element is between left and
        right (inclusive).

    See Also
    --------
    Series.gt : Greater than of series and other.
    Series.lt : Less than of series and other.

    Notes
    -----
    This function is equivalent to ``(left <= ser) & (ser <= right)``

    Examples
    --------
    >>> s = pd.Series([2, 0, 4, 8, np.nan])

    Boundary values are included by default:

    >>> s.between(1, 4)
    0     True
    1    False
    2     True
    3    False
    4    False
    dtype: bool

    With `inclusive` set to ``"neither"`` boundary values are excluded:

    >>> s.between(1, 4, inclusive="neither")
    0     True
    1    False
    2    False
    3    False
    4    False
    dtype: bool

    `left` and `right` can be any scalar value:

    >>> s = pd.Series(['Alice', 'Bob', 'Carol', 'Eve'])
    >>> s.between('Anna', 'Daniel')
    0    False
    1     True
    2     True
    3    False
    dtype: bool
    """
    # error: Non-overlapping identity check (left operand type: "Literal['both',
    # 'neither', 'left', 'right']", right operand type: "Literal[False]")
    if inclusive is True or inclusive is False:  # type: ignore[comparison-overlap]
        warnings.warn(
            "Boolean inputs to the `inclusive` argument are deprecated in "
            "favour of `both` or `neither`.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        # Map the deprecated booleans onto their string equivalents.
        inclusive = "both" if inclusive else "neither"

    if inclusive == "both":
        lmask = self >= left
        rmask = self <= right
    elif inclusive == "left":
        lmask = self >= left
        rmask = self < right
    elif inclusive == "right":
        lmask = self > left
        rmask = self <= right
    elif inclusive == "neither":
        lmask = self > left
        rmask = self < right
    else:
        raise ValueError(
            "Inclusive has to be either string of 'both',"
            "'left', 'right', or 'neither'."
        )

    return lmask & rmask
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Convert to types that support pd.NA
|
|
|
|
def _convert_dtypes(
|
|
self,
|
|
infer_objects: bool = True,
|
|
convert_string: bool = True,
|
|
convert_integer: bool = True,
|
|
convert_boolean: bool = True,
|
|
convert_floating: bool = True,
|
|
) -> Series:
|
|
input_series = self
|
|
if infer_objects:
|
|
input_series = input_series.infer_objects()
|
|
if is_object_dtype(input_series):
|
|
input_series = input_series.copy()
|
|
|
|
if convert_string or convert_integer or convert_boolean or convert_floating:
|
|
inferred_dtype = convert_dtypes(
|
|
input_series._values,
|
|
convert_string,
|
|
convert_integer,
|
|
convert_boolean,
|
|
convert_floating,
|
|
)
|
|
result = input_series.astype(inferred_dtype)
|
|
else:
|
|
result = input_series.copy()
|
|
return result
|
|
|
|
# error: Cannot determine type of 'isna'
|
|
# error: Return type "Series" of "isna" incompatible with return type "ndarray
|
|
# [Any, dtype[bool_]]" in supertype "IndexOpsMixin"
|
|
@doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
|
|
def isna(self) -> Series:  # type: ignore[override]
    # Calls NDFrame.isna explicitly rather than going through super().
    return NDFrame.isna(self)
|
|
|
|
# error: Cannot determine type of 'isna'
|
|
@doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
|
|
def isnull(self) -> Series:
    """
    Series.isnull is an alias for Series.isna.
    """
    return super().isnull()
|
|
|
|
# error: Cannot determine type of 'notna'
|
|
@doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
|
|
def notna(self) -> Series:
    # Thin wrapper that narrows the return annotation to Series.
    return super().notna()
|
|
|
|
# error: Cannot determine type of 'notna'
|
|
@doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
|
|
def notnull(self) -> Series:
    """
    Series.notnull is an alias for Series.notna.
    """
    return super().notnull()
|
|
|
|
@overload
def dropna(
    self, *, axis: Axis = ..., inplace: Literal[False] = ..., how: str | None = ...
) -> Series:
    ...

@overload
def dropna(
    self, *, axis: Axis = ..., inplace: Literal[True], how: str | None = ...
) -> None:
    ...
|
|
|
|
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
|
|
def dropna(
    self, axis: Axis = 0, inplace: bool = False, how: str | None = None
) -> Series | None:
    """
    Return a new Series with missing values removed.

    See the :ref:`User Guide <missing_data>` for more on which values are
    considered missing, and how to work with missing data.

    Parameters
    ----------
    axis : {0 or 'index'}
        Unused. Parameter needed for compatibility with DataFrame.
    inplace : bool, default False
        If True, do operation inplace and return None.
    how : str, optional
        Not in use. Kept for compatibility.

    Returns
    -------
    Series or None
        Series with NA entries dropped from it or None if ``inplace=True``.

    See Also
    --------
    Series.isna: Indicate missing values.
    Series.notna : Indicate existing (non-missing) values.
    Series.fillna : Replace missing values.
    DataFrame.dropna : Drop rows or columns which contain NA values.
    Index.dropna : Drop missing indices.

    Examples
    --------
    >>> ser = pd.Series([1., 2., np.nan])
    >>> ser
    0    1.0
    1    2.0
    2    NaN
    dtype: float64

    Drop NA values from a Series.

    >>> ser.dropna()
    0    1.0
    1    2.0
    dtype: float64

    Keep the Series with valid entries in the same variable.

    >>> ser.dropna(inplace=True)
    >>> ser
    0    1.0
    1    2.0
    dtype: float64

    Empty strings are not considered NA values. ``None`` is considered an
    NA value.

    >>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay'])
    >>> ser
    0       NaN
    1         2
    2       NaT
    3
    4      None
    5    I stay
    dtype: object
    >>> ser.dropna()
    1         2
    3
    5    I stay
    dtype: object
    """
    inplace = validate_bool_kwarg(inplace, "inplace")
    # Validate the axis parameter
    self._get_axis_number(axis or 0)

    if self._can_hold_na:
        result = remove_na_arraylike(self)
        if not inplace:
            return result
        self._update_inplace(result)
    elif not inplace:
        # Nothing can be missing, so only a copy is returned.
        return self.copy()
    return None
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Time series-oriented methods
|
|
|
|
# error: Cannot determine type of 'asfreq'
|
|
@doc(NDFrame.asfreq, **_shared_doc_kwargs) # type: ignore[has-type]
|
|
def asfreq(
    self,
    freq: Frequency,
    method: FillnaOptions | None = None,
    how: str | None = None,
    normalize: bool = False,
    fill_value: Hashable = None,
) -> Series:
    # Thin wrapper: every argument is forwarded by keyword to the parent.
    return super().asfreq(
        freq=freq,
        method=method,
        how=how,
        normalize=normalize,
        fill_value=fill_value,
    )
|
|
|
|
# error: Cannot determine type of 'resample'
|
|
@doc(NDFrame.resample, **_shared_doc_kwargs) # type: ignore[has-type]
|
|
def resample(
    self,
    rule,
    axis: Axis = 0,
    closed: str | None = None,
    label: str | None = None,
    convention: str = "start",
    kind: str | None = None,
    loffset=None,
    base: int | None = None,
    on: Level = None,
    level: Level = None,
    origin: str | TimestampConvertibleTypes = "start_day",
    offset: TimedeltaConvertibleTypes | None = None,
    group_keys: bool | lib.NoDefault = no_default,
) -> Resampler:
    # Thin wrapper: every argument is forwarded by keyword to the parent.
    return super().resample(
        rule=rule,
        axis=axis,
        closed=closed,
        label=label,
        convention=convention,
        kind=kind,
        loffset=loffset,
        base=base,
        on=on,
        level=level,
        origin=origin,
        offset=offset,
        group_keys=group_keys,
    )
|
|
|
|
def to_timestamp(
    self,
    freq=None,
    how: Literal["s", "e", "start", "end"] = "start",
    copy: bool = True,
) -> Series:
    """
    Cast to DatetimeIndex of Timestamps, at *beginning* of period.

    Parameters
    ----------
    freq : str, default frequency of PeriodIndex
        Desired frequency.
    how : {'s', 'e', 'start', 'end'}
        Convention for converting period to timestamp; start of period
        vs. end.
    copy : bool, default True
        Whether or not to return a copy.

    Returns
    -------
    Series with DatetimeIndex

    Raises
    ------
    TypeError
        If the index is not a PeriodIndex.
    """
    # Check the index type first so the error path does not copy the values.
    if not isinstance(self.index, PeriodIndex):
        raise TypeError(f"unsupported Type {type(self.index).__name__}")

    new_values = self._values
    if copy:
        new_values = new_values.copy()

    new_index = self.index.to_timestamp(freq=freq, how=how)
    return self._constructor(new_values, index=new_index).__finalize__(
        self, method="to_timestamp"
    )
|
|
|
|
def to_period(self, freq: str | None = None, copy: bool = True) -> Series:
    """
    Convert Series from DatetimeIndex to PeriodIndex.

    Parameters
    ----------
    freq : str, default None
        Frequency associated with the PeriodIndex.
    copy : bool, default True
        Whether or not to return a copy.

    Returns
    -------
    Series
        Series with index converted to PeriodIndex.

    Raises
    ------
    TypeError
        If the index of the Series is not a DatetimeIndex.
    """
    # Validate the index before touching the values, so an unsupported index
    # fails immediately instead of after a copy that would be thrown away.
    if not isinstance(self.index, DatetimeIndex):
        raise TypeError(f"unsupported Type {type(self.index).__name__}")

    new_values = self._values
    if copy:
        new_values = new_values.copy()

    new_index = self.index.to_period(freq=freq)
    return self._constructor(new_values, index=new_index).__finalize__(
        self, method="to_period"
    )
|
|
|
|
# Typing overloads: the declared return type depends on ``inplace``.
@overload
def ffill(
    self,
    *,
    axis: None | Axis = ...,
    inplace: Literal[False] = ...,
    limit: None | int = ...,
    downcast: dict | None = ...,
) -> Series:
    ...

@overload
def ffill(
    self,
    *,
    axis: None | Axis = ...,
    inplace: Literal[True],
    limit: None | int = ...,
    downcast: dict | None = ...,
) -> None:
    ...

@overload
def ffill(
    self,
    *,
    axis: None | Axis = ...,
    inplace: bool = ...,
    limit: None | int = ...,
    downcast: dict | None = ...,
) -> Series | None:
    ...

# error: Signature of "ffill" incompatible with supertype "NDFrame"
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
def ffill(  # type: ignore[override]
    self,
    axis: None | Axis = None,
    inplace: bool = False,
    limit: None | int = None,
    downcast: dict | None = None,
) -> Series | None:
    # Thin override: all arguments are forwarded by keyword to the parent
    # class implementation and its result is returned unchanged.
    fill_options = {
        "axis": axis,
        "inplace": inplace,
        "limit": limit,
        "downcast": downcast,
    }
    return super().ffill(**fill_options)
|
|
|
|
# Typing overloads: the declared return type depends on ``inplace``.
@overload
def bfill(
    self,
    *,
    axis: None | Axis = ...,
    inplace: Literal[False] = ...,
    limit: None | int = ...,
    downcast: dict | None = ...,
) -> Series:
    ...

@overload
def bfill(
    self,
    *,
    axis: None | Axis = ...,
    inplace: Literal[True],
    limit: None | int = ...,
    downcast: dict | None = ...,
) -> None:
    ...

@overload
def bfill(
    self,
    *,
    axis: None | Axis = ...,
    inplace: bool = ...,
    limit: None | int = ...,
    downcast: dict | None = ...,
) -> Series | None:
    ...

# error: Signature of "bfill" incompatible with supertype "NDFrame"
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self"])
def bfill(  # type: ignore[override]
    self,
    axis: None | Axis = None,
    inplace: bool = False,
    limit: None | int = None,
    downcast: dict | None = None,
) -> Series | None:
    # Thin override: all arguments are forwarded by keyword to the parent
    # class implementation and its result is returned unchanged.
    fill_options = {
        "axis": axis,
        "inplace": inplace,
        "limit": limit,
        "downcast": downcast,
    }
    return super().bfill(**fill_options)
|
|
|
|
@deprecate_nonkeyword_arguments(
    version=None, allowed_args=["self", "lower", "upper"]
)
def clip(
    self: Series,
    lower=None,
    upper=None,
    axis: Axis | None = None,
    inplace: bool = False,
    *args,
    **kwargs,
) -> Series | None:
    # Thin override: the named arguments are passed on positionally, in
    # signature order, followed by any extra positional and keyword
    # arguments supplied by the caller.
    positional = (lower, upper, axis, inplace, *args)
    return super().clip(*positional, **kwargs)
|
|
|
|
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"])
def interpolate(
    self: Series,
    method: str = "linear",
    axis: Axis = 0,
    limit: int | None = None,
    inplace: bool = False,
    limit_direction: str | None = None,
    limit_area: str | None = None,
    downcast: str | None = None,
    **kwargs,
) -> Series | None:
    # Thin override: the named arguments are passed on positionally, in
    # signature order; extra keyword arguments are forwarded untouched.
    positional = (
        method,
        axis,
        limit,
        inplace,
        limit_direction,
        limit_area,
        downcast,
    )
    return super().interpolate(*positional, **kwargs)
|
|
|
|
# Typing overloads: the declared return type depends on ``inplace``.
@overload
def where(
    self,
    cond,
    other=...,
    *,
    inplace: Literal[False] = ...,
    axis: Axis | None = ...,
    level: Level = ...,
    errors: IgnoreRaise | lib.NoDefault = ...,
    try_cast: bool | lib.NoDefault = ...,
) -> Series:
    ...

@overload
def where(
    self,
    cond,
    other=...,
    *,
    inplace: Literal[True],
    axis: Axis | None = ...,
    level: Level = ...,
    errors: IgnoreRaise | lib.NoDefault = ...,
    try_cast: bool | lib.NoDefault = ...,
) -> None:
    ...

@overload
def where(
    self,
    cond,
    other=...,
    *,
    inplace: bool = ...,
    axis: Axis | None = ...,
    level: Level = ...,
    errors: IgnoreRaise | lib.NoDefault = ...,
    try_cast: bool | lib.NoDefault = ...,
) -> Series | None:
    ...

# error: Signature of "where" incompatible with supertype "NDFrame"
@deprecate_kwarg(old_arg_name="errors", new_arg_name=None)
@deprecate_nonkeyword_arguments(
    version=None, allowed_args=["self", "cond", "other"]
)
def where(  # type: ignore[override]
    self,
    cond,
    other=lib.no_default,
    inplace: bool = False,
    axis: Axis | None = None,
    level: Level = None,
    errors: IgnoreRaise | lib.NoDefault = lib.no_default,
    try_cast: bool | lib.NoDefault = lib.no_default,
) -> Series | None:
    # Thin override delegating to the parent class implementation.
    # ``errors`` stays in the signature but is not passed on; it is the
    # keyword registered with ``deprecate_kwarg`` above.
    keyword_options = {
        "inplace": inplace,
        "axis": axis,
        "level": level,
        "try_cast": try_cast,
    }
    return super().where(cond, other, **keyword_options)
|
|
|
|
# Typing overloads: the declared return type depends on ``inplace``.
@overload
def mask(
    self,
    cond,
    other=...,
    *,
    inplace: Literal[False] = ...,
    axis: Axis | None = ...,
    level: Level = ...,
    errors: IgnoreRaise | lib.NoDefault = ...,
    try_cast: bool | lib.NoDefault = ...,
) -> Series:
    ...

@overload
def mask(
    self,
    cond,
    other=...,
    *,
    inplace: Literal[True],
    axis: Axis | None = ...,
    level: Level = ...,
    errors: IgnoreRaise | lib.NoDefault = ...,
    try_cast: bool | lib.NoDefault = ...,
) -> None:
    ...

@overload
def mask(
    self,
    cond,
    other=...,
    *,
    inplace: bool = ...,
    axis: Axis | None = ...,
    level: Level = ...,
    errors: IgnoreRaise | lib.NoDefault = ...,
    try_cast: bool | lib.NoDefault = ...,
) -> Series | None:
    ...

# error: Signature of "mask" incompatible with supertype "NDFrame"
@deprecate_kwarg(old_arg_name="errors", new_arg_name=None)
@deprecate_nonkeyword_arguments(
    version=None, allowed_args=["self", "cond", "other"]
)
def mask(  # type: ignore[override]
    self,
    cond,
    other=np.nan,
    inplace: bool = False,
    axis: Axis | None = None,
    level: Level = None,
    errors: IgnoreRaise | lib.NoDefault = lib.no_default,
    try_cast: bool | lib.NoDefault = lib.no_default,
) -> Series | None:
    # Thin override delegating to the parent class implementation.
    # ``errors`` stays in the signature but is not passed on; it is the
    # keyword registered with ``deprecate_kwarg`` above.
    keyword_options = {
        "inplace": inplace,
        "axis": axis,
        "level": level,
        "try_cast": try_cast,
    }
    return super().mask(cond, other, **keyword_options)
|
|
|
|
# ----------------------------------------------------------------------
# Add index

# A Series has exactly one axis, named "index".
_AXIS_ORDERS = ["index"]
_AXIS_LEN = len(_AXIS_ORDERS)  # evaluates to 1 for the single-entry list above

# Number and name of the "info" axis; both refer to the only axis.
_info_axis_number = 0
_info_axis_name = "index"

# Class-level property object exposing axis 0 as ``index``.
index = properties.AxisProperty(axis=0, doc="The index (axis labels) of the Series.")
|
|
|
|
# ----------------------------------------------------------------------
# Accessor Methods
# ----------------------------------------------------------------------
# Each attribute below is a ``CachedAccessor`` registered under the same name
# it is bound to. Keep the assignment order: ``str`` is rebound here, so any
# later use of ``str`` in this scope refers to the accessor.
str = CachedAccessor("str", StringMethods)
dt = CachedAccessor("dt", CombinedDatetimelikeProperties)
cat = CachedAccessor("cat", CategoricalAccessor)
plot = CachedAccessor("plot", pandas.plotting.PlotAccessor)
sparse = CachedAccessor("sparse", SparseAccessor)
|
|
|
|
# ----------------------------------------------------------------------
# Add plotting methods to Series

# ``hist`` is bound directly to the function from ``pandas.plotting``.
hist = pandas.plotting.hist_series
|
|
|
|
# ----------------------------------------------------------------------
|
|
# Template-Based Arithmetic/Comparison Methods
|
|
|
|
def _cmp_method(self, other, op):
    """
    Apply the comparison operator ``op`` between this Series and ``other``.

    The result is built with ``self._construct_result`` under the name
    returned by ``ops.get_op_result_name``.

    Raises
    ------
    ValueError
        If ``other`` is a Series that is not indexed the same as ``self``.
    """
    result_name = ops.get_op_result_name(self, other)

    # Two Series can only be compared when their labels match.
    mismatched = isinstance(other, Series) and not self._indexed_same(other)
    if mismatched:
        raise ValueError("Can only compare identically-labeled Series objects")

    left = self._values
    right = extract_array(other, extract_numpy=True, extract_range=True)

    # Silence numpy floating-point warnings for the duration of the comparison.
    with np.errstate(all="ignore"):
        compared = ops.comparison_op(left, right, op)

    return self._construct_result(compared, name=result_name)
|
|
|
|
def _logical_method(self, other, op):
    """
    Apply the logical operator ``op`` between this Series and ``other``.

    The result name is determined from the operands as passed in; the
    operands are then aligned with ``ops.align_method_SERIES`` and the result
    is constructed from the aligned left-hand object.
    """
    result_name = ops.get_op_result_name(self, other)
    aligned_self, aligned_other = ops.align_method_SERIES(
        self, other, align_asobject=True
    )

    left = aligned_self._values
    right = extract_array(aligned_other, extract_numpy=True, extract_range=True)

    combined = ops.logical_op(left, right, op)
    return aligned_self._construct_result(combined, name=result_name)
|
|
|
|
def _arith_method(self, other, op):
    """
    Align the operands with ``ops.align_method_SERIES``, then delegate the
    arithmetic operation ``op`` to ``base.IndexOpsMixin._arith_method``.
    """
    aligned_self, aligned_other = ops.align_method_SERIES(self, other)
    return base.IndexOpsMixin._arith_method(aligned_self, aligned_other, op)
|
|
|
|
|
|
# Module-level setup executed at import time, after the Series class is defined.
# The two calls are kept in their original order.
Series._add_numeric_operations()

# Add arithmetic!
ops.add_flex_arithmetic_methods(Series)
|