aoc-2022/venv/Lib/site-packages/pandas/_libs/arrays.pyx

184 lines
5.7 KiB
Cython
Raw Normal View History

"""
Cython implementations for internal ExtensionArrays.
"""
cimport cython
import numpy as np
cimport numpy as cnp
from cpython cimport PyErr_Clear
from numpy cimport ndarray
# Initialise the NumPy C-API for this module; this has to run before any of
# the cnp.PyArray_* functions used below are called.
cnp.import_array()
@cython.freelist(16)
cdef class NDArrayBacked:
    """
    Thin extension-type wrapper around a backing ndarray plus a dtype object.

    Implementing these methods in cython improves performance quite a bit.

    import pandas as pd
    from pandas._libs.arrays import NDArrayBacked as cls

    dti = pd.date_range("2016-01-01", periods=3)
    dta = dti._data
    arr = dta._ndarray

    obj = cls._simple_new(arr, arr.dtype)

    # for foo in [arr, dta, obj]: ...

    %timeit foo.copy()
    299 ns ± 30 ns per loop     # <-- arr underlying ndarray (for reference)
    530 ns ± 9.24 ns per loop   # <-- dta with cython NDArrayBacked
    1.66 µs ± 46.3 ns per loop  # <-- dta without cython NDArrayBacked
    328 ns ± 5.29 ns per loop   # <-- obj with NDArrayBacked.__cinit__
    371 ns ± 6.97 ns per loop   # <-- obj with NDArrayBacked._simple_new

    %timeit foo.T
    125 ns ± 6.27 ns per loop   # <-- arr underlying ndarray (for reference)
    226 ns ± 7.66 ns per loop   # <-- dta with cython NDArrayBacked
    911 ns ± 16.6 ns per loop   # <-- dta without cython NDArrayBacked
    215 ns ± 4.54 ns per loop   # <-- obj with NDArrayBacked._simple_new
    """
    # TODO: implement take in terms of cnp.PyArray_TakeFrom
    # TODO: implement concat_same_type in terms of cnp.PyArray_Concatenate

    # NOTE(review): the attribute declarations are not in this file; they are
    # presumably declared in the matching .pxd -- confirm.
    # cdef:
    #    readonly ndarray _ndarray
    #    readonly object _dtype

    def __init__(self, ndarray values, object dtype):
        """
        Store `values` as the backing ndarray and `dtype` as the dtype.

        Parameters
        ----------
        values : ndarray
        dtype : object
        """
        self._ndarray = values
        self._dtype = dtype

    @classmethod
    def _simple_new(cls, ndarray values, object dtype):
        """
        Build an instance of `cls` without going through ``__init__``.

        Parameters
        ----------
        values : ndarray
            Stored as ``_ndarray``.
        dtype : object
            Stored as ``_dtype``.

        Returns
        -------
        NDArrayBacked
            A new object whose type is `cls`.
        """
        cdef:
            NDArrayBacked obj
        obj = NDArrayBacked.__new__(cls)
        obj._ndarray = values
        obj._dtype = dtype
        return obj

    cpdef NDArrayBacked _from_backing_data(self, ndarray values):
        """
        Construct a new object of the same type as `self` with `values` as
        its _ndarray and with the same _dtype as `self`.

        This should round-trip:
            self == self._from_backing_data(self._ndarray)
        """
        # TODO: re-reuse simple_new if/when it can be cpdef
        cdef:
            NDArrayBacked obj
        obj = NDArrayBacked.__new__(type(self))
        obj._ndarray = values
        obj._dtype = self._dtype
        return obj

    cpdef __setstate__(self, state):
        """
        Restore `_ndarray`, `_dtype` and any extra attributes from `state`.

        Accepted layouts:

        * dict holding the array under "_data" or "_ndarray" and the dtype
          under "_dtype"; every remaining item is applied with ``setattr``.
        * 3-tuple ``(data, dtype, attrs)`` where `attrs` is a dict applied with
          ``setattr``.  The first two entries are swapped when the second one
          is an ndarray, so either order is accepted.
        * 1-tuple wrapping a dict of the first form.

        The `state` object passed in is never modified.

        Raises
        ------
        ValueError
            If a dict state has neither "_data" nor "_ndarray".
        KeyError
            If a dict state has no "_dtype" entry.
        NotImplementedError
            For any other layout.
        """
        if isinstance(state, dict):
            # Work on a shallow copy: the pops below would otherwise drain
            # the caller's dict and make a second call with it fail.
            state = dict(state)

            if "_data" in state:
                data = state.pop("_data")
            elif "_ndarray" in state:
                data = state.pop("_ndarray")
            else:
                msg = "state dict must contain either '_data' or '_ndarray'"
                raise ValueError(msg)  # pragma: no cover
            self._ndarray = data
            self._dtype = state.pop("_dtype")

            for key, val in state.items():
                setattr(self, key, val)
        elif isinstance(state, tuple):
            if len(state) != 3:
                if len(state) == 1 and isinstance(state[0], dict):
                    self.__setstate__(state[0])
                    return
                raise NotImplementedError(state)  # pragma: no cover

            data, dtype = state[:2]
            if isinstance(dtype, np.ndarray):
                # the two leading entries arrived as (dtype, data)
                dtype, data = data, dtype
            self._ndarray = data
            self._dtype = dtype

            if isinstance(state[2], dict):
                for key, val in state[2].items():
                    setattr(self, key, val)
            else:
                raise NotImplementedError(state)  # pragma: no cover
        else:
            raise NotImplementedError(state)  # pragma: no cover

    def __len__(self) -> int:
        """Return ``len()`` of the backing ndarray."""
        return len(self._ndarray)

    @property
    def shape(self):
        """Shape of the backing ndarray."""
        # object cast bc _ndarray.shape is npy_intp*
        return (<object>(self._ndarray)).shape

    @property
    def ndim(self) -> int:
        """``ndim`` of the backing ndarray."""
        return self._ndarray.ndim

    @property
    def size(self) -> int:
        """``size`` of the backing ndarray."""
        return self._ndarray.size

    @property
    def nbytes(self) -> int:
        """``nbytes`` of the backing ndarray."""
        return self._ndarray.nbytes

    def copy(self, order="C"):
        """
        Return a new object of the same type backed by a copy of `_ndarray`.

        Parameters
        ----------
        order : default "C"
            Memory-order specifier, converted with PyArray_OrderConverter.

        Raises
        ------
        ValueError
            If `order` cannot be converted to a NumPy order code.
        """
        cdef:
            cnp.NPY_ORDER order_code
            int success

        success = cnp.PyArray_OrderConverter(order, &order_code)
        if not success:
            # clear exception so that we don't get a SystemError
            PyErr_Clear()
            # same message used by numpy
            msg = f"order must be one of 'C', 'F', 'A', or 'K' (got '{order}')"
            raise ValueError(msg)

        res_values = cnp.PyArray_NewCopy(self._ndarray, order_code)
        return self._from_backing_data(res_values)

    def delete(self, loc, axis=0):
        """Apply ``np.delete(_ndarray, loc, axis=axis)`` and re-wrap the result."""
        res_values = np.delete(self._ndarray, loc, axis=axis)
        return self._from_backing_data(res_values)

    def swapaxes(self, axis1, axis2):
        """Swap `axis1` and `axis2` of the backing ndarray and re-wrap the result."""
        res_values = cnp.PyArray_SwapAxes(self._ndarray, axis1, axis2)
        return self._from_backing_data(res_values)

    # TODO: pass NPY_MAXDIMS equiv to axis=None?
    def repeat(self, repeats, axis: int | np.integer = 0):
        """
        Repeat elements of the backing ndarray along `axis` and re-wrap.

        Note that ``axis=None`` is currently coerced to axis 0 here; it does
        not flatten the array (see the TODO above).
        """
        if axis is None:
            axis = 0
        res_values = cnp.PyArray_Repeat(self._ndarray, repeats, <int>axis)
        return self._from_backing_data(res_values)

    def reshape(self, *args, **kwargs):
        """Forward all arguments to ``_ndarray.reshape`` and re-wrap the result."""
        res_values = self._ndarray.reshape(*args, **kwargs)
        return self._from_backing_data(res_values)

    def ravel(self, order="C"):
        """Call ``_ndarray.ravel(order)`` and re-wrap the result."""
        # cnp.PyArray_OrderConverter(PyObject* obj, NPY_ORDER* order)
        # res_values = cnp.PyArray_Ravel(self._ndarray, order)
        res_values = self._ndarray.ravel(order)
        return self._from_backing_data(res_values)

    @property
    def T(self):
        """New object of the same type wrapping ``_ndarray.T``."""
        res_values = self._ndarray.T
        return self._from_backing_data(res_values)

    def transpose(self, *axes):
        """Forward `axes` to ``_ndarray.transpose`` and re-wrap the result."""
        res_values = self._ndarray.transpose(*axes)
        return self._from_backing_data(res_values)