184 lines
5.7 KiB
Cython
184 lines
5.7 KiB
Cython
|
"""
|
||
|
Cython implementations for internal ExtensionArrays.
|
||
|
"""
|
||
|
cimport cython
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
cimport numpy as cnp
|
||
|
from cpython cimport PyErr_Clear
|
||
|
from numpy cimport ndarray
|
||
|
|
||
|
cnp.import_array()
|
||
|
|
||
|
|
||
|
@cython.freelist(16)
|
||
|
cdef class NDArrayBacked:
|
||
|
"""
|
||
|
Implementing these methods in cython improves performance quite a bit.
|
||
|
|
||
|
import pandas as pd
|
||
|
|
||
|
from pandas._libs.arrays import NDArrayBacked as cls
|
||
|
|
||
|
dti = pd.date_range("2016-01-01", periods=3)
|
||
|
dta = dti._data
|
||
|
arr = dta._ndarray
|
||
|
|
||
|
obj = cls._simple_new(arr, arr.dtype)
|
||
|
|
||
|
# for foo in [arr, dta, obj]: ...
|
||
|
|
||
|
%timeit foo.copy()
|
||
|
299 ns ± 30 ns per loop # <-- arr underlying ndarray (for reference)
|
||
|
530 ns ± 9.24 ns per loop # <-- dta with cython NDArrayBacked
|
||
|
1.66 µs ± 46.3 ns per loop # <-- dta without cython NDArrayBacked
|
||
|
328 ns ± 5.29 ns per loop # <-- obj with NDArrayBacked.__cinit__
|
||
|
371 ns ± 6.97 ns per loop # <-- obj with NDArrayBacked._simple_new
|
||
|
|
||
|
%timeit foo.T
|
||
|
125 ns ± 6.27 ns per loop # <-- arr underlying ndarray (for reference)
|
||
|
226 ns ± 7.66 ns per loop # <-- dta with cython NDArrayBacked
|
||
|
911 ns ± 16.6 ns per loop # <-- dta without cython NDArrayBacked
|
||
|
215 ns ± 4.54 ns per loop # <-- obj with NDArrayBacked._simple_new
|
||
|
|
||
|
"""
|
||
|
# TODO: implement take in terms of cnp.PyArray_TakeFrom
|
||
|
# TODO: implement concat_same_type in terms of cnp.PyArray_Concatenate
|
||
|
|
||
|
# cdef:
|
||
|
# readonly ndarray _ndarray
|
||
|
# readonly object _dtype
|
||
|
|
||
|
def __init__(self, ndarray values, object dtype):
|
||
|
self._ndarray = values
|
||
|
self._dtype = dtype
|
||
|
|
||
|
@classmethod
|
||
|
def _simple_new(cls, ndarray values, object dtype):
|
||
|
cdef:
|
||
|
NDArrayBacked obj
|
||
|
obj = NDArrayBacked.__new__(cls)
|
||
|
obj._ndarray = values
|
||
|
obj._dtype = dtype
|
||
|
return obj
|
||
|
|
||
|
cpdef NDArrayBacked _from_backing_data(self, ndarray values):
|
||
|
"""
|
||
|
Construct a new ExtensionArray `new_array` with `arr` as its _ndarray.
|
||
|
|
||
|
This should round-trip:
|
||
|
self == self._from_backing_data(self._ndarray)
|
||
|
"""
|
||
|
# TODO: re-reuse simple_new if/when it can be cpdef
|
||
|
cdef:
|
||
|
NDArrayBacked obj
|
||
|
obj = NDArrayBacked.__new__(type(self))
|
||
|
obj._ndarray = values
|
||
|
obj._dtype = self._dtype
|
||
|
return obj
|
||
|
|
||
|
cpdef __setstate__(self, state):
|
||
|
if isinstance(state, dict):
|
||
|
if "_data" in state:
|
||
|
data = state.pop("_data")
|
||
|
elif "_ndarray" in state:
|
||
|
data = state.pop("_ndarray")
|
||
|
else:
|
||
|
raise ValueError # pragma: no cover
|
||
|
self._ndarray = data
|
||
|
self._dtype = state.pop("_dtype")
|
||
|
|
||
|
for key, val in state.items():
|
||
|
setattr(self, key, val)
|
||
|
elif isinstance(state, tuple):
|
||
|
if len(state) != 3:
|
||
|
if len(state) == 1 and isinstance(state[0], dict):
|
||
|
self.__setstate__(state[0])
|
||
|
return
|
||
|
raise NotImplementedError(state) # pragma: no cover
|
||
|
|
||
|
data, dtype = state[:2]
|
||
|
if isinstance(dtype, np.ndarray):
|
||
|
dtype, data = data, dtype
|
||
|
self._ndarray = data
|
||
|
self._dtype = dtype
|
||
|
|
||
|
if isinstance(state[2], dict):
|
||
|
for key, val in state[2].items():
|
||
|
setattr(self, key, val)
|
||
|
else:
|
||
|
raise NotImplementedError(state) # pragma: no cover
|
||
|
else:
|
||
|
raise NotImplementedError(state) # pragma: no cover
|
||
|
|
||
|
def __len__(self) -> int:
|
||
|
return len(self._ndarray)
|
||
|
|
||
|
@property
|
||
|
def shape(self):
|
||
|
# object cast bc _ndarray.shape is npy_intp*
|
||
|
return (<object>(self._ndarray)).shape
|
||
|
|
||
|
@property
|
||
|
def ndim(self) -> int:
|
||
|
return self._ndarray.ndim
|
||
|
|
||
|
@property
|
||
|
def size(self) -> int:
|
||
|
return self._ndarray.size
|
||
|
|
||
|
@property
|
||
|
def nbytes(self) -> int:
|
||
|
return self._ndarray.nbytes
|
||
|
|
||
|
def copy(self, order="C"):
|
||
|
cdef:
|
||
|
cnp.NPY_ORDER order_code
|
||
|
int success
|
||
|
|
||
|
success = cnp.PyArray_OrderConverter(order, &order_code)
|
||
|
if not success:
|
||
|
# clear exception so that we don't get a SystemError
|
||
|
PyErr_Clear()
|
||
|
# same message used by numpy
|
||
|
msg = f"order must be one of 'C', 'F', 'A', or 'K' (got '{order}')"
|
||
|
raise ValueError(msg)
|
||
|
|
||
|
res_values = cnp.PyArray_NewCopy(self._ndarray, order_code)
|
||
|
return self._from_backing_data(res_values)
|
||
|
|
||
|
def delete(self, loc, axis=0):
|
||
|
res_values = np.delete(self._ndarray, loc, axis=axis)
|
||
|
return self._from_backing_data(res_values)
|
||
|
|
||
|
def swapaxes(self, axis1, axis2):
|
||
|
res_values = cnp.PyArray_SwapAxes(self._ndarray, axis1, axis2)
|
||
|
return self._from_backing_data(res_values)
|
||
|
|
||
|
# TODO: pass NPY_MAXDIMS equiv to axis=None?
|
||
|
def repeat(self, repeats, axis: int | np.integer = 0):
|
||
|
if axis is None:
|
||
|
axis = 0
|
||
|
res_values = cnp.PyArray_Repeat(self._ndarray, repeats, <int>axis)
|
||
|
return self._from_backing_data(res_values)
|
||
|
|
||
|
def reshape(self, *args, **kwargs):
|
||
|
res_values = self._ndarray.reshape(*args, **kwargs)
|
||
|
return self._from_backing_data(res_values)
|
||
|
|
||
|
def ravel(self, order="C"):
|
||
|
# cnp.PyArray_OrderConverter(PyObject* obj, NPY_ORDER* order)
|
||
|
# res_values = cnp.PyArray_Ravel(self._ndarray, order)
|
||
|
res_values = self._ndarray.ravel(order)
|
||
|
return self._from_backing_data(res_values)
|
||
|
|
||
|
@property
|
||
|
def T(self):
|
||
|
res_values = self._ndarray.T
|
||
|
return self._from_backing_data(res_values)
|
||
|
|
||
|
def transpose(self, *axes):
|
||
|
res_values = self._ndarray.transpose(*axes)
|
||
|
return self._from_backing_data(res_values)
|