285 lines
8.5 KiB
Python
285 lines
8.5 KiB
Python
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
from pandas import (
|
||
|
DataFrame,
|
||
|
NaT,
|
||
|
date_range,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
|
||
|
@pytest.fixture
def float_frame_with_na():
    """
    Fixture for a float DataFrame, indexed by unique strings, with NaN gaps

    Columns are ['A', 'B', 'C', 'D']. The rows at positions 5 through 9 are
    NaN in every column, and the rows at positions 15 through 19 are NaN in
    the last two columns.

                       A         B         C         D
    ABwBzA0ljw -1.128865 -0.897161  0.046603  0.274997
    DJiRzmbyQF  0.728869  0.233502  0.722431 -0.890872
    neMgPD5UBF  0.486072 -1.027393 -0.031553  1.449522
    0yWA4n8VeX -1.937191 -1.142531  0.805215 -0.462018
    3slYUbbqU1  0.153260  1.164691  1.489795 -0.545826
    soujjZ0A08       NaN       NaN       NaN       NaN
    7W6NLGsjB9       NaN       NaN       NaN       NaN
    ...              ...       ...       ...       ...
    uhfeaNkCR1 -0.231210 -0.340472  0.244717 -0.901590
    n6p7GYuBIV -0.419052  1.922721 -0.125361 -0.727717
    ZhzAeY6p1y  1.234374 -1.425359 -0.827038 -0.633189
    uWdPsORyUh  0.046738 -0.980445 -1.102965  0.605503
    3DJA6aN590 -0.091018 -1.684734 -1.100900  0.215947
    2GBPAzdbMk -2.883405 -1.021071  1.209877  1.633083
    sHadBoyVHw -2.223032 -0.326384  0.258931  0.245517

    [30 rows x 4 columns]
    """
    frame = DataFrame(tm.getSeriesData())

    # Positional selections that get blanked out below.
    fully_missing_rows = slice(5, 10)
    partly_missing_rows = slice(15, 20)
    trailing_two_columns = slice(-2, None)

    # Whole rows first, then only the last two columns of a later run of rows.
    frame.iloc[fully_missing_rows] = np.nan
    frame.iloc[partly_missing_rows, trailing_two_columns] = np.nan
    return frame
|
||
|
|
||
|
|
||
|
@pytest.fixture
def bool_frame_with_na():
    """
    Fixture for a DataFrame of booleans, indexed by unique strings, with NaN gaps

    Columns are ['A', 'B', 'C', 'D']. The values come from a ``> 0``
    comparison and are stored with object dtype so that NaN can be mixed in.
    The rows at positions 5 through 9 are NaN in every column, and the rows
    at positions 15 through 19 are NaN in the last two columns. The first
    four diagonal entries are forced to True.

                    A      B      C      D
    zBZxY2IDGd  False  False  False  False
    IhBWBMWllt  False   True   True   True
    ctjdvZSR6R   True  False   True   True
    AVTujptmxb  False   True  False   True
    G9lrImrSWq  False  False  False   True
    sFFwdIUfz2    NaN    NaN    NaN    NaN
    s15ptEJnRb    NaN    NaN    NaN    NaN
    ...           ...    ...    ...    ...
    UW41KkDyZ4   True   True  False  False
    l9l6XkOdqV   True  False  False  False
    X2MeZfzDYA  False   True  False  False
    xWkIKU7vfX  False   True  False   True
    QOhL6VmpGU  False  False  False   True
    22PwkRJdat  False   True  False  False
    kfboQ3VeIK   True  False   True  False

    [30 rows x 4 columns]
    """
    frame = (DataFrame(tm.getSeriesData()) > 0).astype(object)

    # Whole rows first, then only the last two columns of a later run of rows.
    frame.iloc[slice(5, 10)] = np.nan
    frame.iloc[slice(15, 20), slice(-2, None)] = np.nan

    # The `any` tests need every column to contain at least one True ahead of
    # its first NaN, so plant one on each of the first four diagonal cells.
    diagonal = range(4)
    for row_pos, col_pos in zip(diagonal, diagonal):
        frame.iloc[row_pos, col_pos] = True
    return frame
|
||
|
|
||
|
|
||
|
@pytest.fixture
def float_string_frame():
    """
    Fixture for a DataFrame mixing float columns with one string column

    Columns are ['A', 'B', 'C', 'D', 'foo']; the index holds unique strings
    and every entry of 'foo' is the string "bar".

                       A         B         C         D  foo
    w3orJvq07g -1.594062 -1.084273 -1.252457  0.356460  bar
    PeukuVdmz2  0.109855 -0.955086 -0.809485  0.409747  bar
    ahp2KvwiM8 -1.533729 -0.142519 -0.154666  1.302623  bar
    3WSJ7BUCGd  2.484964  0.213829  0.034778 -2.327831  bar
    khdAmufk0U -0.193480 -0.743518 -0.077987  0.153646  bar
    LE2DZiFlrE -0.193566 -1.343194 -0.107321  0.959978  bar
    HJXSJhVn7b  0.142590  1.257603 -0.659409 -0.223844  bar
    ...              ...       ...       ...       ...  ...
    9a1Vypttgw -1.316394  1.601354  0.173596  1.213196  bar
    h5d1gVFbEy  0.609475  1.106738 -0.155271  0.294630  bar
    mK9LsTQG92  1.303613  0.857040 -1.019153  0.369468  bar
    oOLksd9gKH  0.558219 -0.134491 -0.289869 -0.951033  bar
    9jgoOjKyHg  0.058270 -0.496110 -0.413212 -0.852659  bar
    jZLDHclHAO  0.096298  1.267510  0.549206 -0.005235  bar
    lR0nxDp1C2 -2.119350 -0.794384  0.544118  0.145849  bar

    [30 rows x 5 columns]
    """
    frame = DataFrame(tm.getSeriesData())
    # A scalar assignment fills the new column with the same string on every row.
    frame["foo"] = "bar"
    return frame
|
||
|
|
||
|
|
||
|
@pytest.fixture
def mixed_float_frame():
    """
    Fixture for a DataFrame whose columns use different float widths

    Columns are ['A', 'B', 'C', 'D'] and the index holds unique strings.
    'A' and 'B' are cast to float32, 'C' to float16 and 'D' to float64.

                       A         B         C         D
    GI7bbDaEZe -0.237908 -0.246225 -0.468506  0.752993
    KGp9mFepzA -1.140809 -0.644046 -1.225586  0.801588
    VeVYLAb1l2 -1.154013 -1.677615  0.690430 -0.003731
    kmPME4WKhO  0.979578  0.998274 -0.776367  0.897607
    CPyopdXTiz  0.048119 -0.257174  0.836426  0.111266
    0kJZQndAj0  0.274357 -0.281135 -0.344238  0.834541
    tqdwQsaHG8 -0.979716 -0.519897  0.582031  0.144710
    ...              ...       ...       ...       ...
    7FhZTWILQj -2.906357  1.261039 -0.780273 -0.537237
    4pUDPM4eGq -2.042512 -0.464382 -0.382080  1.132612
    B8dUgUzwTi -1.506637 -0.364435  1.087891  0.297653
    hErlVYjVv9  1.477453 -0.495515 -0.713867  1.438427
    1BKN3o7YLs  0.127535 -0.349812 -0.881836  0.489827
    9S4Ekn7zga  1.445518 -2.095149  0.031982  0.373204
    xN1dNn6OV6  1.425017 -0.983995 -0.363281 -0.224502

    [30 rows x 4 columns]
    """
    frame = DataFrame(tm.getSeriesData())

    # Re-assign each column, one after another, with its target float width.
    target_dtypes = {
        "A": "float32",
        "B": "float32",
        "C": "float16",
        "D": "float64",
    }
    for column, dtype in target_dtypes.items():
        frame[column] = frame[column].astype(dtype)
    return frame
|
||
|
|
||
|
|
||
|
@pytest.fixture
def mixed_int_frame():
    """
    Fixture for a DataFrame whose columns use different integer types

    Columns are ['A', 'B', 'C', 'D'] and the index holds unique strings.
    'A' is int32, 'B' is a column of ones stored as uint64, 'C' is uint8 and
    'D' is the uint8 'C' column widened to int64, so 'C' and 'D' hold the
    same values.

                A  B    C    D
    mUrCZ67juP  0  1    2    2
    rw99ACYaKS  0  1    0    0
    7QsEcpaaVU  0  1    1    1
    xkrimI2pcE  0  1    0    0
    dz01SuzoS8  0  1  255  255
    ccQkqOHX75 -1  1    0    0
    DN0iXaoDLd  0  1    0    0
    ...        .. ..  ...  ...
    Dfb141wAaQ  1  1  254  254
    IPD8eQOVu5  0  1    0    0
    CcaKulsCmv  0  1    0    0
    rIBa8gu7E5  0  1    0    0
    RP6peZmh5o  0  1    1    1
    NMb9pipQWQ  0  1    0    0
    PqgbJEzjib  0  1    3    3

    [30 rows x 4 columns]
    """
    # Cast every source series to the default integer type before building
    # the frame.
    integer_series = {
        name: series.astype(int) for name, series in tm.getSeriesData().items()
    }
    frame = DataFrame(integer_series)

    frame["A"] = frame["A"].astype("int32")
    frame["B"] = np.ones(len(frame["B"]), dtype="uint64")
    frame["C"] = frame["C"].astype("uint8")
    # 'D' is deliberately derived from the already-cast 'C', not from the
    # original 'D' data; the order of these assignments therefore matters.
    frame["D"] = frame["C"].astype("int64")
    return frame
|
||
|
|
||
|
|
||
|
@pytest.fixture
def timezone_frame():
    """
    Fixture for a DataFrame of date_range columns in different time zones

    Columns are ['A', 'B', 'C']: 'A' has no time zone, 'B' uses US/Eastern
    and 'C' uses CET. The middle row of 'B' and 'C' is set to NaT.

               A                         B                         C
    0 2013-01-01 2013-01-01 00:00:00-05:00 2013-01-01 00:00:00+01:00
    1 2013-01-02                       NaT                       NaT
    2 2013-01-03 2013-01-03 00:00:00-05:00 2013-01-03 00:00:00+01:00
    """
    start = "20130101"
    n_periods = 3
    frame = DataFrame(
        {
            "A": date_range(start, periods=n_periods),
            "B": date_range(start, periods=n_periods, tz="US/Eastern"),
            "C": date_range(start, periods=n_periods, tz="CET"),
        }
    )

    # Knock out the middle row in both time-zone-aware columns (positions 1
    # and 2), one cell at a time.
    for column_pos in (1, 2):
        frame.iloc[1, column_pos] = NaT
    return frame
|
||
|
|
||
|
|
||
|
@pytest.fixture
def uint64_frame():
    """
    Fixture for a DataFrame constructed with dtype uint64

    Columns are ['A', 'B']. 'A' counts 0, 1, 2 and 'B' holds 2**63,
    2**63 + 5 and 2**63 + 10.
    """
    column_data = {
        "A": np.arange(3),
        "B": [2**63, 2**63 + 5, 2**63 + 10],
    }
    return DataFrame(column_data, dtype=np.uint64)
|
||
|
|
||
|
|
||
|
@pytest.fixture
def simple_frame():
    """
    Fixture for a small 3x3 DataFrame of floats

    Columns are ['one', 'two', 'three'] and the index is ['a', 'b', 'c'];
    the values run from 1.0 to 9.0 row by row.

       one  two  three
    a  1.0  2.0    3.0
    b  4.0  5.0    6.0
    c  7.0  8.0    9.0
    """
    values = np.array(
        [
            [1.0, 2.0, 3.0],
            [4.0, 5.0, 6.0],
            [7.0, 8.0, 9.0],
        ]
    )
    row_labels = ["a", "b", "c"]
    column_labels = ["one", "two", "three"]
    return DataFrame(values, columns=column_labels, index=row_labels)
|
||
|
|
||
|
|
||
|
@pytest.fixture
def frame_of_index_cols():
    """
    Fixture for a DataFrame whose columns can serve as index candidates

    Columns are ['A', 'B', 'C', 'D', 'E', ('tuple', 'as', 'label')].
    'A' and 'B' each contain duplicates but are unique as a pair, and 'C'
    is unique. 'D', 'E' and the tuple-labelled column are filled with
    random normal draws.

         A      B  C         D         E  (tuple, as, label)
    0  foo    one  a  0.608477 -0.012500           -1.664297
    1  foo    two  b -0.633460  0.249614           -0.364411
    2  foo  three  c  0.615256  2.154968           -0.834666
    3  bar    one  d  0.234246  1.085675            0.718445
    4  bar    two  e  0.533841 -0.005702           -3.533912
    """
    n_rows = 5
    # The three random columns are drawn in dict order (D, E, then the tuple
    # label), which keeps the sequence of calls to the random generator fixed.
    column_data = {
        "A": ["foo", "foo", "foo", "bar", "bar"],
        "B": ["one", "two", "three", "one", "two"],
        "C": ["a", "b", "c", "d", "e"],
        "D": np.random.randn(n_rows),
        "E": np.random.randn(n_rows),
        ("tuple", "as", "label"): np.random.randn(n_rows),
    }
    return DataFrame(column_data)
|
||
|
|
||
|
|
||
|
@pytest.fixture(
    params=[
        "any", "all", "count",
        "sum", "prod",
        "max", "min",
        "mean", "median",
        "skew", "kurt",
        "sem", "var", "std", "mad",
    ]
)
def reduction_functions(request):
    """
    Parametrized fixture yielding the name of one reduction at a time

    The value is the plain string taken from ``params``; a test using this
    fixture runs once per listed name.
    """
    reduction_name = request.param
    return reduction_name
|