aoc-2022/venv/Lib/site-packages/pandas/tests/indexes/period/test_setops.py

361 lines
12 KiB
Python
Raw Normal View History

import numpy as np
import pandas as pd
from pandas import (
PeriodIndex,
date_range,
period_range,
)
import pandas._testing as tm
def _permute(obj):
return obj.take(np.random.permutation(len(obj)))
class TestPeriodIndex:
def test_union(self, sort):
# union
other1 = period_range("1/1/2000", freq="D", periods=5)
rng1 = period_range("1/6/2000", freq="D", periods=5)
expected1 = PeriodIndex(
[
"2000-01-06",
"2000-01-07",
"2000-01-08",
"2000-01-09",
"2000-01-10",
"2000-01-01",
"2000-01-02",
"2000-01-03",
"2000-01-04",
"2000-01-05",
],
freq="D",
)
rng2 = period_range("1/1/2000", freq="D", periods=5)
other2 = period_range("1/4/2000", freq="D", periods=5)
expected2 = period_range("1/1/2000", freq="D", periods=8)
rng3 = period_range("1/1/2000", freq="D", periods=5)
other3 = PeriodIndex([], freq="D")
expected3 = period_range("1/1/2000", freq="D", periods=5)
rng4 = period_range("2000-01-01 09:00", freq="H", periods=5)
other4 = period_range("2000-01-02 09:00", freq="H", periods=5)
expected4 = PeriodIndex(
[
"2000-01-01 09:00",
"2000-01-01 10:00",
"2000-01-01 11:00",
"2000-01-01 12:00",
"2000-01-01 13:00",
"2000-01-02 09:00",
"2000-01-02 10:00",
"2000-01-02 11:00",
"2000-01-02 12:00",
"2000-01-02 13:00",
],
freq="H",
)
rng5 = PeriodIndex(
["2000-01-01 09:01", "2000-01-01 09:03", "2000-01-01 09:05"], freq="T"
)
other5 = PeriodIndex(
["2000-01-01 09:01", "2000-01-01 09:05", "2000-01-01 09:08"], freq="T"
)
expected5 = PeriodIndex(
[
"2000-01-01 09:01",
"2000-01-01 09:03",
"2000-01-01 09:05",
"2000-01-01 09:08",
],
freq="T",
)
rng6 = period_range("2000-01-01", freq="M", periods=7)
other6 = period_range("2000-04-01", freq="M", periods=7)
expected6 = period_range("2000-01-01", freq="M", periods=10)
rng7 = period_range("2003-01-01", freq="A", periods=5)
other7 = period_range("1998-01-01", freq="A", periods=8)
expected7 = PeriodIndex(
[
"2003",
"2004",
"2005",
"2006",
"2007",
"1998",
"1999",
"2000",
"2001",
"2002",
],
freq="A",
)
rng8 = PeriodIndex(
["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"], freq="D"
)
other8 = period_range("1/6/2000", freq="D", periods=5)
expected8 = PeriodIndex(
[
"1/3/2000",
"1/2/2000",
"1/1/2000",
"1/5/2000",
"1/4/2000",
"1/6/2000",
"1/7/2000",
"1/8/2000",
"1/9/2000",
"1/10/2000",
],
freq="D",
)
for rng, other, expected in [
(rng1, other1, expected1),
(rng2, other2, expected2),
(rng3, other3, expected3),
(rng4, other4, expected4),
(rng5, other5, expected5),
(rng6, other6, expected6),
(rng7, other7, expected7),
(rng8, other8, expected8),
]:
result_union = rng.union(other, sort=sort)
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result_union, expected)
def test_union_misc(self, sort):
index = period_range("1/1/2000", "1/20/2000", freq="D")
result = index[:-5].union(index[10:], sort=sort)
tm.assert_index_equal(result, index)
# not in order
result = _permute(index[:-5]).union(_permute(index[10:]), sort=sort)
if sort is None:
tm.assert_index_equal(result, index)
assert tm.equalContents(result, index)
# cast if different frequencies
index = period_range("1/1/2000", "1/20/2000", freq="D")
index2 = period_range("1/1/2000", "1/20/2000", freq="W-WED")
result = index.union(index2, sort=sort)
expected = index.astype(object).union(index2.astype(object), sort=sort)
tm.assert_index_equal(result, expected)
def test_intersection(self, sort):
index = period_range("1/1/2000", "1/20/2000", freq="D")
result = index[:-5].intersection(index[10:], sort=sort)
tm.assert_index_equal(result, index[10:-5])
# not in order
left = _permute(index[:-5])
right = _permute(index[10:])
result = left.intersection(right, sort=sort)
if sort is None:
tm.assert_index_equal(result, index[10:-5])
assert tm.equalContents(result, index[10:-5])
# cast if different frequencies
index = period_range("1/1/2000", "1/20/2000", freq="D")
index2 = period_range("1/1/2000", "1/20/2000", freq="W-WED")
result = index.intersection(index2, sort=sort)
expected = pd.Index([], dtype=object)
tm.assert_index_equal(result, expected)
index3 = period_range("1/1/2000", "1/20/2000", freq="2D")
result = index.intersection(index3, sort=sort)
tm.assert_index_equal(result, expected)
def test_intersection_cases(self, sort):
base = period_range("6/1/2000", "6/30/2000", freq="D", name="idx")
# if target has the same name, it is preserved
rng2 = period_range("5/15/2000", "6/20/2000", freq="D", name="idx")
expected2 = period_range("6/1/2000", "6/20/2000", freq="D", name="idx")
# if target name is different, it will be reset
rng3 = period_range("5/15/2000", "6/20/2000", freq="D", name="other")
expected3 = period_range("6/1/2000", "6/20/2000", freq="D", name=None)
rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx")
expected4 = PeriodIndex([], name="idx", freq="D")
for (rng, expected) in [
(rng2, expected2),
(rng3, expected3),
(rng4, expected4),
]:
result = base.intersection(rng, sort=sort)
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == expected.freq
# non-monotonic
base = PeriodIndex(
["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"],
freq="D",
name="idx",
)
rng2 = PeriodIndex(
["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
freq="D",
name="idx",
)
expected2 = PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name="idx")
rng3 = PeriodIndex(
["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
freq="D",
name="other",
)
expected3 = PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name=None)
rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx")
expected4 = PeriodIndex([], freq="D", name="idx")
for (rng, expected) in [
(rng2, expected2),
(rng3, expected3),
(rng4, expected4),
]:
result = base.intersection(rng, sort=sort)
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)
assert result.name == expected.name
assert result.freq == "D"
# empty same freq
rng = date_range("6/1/2000", "6/15/2000", freq="T")
result = rng[0:0].intersection(rng)
assert len(result) == 0
result = rng.intersection(rng[0:0])
assert len(result) == 0
def test_difference(self, sort):
# diff
period_rng = ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"]
rng1 = PeriodIndex(period_rng, freq="D")
other1 = period_range("1/6/2000", freq="D", periods=5)
expected1 = rng1
rng2 = PeriodIndex(period_rng, freq="D")
other2 = period_range("1/4/2000", freq="D", periods=5)
expected2 = PeriodIndex(["1/3/2000", "1/2/2000", "1/1/2000"], freq="D")
rng3 = PeriodIndex(period_rng, freq="D")
other3 = PeriodIndex([], freq="D")
expected3 = rng3
period_rng = [
"2000-01-01 10:00",
"2000-01-01 09:00",
"2000-01-01 12:00",
"2000-01-01 11:00",
"2000-01-01 13:00",
]
rng4 = PeriodIndex(period_rng, freq="H")
other4 = period_range("2000-01-02 09:00", freq="H", periods=5)
expected4 = rng4
rng5 = PeriodIndex(
["2000-01-01 09:03", "2000-01-01 09:01", "2000-01-01 09:05"], freq="T"
)
other5 = PeriodIndex(["2000-01-01 09:01", "2000-01-01 09:05"], freq="T")
expected5 = PeriodIndex(["2000-01-01 09:03"], freq="T")
period_rng = [
"2000-02-01",
"2000-01-01",
"2000-06-01",
"2000-07-01",
"2000-05-01",
"2000-03-01",
"2000-04-01",
]
rng6 = PeriodIndex(period_rng, freq="M")
other6 = period_range("2000-04-01", freq="M", periods=7)
expected6 = PeriodIndex(["2000-02-01", "2000-01-01", "2000-03-01"], freq="M")
period_rng = ["2003", "2007", "2006", "2005", "2004"]
rng7 = PeriodIndex(period_rng, freq="A")
other7 = period_range("1998-01-01", freq="A", periods=8)
expected7 = PeriodIndex(["2007", "2006"], freq="A")
for rng, other, expected in [
(rng1, other1, expected1),
(rng2, other2, expected2),
(rng3, other3, expected3),
(rng4, other4, expected4),
(rng5, other5, expected5),
(rng6, other6, expected6),
(rng7, other7, expected7),
]:
result_difference = rng.difference(other, sort=sort)
if sort is None and len(other):
# We dont sort (yet?) when empty GH#24959
expected = expected.sort_values()
tm.assert_index_equal(result_difference, expected)
def test_difference_freq(self, sort):
# GH14323: difference of Period MUST preserve frequency
# but the ability to union results must be preserved
index = period_range("20160920", "20160925", freq="D")
other = period_range("20160921", "20160924", freq="D")
expected = PeriodIndex(["20160920", "20160925"], freq="D")
idx_diff = index.difference(other, sort)
tm.assert_index_equal(idx_diff, expected)
tm.assert_attr_equal("freq", idx_diff, expected)
other = period_range("20160922", "20160925", freq="D")
idx_diff = index.difference(other, sort)
expected = PeriodIndex(["20160920", "20160921"], freq="D")
tm.assert_index_equal(idx_diff, expected)
tm.assert_attr_equal("freq", idx_diff, expected)
def test_intersection_equal_duplicates(self):
# GH#38302
idx = period_range("2011-01-01", periods=2)
idx_dup = idx.append(idx)
result = idx_dup.intersection(idx_dup)
tm.assert_index_equal(result, idx)
def test_union_duplicates(self):
# GH#36289
idx = period_range("2011-01-01", periods=2)
idx_dup = idx.append(idx)
idx2 = period_range("2011-01-02", periods=2)
idx2_dup = idx2.append(idx2)
result = idx_dup.union(idx2_dup)
expected = PeriodIndex(
[
"2011-01-01",
"2011-01-01",
"2011-01-02",
"2011-01-02",
"2011-01-03",
"2011-01-03",
],
freq="D",
)
tm.assert_index_equal(result, expected)