""" Testing that we work in the downstream packages """ import importlib import subprocess import sys import numpy as np import pytest import pandas.util._test_decorators as td import pandas as pd from pandas import ( DataFrame, Series, ) import pandas._testing as tm # geopandas, xarray, fsspec, fastparquet all produce these pytestmark = pytest.mark.filterwarnings( "ignore:distutils Version classes are deprecated.*:DeprecationWarning" ) def import_module(name): # we *only* want to skip if the module is truly not available # and NOT just an actual import error because of pandas changes try: return importlib.import_module(name) except ModuleNotFoundError: pytest.skip(f"skipping as {name} not available") @pytest.fixture def df(): return DataFrame({"A": [1, 2, 3]}) @pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning") def test_dask(df): # dask sets "compute.use_numexpr" to False, so catch the current value # and ensure to reset it afterwards to avoid impacting other tests olduse = pd.get_option("compute.use_numexpr") try: toolz = import_module("toolz") # noqa:F841 dask = import_module("dask") # noqa:F841 import dask.dataframe as dd ddf = dd.from_pandas(df, npartitions=3) assert ddf.A is not None assert ddf.compute() is not None finally: pd.set_option("compute.use_numexpr", olduse) @pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning") @pytest.mark.filterwarnings("ignore:The __array_wrap__:DeprecationWarning") def test_dask_ufunc(): # At the time of dask 2022.01.0, dask is still directly using __array_wrap__ # for some ufuncs (https://github.com/dask/dask/issues/8580). # dask sets "compute.use_numexpr" to False, so catch the current value # and ensure to reset it afterwards to avoid impacting other tests olduse = pd.get_option("compute.use_numexpr") try: dask = import_module("dask") # noqa:F841 import dask.array as da import dask.dataframe as dd s = Series([1.5, 2.3, 3.7, 4.0]) ds = dd.from_pandas(s, npartitions=2) result = da.fix(ds).compute() expected = np.fix(s) tm.assert_series_equal(result, expected) finally: pd.set_option("compute.use_numexpr", olduse) @td.skip_if_no("dask") def test_construct_dask_float_array_int_dtype_match_ndarray(): # GH#40110 make sure we treat a float-dtype dask array with the same # rules we would for an ndarray import dask.dataframe as dd arr = np.array([1, 2.5, 3]) darr = dd.from_array(arr) res = Series(darr) expected = Series(arr) tm.assert_series_equal(res, expected) res = Series(darr, dtype="i8") expected = Series(arr, dtype="i8") tm.assert_series_equal(res, expected) msg = "In a future version, passing float-dtype values containing NaN" arr[2] = np.nan with tm.assert_produces_warning(FutureWarning, match=msg): res = Series(darr, dtype="i8") with tm.assert_produces_warning(FutureWarning, match=msg): expected = Series(arr, dtype="i8") tm.assert_series_equal(res, expected) def test_xarray(df): xarray = import_module("xarray") # noqa:F841 assert df.to_xarray() is not None @td.skip_if_no("cftime") @td.skip_if_no("xarray", "0.10.4") def test_xarray_cftimeindex_nearest(): # https://github.com/pydata/xarray/issues/3751 import cftime import xarray times = xarray.cftime_range("0001", periods=2) key = cftime.DatetimeGregorian(2000, 1, 1) with tm.assert_produces_warning( FutureWarning, match="deprecated", check_stacklevel=False ): result = times.get_loc(key, method="nearest") expected = 1 assert result == expected def test_oo_optimizable(): # GH 21071 subprocess.check_call([sys.executable, "-OO", "-c", "import pandas"]) def test_oo_optimized_datetime_index_unpickle(): # GH 42866 subprocess.check_call( [ sys.executable, "-OO", "-c", ( "import pandas as pd, pickle; " "pickle.loads(pickle.dumps(pd.date_range('2021-01-01', periods=1)))" ), ] ) @pytest.mark.network @tm.network # Cython import warning @pytest.mark.filterwarnings("ignore:pandas.util.testing is deprecated") @pytest.mark.filterwarnings("ignore:can't:ImportWarning") @pytest.mark.filterwarnings("ignore:.*64Index is deprecated:FutureWarning") @pytest.mark.filterwarnings( # patsy needs to update their imports "ignore:Using or importing the ABCs from 'collections:DeprecationWarning" ) @pytest.mark.filterwarnings( # numpy 1.22 "ignore:`np.MachAr` is deprecated.*:DeprecationWarning" ) def test_statsmodels(): statsmodels = import_module("statsmodels") # noqa:F841 import statsmodels.api as sm import statsmodels.formula.api as smf df = sm.datasets.get_rdataset("Guerry", "HistData").data smf.ols("Lottery ~ Literacy + np.log(Pop1831)", data=df).fit() # Cython import warning @pytest.mark.filterwarnings("ignore:can't:ImportWarning") def test_scikit_learn(): sklearn = import_module("sklearn") # noqa:F841 from sklearn import ( datasets, svm, ) digits = datasets.load_digits() clf = svm.SVC(gamma=0.001, C=100.0) clf.fit(digits.data[:-1], digits.target[:-1]) clf.predict(digits.data[-1:]) # Cython import warning and traitlets @pytest.mark.network @tm.network @pytest.mark.filterwarnings("ignore") def test_seaborn(): seaborn = import_module("seaborn") tips = seaborn.load_dataset("tips") seaborn.stripplot(x="day", y="total_bill", data=tips) def test_pandas_gbq(): # Older versions import from non-public, non-existent pandas funcs pytest.importorskip("pandas_gbq", minversion="0.10.0") pandas_gbq = import_module("pandas_gbq") # noqa:F841 @pytest.mark.network @tm.network @pytest.mark.xfail( raises=ValueError, reason="The Quandl API key must be provided either through the api_key " "variable or through the environmental variable QUANDL_API_KEY", ) def test_pandas_datareader(): pandas_datareader = import_module("pandas_datareader") pandas_datareader.DataReader("F", "quandl", "2017-01-01", "2017-02-01") def test_geopandas(): geopandas = import_module("geopandas") gdf = geopandas.GeoDataFrame( {"col": [1, 2, 3], "geometry": geopandas.points_from_xy([1, 2, 3], [1, 2, 3])} ) assert gdf[["col", "geometry"]].geometry.x.equals(Series([1.0, 2.0, 3.0])) # Cython import warning @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") @pytest.mark.filterwarnings("ignore:RangeIndex.* is deprecated:DeprecationWarning") def test_pyarrow(df): pyarrow = import_module("pyarrow") table = pyarrow.Table.from_pandas(df) result = table.to_pandas() tm.assert_frame_equal(result, df) def test_torch_frame_construction(using_array_manager): # GH#44616 torch = import_module("torch") val_tensor = torch.randn(700, 64) df = DataFrame(val_tensor) if not using_array_manager: assert np.shares_memory(df, val_tensor) ser = Series(val_tensor[0]) assert np.shares_memory(ser, val_tensor) def test_yaml_dump(df): # GH#42748 yaml = import_module("yaml") dumped = yaml.dump(df) loaded = yaml.load(dumped, Loader=yaml.Loader) tm.assert_frame_equal(df, loaded) loaded2 = yaml.load(dumped, Loader=yaml.UnsafeLoader) tm.assert_frame_equal(df, loaded2) def test_missing_required_dependency(): # GH 23868 # To ensure proper isolation, we pass these flags # -S : disable site-packages # -s : disable user site-packages # -E : disable PYTHON* env vars, especially PYTHONPATH # https://github.com/MacPython/pandas-wheels/pull/50 pyexe = sys.executable.replace("\\", "/") # We skip this test if pandas is installed as a site package. We first # import the package normally and check the path to the module before # executing the test which imports pandas with site packages disabled. call = [pyexe, "-c", "import pandas;print(pandas.__file__)"] output = subprocess.check_output(call).decode() if "site-packages" in output: pytest.skip("pandas installed as site package") # This test will fail if pandas is installed as a site package. The flags # prevent pandas being imported and the test will report Failed: DID NOT # RAISE call = [pyexe, "-sSE", "-c", "import pandas"] msg = ( rf"Command '\['{pyexe}', '-sSE', '-c', 'import pandas'\]' " "returned non-zero exit status 1." ) with pytest.raises(subprocess.CalledProcessError, match=msg) as exc: subprocess.check_output(call, stderr=subprocess.STDOUT) output = exc.value.stdout.decode() for name in ["numpy", "pytz", "dateutil"]: assert name in output def test_frame_setitem_dask_array_into_new_col(): # GH#47128 # dask sets "compute.use_numexpr" to False, so catch the current value # and ensure to reset it afterwards to avoid impacting other tests olduse = pd.get_option("compute.use_numexpr") try: dask = import_module("dask") # noqa:F841 import dask.array as da dda = da.array([1, 2]) df = DataFrame({"a": ["a", "b"]}) df["b"] = dda df["c"] = dda df.loc[[False, True], "b"] = 100 result = df.loc[[1], :] expected = DataFrame({"a": ["b"], "b": [100], "c": [2]}, index=[1]) tm.assert_frame_equal(result, expected) finally: pd.set_option("compute.use_numexpr", olduse)