"""
|
||
|
Internal module for formatting output data in csv, html, xml,
|
||
|
and latex files. This module also applies to display formatting.
|
||
|
"""
|
||
|
from __future__ import annotations
|
||
|
|
||
|
from contextlib import contextmanager
|
||
|
from csv import (
|
||
|
QUOTE_NONE,
|
||
|
QUOTE_NONNUMERIC,
|
||
|
)
|
||
|
import decimal
|
||
|
from functools import partial
|
||
|
from io import StringIO
|
||
|
import math
|
||
|
import re
|
||
|
from shutil import get_terminal_size
|
||
|
from typing import (
|
||
|
IO,
|
||
|
TYPE_CHECKING,
|
||
|
Any,
|
||
|
Callable,
|
||
|
Final,
|
||
|
Hashable,
|
||
|
Iterable,
|
||
|
Iterator,
|
||
|
List,
|
||
|
Mapping,
|
||
|
Sequence,
|
||
|
cast,
|
||
|
)
|
||
|
from unicodedata import east_asian_width
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
from pandas._config.config import (
|
||
|
get_option,
|
||
|
set_option,
|
||
|
)
|
||
|
|
||
|
from pandas._libs import lib
|
||
|
from pandas._libs.missing import NA
|
||
|
from pandas._libs.tslibs import (
|
||
|
NaT,
|
||
|
Timedelta,
|
||
|
Timestamp,
|
||
|
get_unit_from_dtype,
|
||
|
iNaT,
|
||
|
periods_per_day,
|
||
|
)
|
||
|
from pandas._libs.tslibs.nattype import NaTType
|
||
|
from pandas._typing import (
|
||
|
ArrayLike,
|
||
|
Axes,
|
||
|
ColspaceArgType,
|
||
|
ColspaceType,
|
||
|
CompressionOptions,
|
||
|
FilePath,
|
||
|
FloatFormatType,
|
||
|
FormattersType,
|
||
|
IndexLabel,
|
||
|
StorageOptions,
|
||
|
WriteBuffer,
|
||
|
)
|
||
|
from pandas.util._decorators import deprecate_kwarg
|
||
|
|
||
|
from pandas.core.dtypes.common import (
|
||
|
is_categorical_dtype,
|
||
|
is_complex_dtype,
|
||
|
is_datetime64_dtype,
|
||
|
is_extension_array_dtype,
|
||
|
is_float,
|
||
|
is_float_dtype,
|
||
|
is_integer,
|
||
|
is_integer_dtype,
|
||
|
is_list_like,
|
||
|
is_numeric_dtype,
|
||
|
is_scalar,
|
||
|
is_timedelta64_dtype,
|
||
|
)
|
||
|
from pandas.core.dtypes.dtypes import DatetimeTZDtype
|
||
|
from pandas.core.dtypes.missing import (
|
||
|
isna,
|
||
|
notna,
|
||
|
)
|
||
|
|
||
|
from pandas.core.arrays import (
|
||
|
Categorical,
|
||
|
DatetimeArray,
|
||
|
TimedeltaArray,
|
||
|
)
|
||
|
from pandas.core.base import PandasObject
|
||
|
import pandas.core.common as com
|
||
|
from pandas.core.construction import extract_array
|
||
|
from pandas.core.indexes.api import (
|
||
|
Index,
|
||
|
MultiIndex,
|
||
|
PeriodIndex,
|
||
|
ensure_index,
|
||
|
)
|
||
|
from pandas.core.indexes.datetimes import DatetimeIndex
|
||
|
from pandas.core.indexes.timedeltas import TimedeltaIndex
|
||
|
from pandas.core.reshape.concat import concat
|
||
|
|
||
|
from pandas.io.common import (
|
||
|
check_parent_directory,
|
||
|
stringify_path,
|
||
|
)
|
||
|
from pandas.io.formats.printing import (
|
||
|
adjoin,
|
||
|
justify,
|
||
|
pprint_thing,
|
||
|
)
|
||
|
|
||
|
if TYPE_CHECKING:
|
||
|
from pandas import (
|
||
|
DataFrame,
|
||
|
Series,
|
||
|
)
|
||
|
|
||
|
|
||
|
common_docstring: Final = """
|
||
|
Parameters
|
||
|
----------
|
||
|
buf : str, Path or StringIO-like, optional, default None
|
||
|
Buffer to write to. If None, the output is returned as a string.
|
||
|
columns : sequence, optional, default None
|
||
|
The subset of columns to write. Writes all columns by default.
|
||
|
col_space : %(col_space_type)s, optional
|
||
|
%(col_space)s.
|
||
|
header : %(header_type)s, optional
|
||
|
%(header)s.
|
||
|
index : bool, optional, default True
|
||
|
Whether to print index (row) labels.
|
||
|
na_rep : str, optional, default 'NaN'
|
||
|
String representation of ``NaN`` to use.
|
||
|
formatters : list, tuple or dict of one-param. functions, optional
|
||
|
Formatter functions to apply to columns' elements by position or
|
||
|
name.
|
||
|
The result of each function must be a unicode string.
|
||
|
List/tuple must be of length equal to the number of columns.
|
||
|
float_format : one-parameter function, optional, default None
|
||
|
Formatter function to apply to columns' elements if they are
|
||
|
floats. This function must return a unicode string and will be
|
||
|
applied only to the non-``NaN`` elements, with ``NaN`` being
|
||
|
handled by ``na_rep``.
|
||
|
|
||
|
.. versionchanged:: 1.2.0
|
||
|
|
||
|
sparsify : bool, optional, default True
|
||
|
Set to False for a DataFrame with a hierarchical index to print
|
||
|
every multiindex key at each row.
|
||
|
index_names : bool, optional, default True
|
||
|
Prints the names of the indexes.
|
||
|
justify : str, default None
|
||
|
How to justify the column labels. If None uses the option from
|
||
|
the print configuration (controlled by set_option), 'right' out
|
||
|
of the box. Valid values are
|
||
|
|
||
|
* left
|
||
|
* right
|
||
|
* center
|
||
|
* justify
|
||
|
* justify-all
|
||
|
* start
|
||
|
* end
|
||
|
* inherit
|
||
|
* match-parent
|
||
|
* initial
|
||
|
* unset.
|
||
|
max_rows : int, optional
|
||
|
Maximum number of rows to display in the console.
|
||
|
max_cols : int, optional
|
||
|
Maximum number of columns to display in the console.
|
||
|
show_dimensions : bool, default False
|
||
|
Display DataFrame dimensions (number of rows by number of columns).
|
||
|
decimal : str, default '.'
|
||
|
Character recognized as decimal separator, e.g. ',' in Europe.
|
||
|
"""
|
||
|
|
||
|
_VALID_JUSTIFY_PARAMETERS = (
|
||
|
"left",
|
||
|
"right",
|
||
|
"center",
|
||
|
"justify",
|
||
|
"justify-all",
|
||
|
"start",
|
||
|
"end",
|
||
|
"inherit",
|
||
|
"match-parent",
|
||
|
"initial",
|
||
|
"unset",
|
||
|
)
|
||
|
|
||
|
return_docstring: Final = """
|
||
|
Returns
|
||
|
-------
|
||
|
str or None
|
||
|
If buf is None, returns the result as a string. Otherwise returns
|
||
|
None.
|
||
|
"""
|
||
|
|
||
|
|
||
|
class CategoricalFormatter:
|
||
|
def __init__(
|
||
|
self,
|
||
|
categorical: Categorical,
|
||
|
buf: IO[str] | None = None,
|
||
|
length: bool = True,
|
||
|
na_rep: str = "NaN",
|
||
|
footer: bool = True,
|
||
|
) -> None:
|
||
|
self.categorical = categorical
|
||
|
self.buf = buf if buf is not None else StringIO("")
|
||
|
self.na_rep = na_rep
|
||
|
self.length = length
|
||
|
self.footer = footer
|
||
|
self.quoting = QUOTE_NONNUMERIC
|
||
|
|
||
|
def _get_footer(self) -> str:
|
||
|
footer = ""
|
||
|
|
||
|
if self.length:
|
||
|
if footer:
|
||
|
footer += ", "
|
||
|
footer += f"Length: {len(self.categorical)}"
|
||
|
|
||
|
level_info = self.categorical._repr_categories_info()
|
||
|
|
||
|
# Levels are added on a new line
|
||
|
if footer:
|
||
|
footer += "\n"
|
||
|
footer += level_info
|
||
|
|
||
|
return str(footer)
|
||
|
|
||
|
def _get_formatted_values(self) -> list[str]:
|
||
|
return format_array(
|
||
|
self.categorical._internal_get_values(),
|
||
|
None,
|
||
|
float_format=None,
|
||
|
na_rep=self.na_rep,
|
||
|
quoting=self.quoting,
|
||
|
)
|
||
|
|
||
|
def to_string(self) -> str:
|
||
|
categorical = self.categorical
|
||
|
|
||
|
if len(categorical) == 0:
|
||
|
if self.footer:
|
||
|
return self._get_footer()
|
||
|
else:
|
||
|
return ""
|
||
|
|
||
|
fmt_values = self._get_formatted_values()
|
||
|
|
||
|
fmt_values = [i.strip() for i in fmt_values]
|
||
|
values = ", ".join(fmt_values)
|
||
|
result = ["[" + values + "]"]
|
||
|
if self.footer:
|
||
|
footer = self._get_footer()
|
||
|
if footer:
|
||
|
result.append(footer)
|
||
|
|
||
|
return str("\n".join(result))
|
||
|
|
||
|
|
||
|
class SeriesFormatter:
|
||
|
def __init__(
|
||
|
self,
|
||
|
series: Series,
|
||
|
buf: IO[str] | None = None,
|
||
|
length: bool | str = True,
|
||
|
header: bool = True,
|
||
|
index: bool = True,
|
||
|
na_rep: str = "NaN",
|
||
|
name: bool = False,
|
||
|
float_format: str | None = None,
|
||
|
dtype: bool = True,
|
||
|
max_rows: int | None = None,
|
||
|
min_rows: int | None = None,
|
||
|
) -> None:
|
||
|
self.series = series
|
||
|
self.buf = buf if buf is not None else StringIO()
|
||
|
self.name = name
|
||
|
self.na_rep = na_rep
|
||
|
self.header = header
|
||
|
self.length = length
|
||
|
self.index = index
|
||
|
self.max_rows = max_rows
|
||
|
self.min_rows = min_rows
|
||
|
|
||
|
if float_format is None:
|
||
|
float_format = get_option("display.float_format")
|
||
|
self.float_format = float_format
|
||
|
self.dtype = dtype
|
||
|
self.adj = get_adjustment()
|
||
|
|
||
|
self._chk_truncate()
|
||
|
|
||
|
def _chk_truncate(self) -> None:
|
||
|
self.tr_row_num: int | None
|
||
|
|
||
|
min_rows = self.min_rows
|
||
|
max_rows = self.max_rows
|
||
|
# truncation determined by max_rows, actual truncated number of rows
|
||
|
# used below by min_rows
|
||
|
is_truncated_vertically = max_rows and (len(self.series) > max_rows)
|
||
|
series = self.series
|
||
|
if is_truncated_vertically:
|
||
|
max_rows = cast(int, max_rows)
|
||
|
if min_rows:
|
||
|
# if min_rows is set (not None or 0), set max_rows to minimum
|
||
|
# of both
|
||
|
max_rows = min(min_rows, max_rows)
|
||
|
if max_rows == 1:
|
||
|
row_num = max_rows
|
||
|
series = series.iloc[:max_rows]
|
||
|
else:
|
||
|
row_num = max_rows // 2
|
||
|
series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
|
||
|
self.tr_row_num = row_num
|
||
|
else:
|
||
|
self.tr_row_num = None
|
||
|
self.tr_series = series
|
||
|
self.is_truncated_vertically = is_truncated_vertically
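# Usage sketch (illustrative, not part of the original source): how min_rows
# interacts with max_rows in the truncation above. With max_rows=4 and
# min_rows=2, a 10-row Series exceeds max_rows, so max_rows is capped to
# min_rows and the output keeps one head row and one tail row around "..".
#
#     >>> import pandas as pd
#     >>> s = pd.Series(range(10))
#     >>> print(s.to_string(max_rows=4, min_rows=2))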
|
||
|
|
||
|
def _get_footer(self) -> str:
|
||
|
name = self.series.name
|
||
|
footer = ""
|
||
|
|
||
|
if getattr(self.series.index, "freq", None) is not None:
|
||
|
assert isinstance(
|
||
|
self.series.index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)
|
||
|
)
|
||
|
footer += f"Freq: {self.series.index.freqstr}"
|
||
|
|
||
|
if self.name is not False and name is not None:
|
||
|
if footer:
|
||
|
footer += ", "
|
||
|
|
||
|
series_name = pprint_thing(name, escape_chars=("\t", "\r", "\n"))
|
||
|
footer += f"Name: {series_name}"
|
||
|
|
||
|
if self.length is True or (
|
||
|
self.length == "truncate" and self.is_truncated_vertically
|
||
|
):
|
||
|
if footer:
|
||
|
footer += ", "
|
||
|
footer += f"Length: {len(self.series)}"
|
||
|
|
||
|
if self.dtype is not False and self.dtype is not None:
|
||
|
dtype_name = getattr(self.tr_series.dtype, "name", None)
|
||
|
if dtype_name:
|
||
|
if footer:
|
||
|
footer += ", "
|
||
|
footer += f"dtype: {pprint_thing(dtype_name)}"
|
||
|
|
||
|
# level info is added at the end, on a new line, as is done
# for Categoricals
|
||
|
if is_categorical_dtype(self.tr_series.dtype):
|
||
|
level_info = self.tr_series._values._repr_categories_info()
|
||
|
if footer:
|
||
|
footer += "\n"
|
||
|
footer += level_info
|
||
|
|
||
|
return str(footer)
|
||
|
|
||
|
def _get_formatted_index(self) -> tuple[list[str], bool]:
|
||
|
index = self.tr_series.index
|
||
|
|
||
|
if isinstance(index, MultiIndex):
|
||
|
have_header = any(name for name in index.names)
|
||
|
fmt_index = index.format(names=True)
|
||
|
else:
|
||
|
have_header = index.name is not None
|
||
|
fmt_index = index.format(name=True)
|
||
|
return fmt_index, have_header
|
||
|
|
||
|
def _get_formatted_values(self) -> list[str]:
|
||
|
return format_array(
|
||
|
self.tr_series._values,
|
||
|
None,
|
||
|
float_format=self.float_format,
|
||
|
na_rep=self.na_rep,
|
||
|
leading_space=self.index,
|
||
|
)
|
||
|
|
||
|
def to_string(self) -> str:
|
||
|
series = self.tr_series
|
||
|
footer = self._get_footer()
|
||
|
|
||
|
if len(series) == 0:
|
||
|
return f"{type(self.series).__name__}([], {footer})"
|
||
|
|
||
|
fmt_index, have_header = self._get_formatted_index()
|
||
|
fmt_values = self._get_formatted_values()
|
||
|
|
||
|
if self.is_truncated_vertically:
|
||
|
n_header_rows = 0
|
||
|
row_num = self.tr_row_num
|
||
|
row_num = cast(int, row_num)
|
||
|
width = self.adj.len(fmt_values[row_num - 1])
|
||
|
if width > 3:
|
||
|
dot_str = "..."
|
||
|
else:
|
||
|
dot_str = ".."
|
||
|
# Series uses mode=center because it has single value columns
|
||
|
# DataFrame uses mode=left
|
||
|
dot_str = self.adj.justify([dot_str], width, mode="center")[0]
|
||
|
fmt_values.insert(row_num + n_header_rows, dot_str)
|
||
|
fmt_index.insert(row_num + 1, "")
|
||
|
|
||
|
if self.index:
|
||
|
result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values])
|
||
|
else:
|
||
|
result = self.adj.adjoin(3, fmt_values)
|
||
|
|
||
|
if self.header and have_header:
|
||
|
result = fmt_index[0] + "\n" + result
|
||
|
|
||
|
if footer:
|
||
|
result += "\n" + footer
|
||
|
|
||
|
return str("".join(result))
|
||
|
|
||
|
|
||
|
class TextAdjustment:
|
||
|
def __init__(self) -> None:
|
||
|
self.encoding = get_option("display.encoding")
|
||
|
|
||
|
def len(self, text: str) -> int:
|
||
|
return len(text)
|
||
|
|
||
|
def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]:
|
||
|
return justify(texts, max_len, mode=mode)
|
||
|
|
||
|
def adjoin(self, space: int, *lists, **kwargs) -> str:
|
||
|
return adjoin(space, *lists, strlen=self.len, justfunc=self.justify, **kwargs)
|
||
|
|
||
|
|
||
|
class EastAsianTextAdjustment(TextAdjustment):
|
||
|
def __init__(self) -> None:
|
||
|
super().__init__()
|
||
|
if get_option("display.unicode.ambiguous_as_wide"):
|
||
|
self.ambiguous_width = 2
|
||
|
else:
|
||
|
self.ambiguous_width = 1
|
||
|
|
||
|
# Definition of East Asian Width
|
||
|
# https://unicode.org/reports/tr11/
|
||
|
# Ambiguous width can be changed by option
|
||
|
self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}
|
||
|
|
||
|
def len(self, text: str) -> int:
|
||
|
"""
|
||
|
Calculate display width considering unicode East Asian Width
|
||
|
"""
|
||
|
if not isinstance(text, str):
|
||
|
return len(text)
|
||
|
|
||
|
return sum(
|
||
|
self._EAW_MAP.get(east_asian_width(c), self.ambiguous_width) for c in text
|
||
|
)
|
||
|
|
||
|
def justify(
|
||
|
self, texts: Iterable[str], max_len: int, mode: str = "right"
|
||
|
) -> list[str]:
|
||
|
# re-calculate padding space per str considering East Asian Width
|
||
|
def _get_pad(t):
|
||
|
return max_len - self.len(t) + len(t)
|
||
|
|
||
|
if mode == "left":
|
||
|
return [x.ljust(_get_pad(x)) for x in texts]
|
||
|
elif mode == "center":
|
||
|
return [x.center(_get_pad(x)) for x in texts]
|
||
|
else:
|
||
|
return [x.rjust(_get_pad(x)) for x in texts]
|
||
|
|
||
|
|
||
|
def get_adjustment() -> TextAdjustment:
|
||
|
use_east_asian_width = get_option("display.unicode.east_asian_width")
|
||
|
if use_east_asian_width:
|
||
|
return EastAsianTextAdjustment()
|
||
|
else:
|
||
|
return TextAdjustment()
|
||
|
|
||
|
|
||
|
def get_dataframe_repr_params() -> dict[str, Any]:
|
||
|
"""Get the parameters used to repr(dataFrame) calls using DataFrame.to_string.
|
||
|
|
||
|
Supplying these parameters to DataFrame.to_string is equivalent to calling
|
||
|
``repr(DataFrame)``. This is useful if you want to adjust the repr output.
|
||
|
|
||
|
.. versionadded:: 1.4.0
|
||
|
|
||
|
Examples
--------
|
||
|
>>> import pandas as pd
|
||
|
>>>
|
||
|
>>> df = pd.DataFrame([[1, 2], [3, 4]])
|
||
|
>>> repr_params = pd.io.formats.format.get_dataframe_repr_params()
|
||
|
>>> repr(df) == df.to_string(**repr_params)
|
||
|
True
|
||
|
"""
|
||
|
from pandas.io.formats import console
|
||
|
|
||
|
if get_option("display.expand_frame_repr"):
|
||
|
line_width, _ = console.get_console_size()
|
||
|
else:
|
||
|
line_width = None
|
||
|
return {
|
||
|
"max_rows": get_option("display.max_rows"),
|
||
|
"min_rows": get_option("display.min_rows"),
|
||
|
"max_cols": get_option("display.max_columns"),
|
||
|
"max_colwidth": get_option("display.max_colwidth"),
|
||
|
"show_dimensions": get_option("display.show_dimensions"),
|
||
|
"line_width": line_width,
|
||
|
}
|
||
|
|
||
|
|
||
|
def get_series_repr_params() -> dict[str, Any]:
|
||
|
"""Get the parameters used to repr(Series) calls using Series.to_string.
|
||
|
|
||
|
Supplying these parameters to Series.to_string is equivalent to calling
|
||
|
``repr(series)``. This is useful if you want to adjust the series repr output.
|
||
|
|
||
|
.. versionadded:: 1.4.0
|
||
|
|
||
|
Examples
--------
|
||
|
>>> import pandas as pd
|
||
|
>>>
|
||
|
>>> ser = pd.Series([1, 2, 3, 4])
|
||
|
>>> repr_params = pd.io.formats.format.get_series_repr_params()
|
||
|
>>> repr(ser) == ser.to_string(**repr_params)
|
||
|
True
|
||
|
"""
|
||
|
width, height = get_terminal_size()
|
||
|
max_rows = (
|
||
|
height
|
||
|
if get_option("display.max_rows") == 0
|
||
|
else get_option("display.max_rows")
|
||
|
)
|
||
|
min_rows = (
|
||
|
height
|
||
|
if get_option("display.max_rows") == 0
|
||
|
else get_option("display.min_rows")
|
||
|
)
|
||
|
|
||
|
return {
|
||
|
"name": True,
|
||
|
"dtype": True,
|
||
|
"min_rows": min_rows,
|
||
|
"max_rows": max_rows,
|
||
|
"length": get_option("display.show_dimensions"),
|
||
|
}
|
||
|
|
||
|
|
||
|
class DataFrameFormatter:
|
||
|
"""Class for processing dataframe formatting options and data."""
|
||
|
|
||
|
__doc__ = __doc__ if __doc__ else ""
|
||
|
__doc__ += common_docstring + return_docstring
|
||
|
|
||
|
def __init__(
|
||
|
self,
|
||
|
frame: DataFrame,
|
||
|
columns: Sequence[Hashable] | None = None,
|
||
|
col_space: ColspaceArgType | None = None,
|
||
|
header: bool | Sequence[str] = True,
|
||
|
index: bool = True,
|
||
|
na_rep: str = "NaN",
|
||
|
formatters: FormattersType | None = None,
|
||
|
justify: str | None = None,
|
||
|
float_format: FloatFormatType | None = None,
|
||
|
sparsify: bool | None = None,
|
||
|
index_names: bool = True,
|
||
|
max_rows: int | None = None,
|
||
|
min_rows: int | None = None,
|
||
|
max_cols: int | None = None,
|
||
|
show_dimensions: bool | str = False,
|
||
|
decimal: str = ".",
|
||
|
bold_rows: bool = False,
|
||
|
escape: bool = True,
|
||
|
) -> None:
|
||
|
self.frame = frame
|
||
|
self.columns = self._initialize_columns(columns)
|
||
|
self.col_space = self._initialize_colspace(col_space)
|
||
|
self.header = header
|
||
|
self.index = index
|
||
|
self.na_rep = na_rep
|
||
|
self.formatters = self._initialize_formatters(formatters)
|
||
|
self.justify = self._initialize_justify(justify)
|
||
|
self.float_format = float_format
|
||
|
self.sparsify = self._initialize_sparsify(sparsify)
|
||
|
self.show_index_names = index_names
|
||
|
self.decimal = decimal
|
||
|
self.bold_rows = bold_rows
|
||
|
self.escape = escape
|
||
|
self.max_rows = max_rows
|
||
|
self.min_rows = min_rows
|
||
|
self.max_cols = max_cols
|
||
|
self.show_dimensions = show_dimensions
|
||
|
|
||
|
self.max_cols_fitted = self._calc_max_cols_fitted()
|
||
|
self.max_rows_fitted = self._calc_max_rows_fitted()
|
||
|
|
||
|
self.tr_frame = self.frame
|
||
|
self.truncate()
|
||
|
self.adj = get_adjustment()
|
||
|
|
||
|
def get_strcols(self) -> list[list[str]]:
|
||
|
"""
|
||
|
Render a DataFrame to a list of columns (as lists of strings).
|
||
|
"""
|
||
|
strcols = self._get_strcols_without_index()
|
||
|
|
||
|
if self.index:
|
||
|
str_index = self._get_formatted_index(self.tr_frame)
|
||
|
strcols.insert(0, str_index)
|
||
|
|
||
|
return strcols
|
||
|
|
||
|
@property
|
||
|
def should_show_dimensions(self) -> bool:
|
||
|
return self.show_dimensions is True or (
|
||
|
self.show_dimensions == "truncate" and self.is_truncated
|
||
|
)
|
||
|
|
||
|
@property
|
||
|
def is_truncated(self) -> bool:
|
||
|
return bool(self.is_truncated_horizontally or self.is_truncated_vertically)
|
||
|
|
||
|
@property
|
||
|
def is_truncated_horizontally(self) -> bool:
|
||
|
return bool(self.max_cols_fitted and (len(self.columns) > self.max_cols_fitted))
|
||
|
|
||
|
@property
|
||
|
def is_truncated_vertically(self) -> bool:
|
||
|
return bool(self.max_rows_fitted and (len(self.frame) > self.max_rows_fitted))
|
||
|
|
||
|
@property
|
||
|
def dimensions_info(self) -> str:
|
||
|
return f"\n\n[{len(self.frame)} rows x {len(self.frame.columns)} columns]"
|
||
|
|
||
|
@property
|
||
|
def has_index_names(self) -> bool:
|
||
|
return _has_names(self.frame.index)
|
||
|
|
||
|
@property
|
||
|
def has_column_names(self) -> bool:
|
||
|
return _has_names(self.frame.columns)
|
||
|
|
||
|
@property
|
||
|
def show_row_idx_names(self) -> bool:
|
||
|
return all((self.has_index_names, self.index, self.show_index_names))
|
||
|
|
||
|
@property
|
||
|
def show_col_idx_names(self) -> bool:
|
||
|
return all((self.has_column_names, self.show_index_names, self.header))
|
||
|
|
||
|
@property
|
||
|
def max_rows_displayed(self) -> int:
|
||
|
return min(self.max_rows or len(self.frame), len(self.frame))
|
||
|
|
||
|
def _initialize_sparsify(self, sparsify: bool | None) -> bool:
|
||
|
if sparsify is None:
|
||
|
return get_option("display.multi_sparse")
|
||
|
return sparsify
|
||
|
|
||
|
def _initialize_formatters(
|
||
|
self, formatters: FormattersType | None
|
||
|
) -> FormattersType:
|
||
|
if formatters is None:
|
||
|
return {}
|
||
|
elif len(self.frame.columns) == len(formatters) or isinstance(formatters, dict):
|
||
|
return formatters
|
||
|
else:
|
||
|
raise ValueError(
|
||
|
f"Formatters length({len(formatters)}) should match "
|
||
|
f"DataFrame number of columns({len(self.frame.columns)})"
|
||
|
)
|
||
|
|
||
|
def _initialize_justify(self, justify: str | None) -> str:
|
||
|
if justify is None:
|
||
|
return get_option("display.colheader_justify")
|
||
|
else:
|
||
|
return justify
|
||
|
|
||
|
def _initialize_columns(self, columns: Sequence[Hashable] | None) -> Index:
|
||
|
if columns is not None:
|
||
|
# GH 47231 - columns doesn't have to be `Sequence[str]`
|
||
|
# Will fix in later PR
|
||
|
cols = ensure_index(cast(Axes, columns))
|
||
|
self.frame = self.frame[cols]
|
||
|
return cols
|
||
|
else:
|
||
|
return self.frame.columns
|
||
|
|
||
|
def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceType:
|
||
|
result: ColspaceType
|
||
|
|
||
|
if col_space is None:
|
||
|
result = {}
|
||
|
elif isinstance(col_space, (int, str)):
|
||
|
result = {"": col_space}
|
||
|
result.update({column: col_space for column in self.frame.columns})
|
||
|
elif isinstance(col_space, Mapping):
|
||
|
for column in col_space.keys():
|
||
|
if column not in self.frame.columns and column != "":
|
||
|
raise ValueError(
|
||
|
f"Col_space is defined for an unknown column: {column}"
|
||
|
)
|
||
|
result = col_space
|
||
|
else:
|
||
|
if len(self.frame.columns) != len(col_space):
|
||
|
raise ValueError(
|
||
|
f"Col_space length({len(col_space)}) should match "
|
||
|
f"DataFrame number of columns({len(self.frame.columns)})"
|
||
|
)
|
||
|
result = dict(zip(self.frame.columns, col_space))
|
||
|
return result
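# Usage sketch (illustrative, not part of the original source): the accepted
# shapes of ``col_space`` normalized above, as exposed through
# DataFrame.to_string. Exact rendering depends on the data and display options.
#
#     >>> import pandas as pd
#     >>> df = pd.DataFrame({"a": [1], "bb": [2]})
#     >>> print(df.to_string(col_space=10))                    # one width for all columns
#     >>> print(df.to_string(col_space={"a": 10, "bb": 20}))   # per-column mapping
#     >>> print(df.to_string(col_space=[10, 20]))              # sequence, one per column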
|
||
|
|
||
|
def _calc_max_cols_fitted(self) -> int | None:
|
||
|
"""Number of columns fitting the screen."""
|
||
|
if not self._is_in_terminal():
|
||
|
return self.max_cols
|
||
|
|
||
|
width, _ = get_terminal_size()
|
||
|
if self._is_screen_narrow(width):
|
||
|
return width
|
||
|
else:
|
||
|
return self.max_cols
|
||
|
|
||
|
def _calc_max_rows_fitted(self) -> int | None:
|
||
|
"""Number of rows with data fitting the screen."""
|
||
|
max_rows: int | None
|
||
|
|
||
|
if self._is_in_terminal():
|
||
|
_, height = get_terminal_size()
|
||
|
if self.max_rows == 0:
|
||
|
# rows available to fill with actual data
|
||
|
return height - self._get_number_of_auxillary_rows()
|
||
|
|
||
|
if self._is_screen_short(height):
|
||
|
max_rows = height
|
||
|
else:
|
||
|
max_rows = self.max_rows
|
||
|
else:
|
||
|
max_rows = self.max_rows
|
||
|
|
||
|
return self._adjust_max_rows(max_rows)
|
||
|
|
||
|
def _adjust_max_rows(self, max_rows: int | None) -> int | None:
|
||
|
"""Adjust max_rows using display logic.
|
||
|
|
||
|
See description here:
|
||
|
https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options
|
||
|
|
||
|
GH #37359
|
||
|
"""
|
||
|
if max_rows:
|
||
|
if (len(self.frame) > max_rows) and self.min_rows:
|
||
|
# if truncated, cap the number of rows shown to min_rows
|
||
|
max_rows = min(self.min_rows, max_rows)
|
||
|
return max_rows
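# Usage sketch (illustrative, not part of the original source): the
# display.min_rows option drives the adjustment above. When a frame exceeds
# display.max_rows, only display.min_rows rows are kept, split into head and
# tail halves around the "..." marker.
#
#     >>> import pandas as pd
#     >>> df = pd.DataFrame({"a": range(100)})
#     >>> with pd.option_context("display.max_rows", 10, "display.min_rows", 4):
#     ...     print(df)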
|
||
|
|
||
|
def _is_in_terminal(self) -> bool:
|
||
|
"""Check if the output is to be shown in terminal."""
|
||
|
return bool(self.max_cols == 0 or self.max_rows == 0)
|
||
|
|
||
|
def _is_screen_narrow(self, max_width) -> bool:
|
||
|
return bool(self.max_cols == 0 and len(self.frame.columns) > max_width)
|
||
|
|
||
|
def _is_screen_short(self, max_height) -> bool:
|
||
|
return bool(self.max_rows == 0 and len(self.frame) > max_height)
|
||
|
|
||
|
def _get_number_of_auxillary_rows(self) -> int:
|
||
|
"""Get number of rows occupied by prompt, dots and dimension info."""
|
||
|
dot_row = 1
|
||
|
prompt_row = 1
|
||
|
num_rows = dot_row + prompt_row
|
||
|
|
||
|
if self.show_dimensions:
|
||
|
num_rows += len(self.dimensions_info.splitlines())
|
||
|
|
||
|
if self.header:
|
||
|
num_rows += 1
|
||
|
|
||
|
return num_rows
|
||
|
|
||
|
def truncate(self) -> None:
|
||
|
"""
|
||
|
Check whether the frame should be truncated. If so, slice the frame up.
|
||
|
"""
|
||
|
if self.is_truncated_horizontally:
|
||
|
self._truncate_horizontally()
|
||
|
|
||
|
if self.is_truncated_vertically:
|
||
|
self._truncate_vertically()
|
||
|
|
||
|
def _truncate_horizontally(self) -> None:
|
||
|
"""Remove columns, which are not to be displayed and adjust formatters.
|
||
|
|
||
|
Attributes affected:
|
||
|
- tr_frame
|
||
|
- formatters
|
||
|
- tr_col_num
|
||
|
"""
|
||
|
assert self.max_cols_fitted is not None
|
||
|
col_num = self.max_cols_fitted // 2
|
||
|
if col_num >= 1:
|
||
|
left = self.tr_frame.iloc[:, :col_num]
|
||
|
right = self.tr_frame.iloc[:, -col_num:]
|
||
|
self.tr_frame = concat((left, right), axis=1)
|
||
|
|
||
|
# truncate formatter
|
||
|
if isinstance(self.formatters, (list, tuple)):
|
||
|
self.formatters = [
|
||
|
*self.formatters[:col_num],
|
||
|
*self.formatters[-col_num:],
|
||
|
]
|
||
|
else:
|
||
|
col_num = cast(int, self.max_cols)
|
||
|
self.tr_frame = self.tr_frame.iloc[:, :col_num]
|
||
|
self.tr_col_num = col_num
|
||
|
|
||
|
def _truncate_vertically(self) -> None:
|
||
|
"""Remove rows, which are not to be displayed.
|
||
|
|
||
|
Attributes affected:
|
||
|
- tr_frame
|
||
|
- tr_row_num
|
||
|
"""
|
||
|
assert self.max_rows_fitted is not None
|
||
|
row_num = self.max_rows_fitted // 2
|
||
|
if row_num >= 1:
|
||
|
head = self.tr_frame.iloc[:row_num, :]
|
||
|
tail = self.tr_frame.iloc[-row_num:, :]
|
||
|
self.tr_frame = concat((head, tail))
|
||
|
else:
|
||
|
row_num = cast(int, self.max_rows)
|
||
|
self.tr_frame = self.tr_frame.iloc[:row_num, :]
|
||
|
self.tr_row_num = row_num
|
||
|
|
||
|
def _get_strcols_without_index(self) -> list[list[str]]:
|
||
|
strcols: list[list[str]] = []
|
||
|
|
||
|
if not is_list_like(self.header) and not self.header:
|
||
|
for i, c in enumerate(self.tr_frame):
|
||
|
fmt_values = self.format_col(i)
|
||
|
fmt_values = _make_fixed_width(
|
||
|
strings=fmt_values,
|
||
|
justify=self.justify,
|
||
|
minimum=int(self.col_space.get(c, 0)),
|
||
|
adj=self.adj,
|
||
|
)
|
||
|
strcols.append(fmt_values)
|
||
|
return strcols
|
||
|
|
||
|
if is_list_like(self.header):
|
||
|
# cast here since can't be bool if is_list_like
|
||
|
self.header = cast(List[str], self.header)
|
||
|
if len(self.header) != len(self.columns):
|
||
|
raise ValueError(
|
||
|
f"Writing {len(self.columns)} cols "
|
||
|
f"but got {len(self.header)} aliases"
|
||
|
)
|
||
|
str_columns = [[label] for label in self.header]
|
||
|
else:
|
||
|
str_columns = self._get_formatted_column_labels(self.tr_frame)
|
||
|
|
||
|
if self.show_row_idx_names:
|
||
|
for x in str_columns:
|
||
|
x.append("")
|
||
|
|
||
|
for i, c in enumerate(self.tr_frame):
|
||
|
cheader = str_columns[i]
|
||
|
header_colwidth = max(
|
||
|
int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader)
|
||
|
)
|
||
|
fmt_values = self.format_col(i)
|
||
|
fmt_values = _make_fixed_width(
|
||
|
fmt_values, self.justify, minimum=header_colwidth, adj=self.adj
|
||
|
)
|
||
|
|
||
|
max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth)
|
||
|
cheader = self.adj.justify(cheader, max_len, mode=self.justify)
|
||
|
strcols.append(cheader + fmt_values)
|
||
|
|
||
|
return strcols
|
||
|
|
||
|
def format_col(self, i: int) -> list[str]:
|
||
|
frame = self.tr_frame
|
||
|
formatter = self._get_formatter(i)
|
||
|
return format_array(
|
||
|
frame.iloc[:, i]._values,
|
||
|
formatter,
|
||
|
float_format=self.float_format,
|
||
|
na_rep=self.na_rep,
|
||
|
space=self.col_space.get(frame.columns[i]),
|
||
|
decimal=self.decimal,
|
||
|
leading_space=self.index,
|
||
|
)
|
||
|
|
||
|
def _get_formatter(self, i: str | int) -> Callable | None:
|
||
|
if isinstance(self.formatters, (list, tuple)):
|
||
|
if is_integer(i):
|
||
|
i = cast(int, i)
|
||
|
return self.formatters[i]
|
||
|
else:
|
||
|
return None
|
||
|
else:
|
||
|
if is_integer(i) and i not in self.columns:
|
||
|
i = self.columns[i]
|
||
|
return self.formatters.get(i, None)
|
||
|
|
||
|
def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]:
|
||
|
from pandas.core.indexes.multi import sparsify_labels
|
||
|
|
||
|
columns = frame.columns
|
||
|
|
||
|
if isinstance(columns, MultiIndex):
|
||
|
fmt_columns = columns.format(sparsify=False, adjoin=False)
|
||
|
fmt_columns = list(zip(*fmt_columns))
|
||
|
dtypes = self.frame.dtypes._values
|
||
|
|
||
|
# if we have a Float level, they don't use leading space at all
|
||
|
restrict_formatting = any(level.is_floating for level in columns.levels)
|
||
|
need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
|
||
|
|
||
|
def space_format(x, y):
|
||
|
if (
|
||
|
y not in self.formatters
|
||
|
and need_leadsp[x]
|
||
|
and not restrict_formatting
|
||
|
):
|
||
|
return " " + y
|
||
|
return y
|
||
|
|
||
|
str_columns = list(
|
||
|
zip(*([space_format(x, y) for y in x] for x in fmt_columns))
|
||
|
)
|
||
|
if self.sparsify and len(str_columns):
|
||
|
str_columns = sparsify_labels(str_columns)
|
||
|
|
||
|
str_columns = [list(x) for x in zip(*str_columns)]
|
||
|
else:
|
||
|
fmt_columns = columns.format()
|
||
|
dtypes = self.frame.dtypes
|
||
|
need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
|
||
|
str_columns = [
|
||
|
[" " + x if not self._get_formatter(i) and need_leadsp[x] else x]
|
||
|
for i, x in enumerate(fmt_columns)
|
||
|
]
|
||
|
# self.str_columns = str_columns
|
||
|
return str_columns
|
||
|
|
||
|
def _get_formatted_index(self, frame: DataFrame) -> list[str]:
|
||
|
# Note: this is only used by to_string() and to_latex(), not by
|
||
|
# to_html(). so safe to cast col_space here.
|
||
|
col_space = {k: cast(int, v) for k, v in self.col_space.items()}
|
||
|
index = frame.index
|
||
|
columns = frame.columns
|
||
|
fmt = self._get_formatter("__index__")
|
||
|
|
||
|
if isinstance(index, MultiIndex):
|
||
|
fmt_index = index.format(
|
||
|
sparsify=self.sparsify,
|
||
|
adjoin=False,
|
||
|
names=self.show_row_idx_names,
|
||
|
formatter=fmt,
|
||
|
)
|
||
|
else:
|
||
|
fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)]
|
||
|
|
||
|
fmt_index = [
|
||
|
tuple(
|
||
|
_make_fixed_width(
|
||
|
list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj
|
||
|
)
|
||
|
)
|
||
|
for x in fmt_index
|
||
|
]
|
||
|
|
||
|
adjoined = self.adj.adjoin(1, *fmt_index).split("\n")
|
||
|
|
||
|
# empty space for columns
|
||
|
if self.show_col_idx_names:
|
||
|
col_header = [str(x) for x in self._get_column_name_list()]
|
||
|
else:
|
||
|
col_header = [""] * columns.nlevels
|
||
|
|
||
|
if self.header:
|
||
|
return col_header + adjoined
|
||
|
else:
|
||
|
return adjoined
|
||
|
|
||
|
def _get_column_name_list(self) -> list[Hashable]:
|
||
|
names: list[Hashable] = []
|
||
|
columns = self.frame.columns
|
||
|
if isinstance(columns, MultiIndex):
|
||
|
names.extend("" if name is None else name for name in columns.names)
|
||
|
else:
|
||
|
names.append("" if columns.name is None else columns.name)
|
||
|
return names
|
||
|
|
||
|
|
||
|
class DataFrameRenderer:
|
||
|
"""Class for creating dataframe output in multiple formats.
|
||
|
|
||
|
Called in pandas.core.generic.NDFrame:
|
||
|
- to_csv
|
||
|
- to_latex
|
||
|
|
||
|
Called in pandas.core.frame.DataFrame:
|
||
|
- to_html
|
||
|
- to_string
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
fmt : DataFrameFormatter
|
||
|
Formatter with the formatting options.
|
||
|
"""
|
||
|
|
||
|
def __init__(self, fmt: DataFrameFormatter) -> None:
|
||
|
self.fmt = fmt
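# Usage sketch (illustrative, not part of the original source): a renderer is
# built from a DataFrameFormatter holding the formatting options and then
# dispatched to one of the writers below.
#
#     >>> import pandas as pd
#     >>> from pandas.io.formats.format import DataFrameFormatter, DataFrameRenderer
#     >>> fmt = DataFrameFormatter(pd.DataFrame({"a": [1, 2]}), na_rep="-")
#     >>> text = DataFrameRenderer(fmt).to_string()
#     >>> html = DataFrameRenderer(fmt).to_html()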
|
||
|
|
||
|
def to_latex(
|
||
|
self,
|
||
|
buf: FilePath | WriteBuffer[str] | None = None,
|
||
|
column_format: str | None = None,
|
||
|
longtable: bool = False,
|
||
|
encoding: str | None = None,
|
||
|
multicolumn: bool = False,
|
||
|
multicolumn_format: str | None = None,
|
||
|
multirow: bool = False,
|
||
|
caption: str | tuple[str, str] | None = None,
|
||
|
label: str | None = None,
|
||
|
position: str | None = None,
|
||
|
) -> str | None:
|
||
|
"""
|
||
|
Render a DataFrame to a LaTeX tabular/longtable environment output.
|
||
|
"""
|
||
|
from pandas.io.formats.latex import LatexFormatter
|
||
|
|
||
|
latex_formatter = LatexFormatter(
|
||
|
self.fmt,
|
||
|
longtable=longtable,
|
||
|
column_format=column_format,
|
||
|
multicolumn=multicolumn,
|
||
|
multicolumn_format=multicolumn_format,
|
||
|
multirow=multirow,
|
||
|
caption=caption,
|
||
|
label=label,
|
||
|
position=position,
|
||
|
)
|
||
|
string = latex_formatter.to_string()
|
||
|
return save_to_buffer(string, buf=buf, encoding=encoding)
|
||
|
|
||
|
def to_html(
|
||
|
self,
|
||
|
buf: FilePath | WriteBuffer[str] | None = None,
|
||
|
encoding: str | None = None,
|
||
|
classes: str | list | tuple | None = None,
|
||
|
notebook: bool = False,
|
||
|
border: int | bool | None = None,
|
||
|
table_id: str | None = None,
|
||
|
render_links: bool = False,
|
||
|
) -> str | None:
|
||
|
"""
|
||
|
Render a DataFrame to a html table.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
buf : str, path object, file-like object, or None, default None
|
||
|
String, path object (implementing ``os.PathLike[str]``), or file-like
|
||
|
object implementing a string ``write()`` function. If None, the result is
|
||
|
returned as a string.
|
||
|
encoding : str, default "utf-8"
|
||
|
Set character encoding.
|
||
|
classes : str or list-like
|
||
|
classes to include in the `class` attribute of the opening
|
||
|
``<table>`` tag, in addition to the default "dataframe".
|
||
|
notebook : {True, False}, optional, default False
|
||
|
Whether the generated HTML is for IPython Notebook.
|
||
|
border : int
|
||
|
A ``border=border`` attribute is included in the opening
|
||
|
``<table>`` tag. Default ``pd.options.display.html.border``.
|
||
|
table_id : str, optional
|
||
|
A css id is included in the opening `<table>` tag if specified.
|
||
|
render_links : bool, default False
|
||
|
Convert URLs to HTML links.
|
||
|
"""
|
||
|
from pandas.io.formats.html import (
|
||
|
HTMLFormatter,
|
||
|
NotebookFormatter,
|
||
|
)
|
||
|
|
||
|
Klass = NotebookFormatter if notebook else HTMLFormatter
|
||
|
|
||
|
html_formatter = Klass(
|
||
|
self.fmt,
|
||
|
classes=classes,
|
||
|
border=border,
|
||
|
table_id=table_id,
|
||
|
render_links=render_links,
|
||
|
)
|
||
|
string = html_formatter.to_string()
|
||
|
return save_to_buffer(string, buf=buf, encoding=encoding)
|
||
|
|
||
|
def to_string(
|
||
|
self,
|
||
|
buf: FilePath | WriteBuffer[str] | None = None,
|
||
|
encoding: str | None = None,
|
||
|
line_width: int | None = None,
|
||
|
) -> str | None:
|
||
|
"""
|
||
|
Render a DataFrame to a console-friendly tabular output.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
buf : str, path object, file-like object, or None, default None
|
||
|
String, path object (implementing ``os.PathLike[str]``), or file-like
|
||
|
object implementing a string ``write()`` function. If None, the result is
|
||
|
returned as a string.
|
||
|
encoding : str, default "utf-8"
|
||
|
Set character encoding.
|
||
|
line_width : int, optional
|
||
|
Width to wrap a line in characters.
|
||
|
"""
|
||
|
from pandas.io.formats.string import StringFormatter
|
||
|
|
||
|
string_formatter = StringFormatter(self.fmt, line_width=line_width)
|
||
|
string = string_formatter.to_string()
|
||
|
return save_to_buffer(string, buf=buf, encoding=encoding)
|
||
|
|
||
|
@deprecate_kwarg(old_arg_name="line_terminator", new_arg_name="lineterminator")
|
||
|
def to_csv(
|
||
|
self,
|
||
|
path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
|
||
|
encoding: str | None = None,
|
||
|
sep: str = ",",
|
||
|
columns: Sequence[Hashable] | None = None,
|
||
|
index_label: IndexLabel | None = None,
|
||
|
mode: str = "w",
|
||
|
compression: CompressionOptions = "infer",
|
||
|
quoting: int | None = None,
|
||
|
quotechar: str = '"',
|
||
|
lineterminator: str | None = None,
|
||
|
chunksize: int | None = None,
|
||
|
date_format: str | None = None,
|
||
|
doublequote: bool = True,
|
||
|
escapechar: str | None = None,
|
||
|
errors: str = "strict",
|
||
|
storage_options: StorageOptions = None,
|
||
|
) -> str | None:
|
||
|
"""
|
||
|
Render dataframe as comma-separated file.
|
||
|
"""
|
||
|
from pandas.io.formats.csvs import CSVFormatter
|
||
|
|
||
|
if path_or_buf is None:
|
||
|
created_buffer = True
|
||
|
path_or_buf = StringIO()
|
||
|
else:
|
||
|
created_buffer = False
|
||
|
|
||
|
csv_formatter = CSVFormatter(
|
||
|
path_or_buf=path_or_buf,
|
||
|
lineterminator=lineterminator,
|
||
|
sep=sep,
|
||
|
encoding=encoding,
|
||
|
errors=errors,
|
||
|
compression=compression,
|
||
|
quoting=quoting,
|
||
|
cols=columns,
|
||
|
index_label=index_label,
|
||
|
mode=mode,
|
||
|
chunksize=chunksize,
|
||
|
quotechar=quotechar,
|
||
|
date_format=date_format,
|
||
|
doublequote=doublequote,
|
||
|
escapechar=escapechar,
|
||
|
storage_options=storage_options,
|
||
|
formatter=self.fmt,
|
||
|
)
|
||
|
csv_formatter.save()
|
||
|
|
||
|
if created_buffer:
|
||
|
assert isinstance(path_or_buf, StringIO)
|
||
|
content = path_or_buf.getvalue()
|
||
|
path_or_buf.close()
|
||
|
return content
|
||
|
|
||
|
return None
|
||
|
|
||
|
|
||
|
def save_to_buffer(
|
||
|
string: str,
|
||
|
buf: FilePath | WriteBuffer[str] | None = None,
|
||
|
encoding: str | None = None,
|
||
|
) -> str | None:
|
||
|
"""
|
||
|
Perform serialization. Write to buf or return as string if buf is None.
|
||
|
"""
|
||
|
with get_buffer(buf, encoding=encoding) as f:
|
||
|
f.write(string)
|
||
|
if buf is None:
|
||
|
# error: "WriteBuffer[str]" has no attribute "getvalue"
|
||
|
return f.getvalue() # type: ignore[attr-defined]
|
||
|
return None
|
||
|
|
||
|
|
||
|
@contextmanager
|
||
|
def get_buffer(
|
||
|
buf: FilePath | WriteBuffer[str] | None, encoding: str | None = None
|
||
|
) -> Iterator[WriteBuffer[str]] | Iterator[StringIO]:
|
||
|
"""
|
||
|
Context manager to open, yield and close buffer for filenames or Path-like
|
||
|
objects, otherwise yield buf unchanged.
|
||
|
"""
|
||
|
if buf is not None:
|
||
|
buf = stringify_path(buf)
|
||
|
else:
|
||
|
buf = StringIO()
|
||
|
|
||
|
if encoding is None:
|
||
|
encoding = "utf-8"
|
||
|
elif not isinstance(buf, str):
|
||
|
raise ValueError("buf is not a file name and encoding is specified.")
|
||
|
|
||
|
if hasattr(buf, "write"):
|
||
|
yield buf
|
||
|
elif isinstance(buf, str):
|
||
|
check_parent_directory(str(buf))
|
||
|
with open(buf, "w", encoding=encoding, newline="") as f:
|
||
|
# GH#30034 open instead of codecs.open prevents a file leak
|
||
|
# if we have an invalid encoding argument.
|
||
|
# newline="" is needed to roundtrip correctly on
|
||
|
# windows test_to_latex_filename
|
||
|
yield f
|
||
|
else:
|
||
|
raise TypeError("buf is not a file name and it has no write method")
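# Usage sketch (illustrative, not part of the original source): save_to_buffer
# returns the string when no buffer is given, otherwise writes to the buffer
# (or file path) and returns None; an explicit encoding is only accepted for
# file paths.
#
#     >>> from io import StringIO
#     >>> from pandas.io.formats.format import save_to_buffer
#     >>> save_to_buffer("payload")              # -> "payload"
#     >>> sio = StringIO()
#     >>> save_to_buffer("payload", buf=sio)     # -> None, sio now holds the text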
|
||
|
|
||
|
|
||
|
# ----------------------------------------------------------------------
|
||
|
# Array formatters
|
||
|
|
||
|
|
||
|
def format_array(
|
||
|
values: Any,
|
||
|
formatter: Callable | None,
|
||
|
float_format: FloatFormatType | None = None,
|
||
|
na_rep: str = "NaN",
|
||
|
digits: int | None = None,
|
||
|
space: str | int | None = None,
|
||
|
justify: str = "right",
|
||
|
decimal: str = ".",
|
||
|
leading_space: bool | None = True,
|
||
|
quoting: int | None = None,
|
||
|
) -> list[str]:
|
||
|
"""
|
||
|
Format an array for printing.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
values
|
||
|
formatter
|
||
|
float_format
|
||
|
na_rep
|
||
|
digits
|
||
|
space
|
||
|
justify
|
||
|
decimal
|
||
|
leading_space : bool, optional, default True
|
||
|
Whether the array should be formatted with a leading space.
|
||
|
When an array is used as a column of a Series or DataFrame, we do want
|
||
|
the leading space to pad between columns.
|
||
|
|
||
|
When formatting an Index subclass
|
||
|
(e.g. IntervalIndex._format_native_types), we don't want the
|
||
|
leading space since it should be left-aligned.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
List[str]
|
||
|
"""
|
||
|
fmt_klass: type[GenericArrayFormatter]
|
||
|
if is_datetime64_dtype(values.dtype):
|
||
|
fmt_klass = Datetime64Formatter
|
||
|
elif isinstance(values.dtype, DatetimeTZDtype):
|
||
|
fmt_klass = Datetime64TZFormatter
|
||
|
elif is_timedelta64_dtype(values.dtype):
|
||
|
fmt_klass = Timedelta64Formatter
|
||
|
elif is_extension_array_dtype(values.dtype):
|
||
|
fmt_klass = ExtensionArrayFormatter
|
||
|
elif is_float_dtype(values.dtype) or is_complex_dtype(values.dtype):
|
||
|
fmt_klass = FloatArrayFormatter
|
||
|
elif is_integer_dtype(values.dtype):
|
||
|
fmt_klass = IntArrayFormatter
|
||
|
else:
|
||
|
fmt_klass = GenericArrayFormatter
|
||
|
|
||
|
if space is None:
|
||
|
space = 12
|
||
|
|
||
|
if float_format is None:
|
||
|
float_format = get_option("display.float_format")
|
||
|
|
||
|
if digits is None:
|
||
|
digits = get_option("display.precision")
|
||
|
|
||
|
fmt_obj = fmt_klass(
|
||
|
values,
|
||
|
digits=digits,
|
||
|
na_rep=na_rep,
|
||
|
float_format=float_format,
|
||
|
formatter=formatter,
|
||
|
space=space,
|
||
|
justify=justify,
|
||
|
decimal=decimal,
|
||
|
leading_space=leading_space,
|
||
|
quoting=quoting,
|
||
|
)
|
||
|
|
||
|
return fmt_obj.get_result()
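# Usage sketch (illustrative, not part of the original source): format_array
# picks the formatter class from the dtype (float, int, datetime, extension,
# ...) and, with leading_space=True, pads values so columns line up. The exact
# strings depend on display options such as display.precision.
#
#     >>> import numpy as np
#     >>> from pandas.io.formats.format import format_array
#     >>> format_array(np.array([1.0, 2.5, np.nan]), None, na_rep="NaN")
#     >>> format_array(np.array([1.0, 2.5]), None, leading_space=False)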
|
||
|
|
||
|
|
||
|
class GenericArrayFormatter:
|
||
|
def __init__(
|
||
|
self,
|
||
|
values: Any,
|
||
|
digits: int = 7,
|
||
|
formatter: Callable | None = None,
|
||
|
na_rep: str = "NaN",
|
||
|
space: str | int = 12,
|
||
|
float_format: FloatFormatType | None = None,
|
||
|
justify: str = "right",
|
||
|
decimal: str = ".",
|
||
|
quoting: int | None = None,
|
||
|
fixed_width: bool = True,
|
||
|
leading_space: bool | None = True,
|
||
|
) -> None:
|
||
|
self.values = values
|
||
|
self.digits = digits
|
||
|
self.na_rep = na_rep
|
||
|
self.space = space
|
||
|
self.formatter = formatter
|
||
|
self.float_format = float_format
|
||
|
self.justify = justify
|
||
|
self.decimal = decimal
|
||
|
self.quoting = quoting
|
||
|
self.fixed_width = fixed_width
|
||
|
self.leading_space = leading_space
|
||
|
|
||
|
def get_result(self) -> list[str]:
|
||
|
fmt_values = self._format_strings()
|
||
|
return _make_fixed_width(fmt_values, self.justify)
|
||
|
|
||
|
def _format_strings(self) -> list[str]:
|
||
|
if self.float_format is None:
|
||
|
float_format = get_option("display.float_format")
|
||
|
if float_format is None:
|
||
|
precision = get_option("display.precision")
|
||
|
float_format = lambda x: _trim_zeros_single_float(
|
||
|
f"{x: .{precision:d}f}"
|
||
|
)
|
||
|
else:
|
||
|
float_format = self.float_format
|
||
|
|
||
|
if self.formatter is not None:
|
||
|
formatter = self.formatter
|
||
|
else:
|
||
|
quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE
|
||
|
formatter = partial(
|
||
|
pprint_thing,
|
||
|
escape_chars=("\t", "\r", "\n"),
|
||
|
quote_strings=quote_strings,
|
||
|
)
|
||
|
|
||
|
def _format(x):
|
||
|
if self.na_rep is not None and is_scalar(x) and isna(x):
|
||
|
try:
|
||
|
# try block for np.isnat specifically
|
||
|
# determine na_rep if x is None or NaT-like
|
||
|
if x is None:
|
||
|
return "None"
|
||
|
elif x is NA:
|
||
|
return str(NA)
|
||
|
elif x is NaT or np.isnat(x):
|
||
|
return "NaT"
|
||
|
except (TypeError, ValueError):
|
||
|
# np.isnat only handles datetime or timedelta objects
|
||
|
pass
|
||
|
return self.na_rep
|
||
|
elif isinstance(x, PandasObject):
|
||
|
return str(x)
|
||
|
else:
|
||
|
# object dtype
|
||
|
return str(formatter(x))
|
||
|
|
||
|
vals = extract_array(self.values, extract_numpy=True)
|
||
|
if not isinstance(vals, np.ndarray):
|
||
|
raise TypeError(
|
||
|
"ExtensionArray formatting should use ExtensionArrayFormatter"
|
||
|
)
|
||
|
inferred = lib.map_infer(vals, is_float)
|
||
|
is_float_type = (
|
||
|
inferred
|
||
|
# vals may have 2 or more dimensions
|
||
|
& np.all(notna(vals), axis=tuple(range(1, len(vals.shape))))
|
||
|
)
|
||
|
leading_space = self.leading_space
|
||
|
if leading_space is None:
|
||
|
leading_space = is_float_type.any()
|
||
|
|
||
|
fmt_values = []
|
||
|
for i, v in enumerate(vals):
|
||
|
if not is_float_type[i] and leading_space:
|
||
|
fmt_values.append(f" {_format(v)}")
|
||
|
elif is_float_type[i]:
|
||
|
fmt_values.append(float_format(v))
|
||
|
else:
|
||
|
if leading_space is False:
|
||
|
# False specifically, so that the default is
|
||
|
# to include a space if we get here.
|
||
|
tpl = "{v}"
|
||
|
else:
|
||
|
tpl = " {v}"
|
||
|
fmt_values.append(tpl.format(v=_format(v)))
|
||
|
|
||
|
return fmt_values
|
||
|
|
||
|
|
||
|
class FloatArrayFormatter(GenericArrayFormatter):
|
||
|
def __init__(self, *args, **kwargs) -> None:
|
||
|
super().__init__(*args, **kwargs)
|
||
|
|
||
|
# float_format is expected to be a string
|
||
|
# formatter should be used to pass a function
|
||
|
if self.float_format is not None and self.formatter is None:
|
||
|
# GH21625, GH22270
|
||
|
self.fixed_width = False
|
||
|
if callable(self.float_format):
|
||
|
self.formatter = self.float_format
|
||
|
self.float_format = None
|
||
|
|
||
|
def _value_formatter(
|
||
|
self,
|
||
|
float_format: FloatFormatType | None = None,
|
||
|
threshold: float | None = None,
|
||
|
) -> Callable:
|
||
|
"""Returns a function to be applied on each value to format it"""
|
||
|
# the float_format parameter supersedes self.float_format
|
||
|
if float_format is None:
|
||
|
float_format = self.float_format
|
||
|
|
||
|
# we are going to compose different functions, to first convert to
|
||
|
# a string, then replace the decimal symbol, and finally chop according
|
||
|
# to the threshold
|
||
|
|
||
|
# when there is no float_format, we use str instead of '%g'
|
||
|
# because str(0.0) = '0.0' while '%g' % 0.0 = '0'
|
||
|
if float_format:
|
||
|
|
||
|
def base_formatter(v):
|
||
|
assert float_format is not None # for mypy
|
||
|
# error: "str" not callable
|
||
|
# error: Unexpected keyword argument "value" for "__call__" of
|
||
|
# "EngFormatter"
|
||
|
return (
|
||
|
float_format(value=v) # type: ignore[operator,call-arg]
|
||
|
if notna(v)
|
||
|
else self.na_rep
|
||
|
)
|
||
|
|
||
|
else:
|
||
|
|
||
|
def base_formatter(v):
|
||
|
return str(v) if notna(v) else self.na_rep
|
||
|
|
||
|
if self.decimal != ".":
|
||
|
|
||
|
def decimal_formatter(v):
|
||
|
return base_formatter(v).replace(".", self.decimal, 1)
|
||
|
|
||
|
else:
|
||
|
decimal_formatter = base_formatter
|
||
|
|
||
|
if threshold is None:
|
||
|
return decimal_formatter
|
||
|
|
||
|
def formatter(value):
|
||
|
if notna(value):
|
||
|
if abs(value) > threshold:
|
||
|
return decimal_formatter(value)
|
||
|
else:
|
||
|
return decimal_formatter(0.0)
|
||
|
else:
|
||
|
return self.na_rep
|
||
|
|
||
|
return formatter
|
||
|
|
||
|
def get_result_as_array(self) -> np.ndarray:
|
||
|
"""
|
||
|
Returns the float values converted into strings using
|
||
|
the parameters given at initialisation, as a numpy array
|
||
|
"""
|
||
|
|
||
|
def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
|
||
|
mask = isna(values)
|
||
|
formatted = np.array(
|
||
|
[
|
||
|
formatter(val) if not m else na_rep
|
||
|
for val, m in zip(values.ravel(), mask.ravel())
|
||
|
]
|
||
|
).reshape(values.shape)
|
||
|
return formatted
|
||
|
|
||
|
if self.formatter is not None:
|
||
|
return format_with_na_rep(self.values, self.formatter, self.na_rep)
|
||
|
|
||
|
if self.fixed_width:
|
||
|
threshold = get_option("display.chop_threshold")
|
||
|
else:
|
||
|
threshold = None
|
||
|
|
||
|
# if we have a fixed_width, we'll need to try different float_format
|
||
|
def format_values_with(float_format):
|
||
|
formatter = self._value_formatter(float_format, threshold)
|
||
|
|
||
|
# default formatter leaves a space to the left when formatting
|
||
|
# floats, must be consistent for left-justifying NaNs (GH #25061)
|
||
|
if self.justify == "left":
|
||
|
na_rep = " " + self.na_rep
|
||
|
else:
|
||
|
na_rep = self.na_rep
|
||
|
|
||
|
# separate the wheat from the chaff
|
||
|
values = self.values
|
||
|
is_complex = is_complex_dtype(values)
|
||
|
values = format_with_na_rep(values, formatter, na_rep)
|
||
|
|
||
|
if self.fixed_width:
|
||
|
if is_complex:
|
||
|
result = _trim_zeros_complex(values, self.decimal)
|
||
|
else:
|
||
|
result = _trim_zeros_float(values, self.decimal)
|
||
|
return np.asarray(result, dtype="object")
|
||
|
|
||
|
return values
|
||
|
|
||
|
# There is a special default string when we are fixed-width
|
||
|
# The default is otherwise to use str instead of a formatting string
|
||
|
float_format: FloatFormatType | None
|
||
|
if self.float_format is None:
|
||
|
if self.fixed_width:
|
||
|
if self.leading_space is True:
|
||
|
fmt_str = "{value: .{digits:d}f}"
|
||
|
else:
|
||
|
fmt_str = "{value:.{digits:d}f}"
|
||
|
float_format = partial(fmt_str.format, digits=self.digits)
|
||
|
else:
|
||
|
float_format = self.float_format
|
||
|
else:
|
||
|
float_format = lambda value: self.float_format % value
|
||
|
|
||
|
formatted_values = format_values_with(float_format)
|
||
|
|
||
|
if not self.fixed_width:
|
||
|
return formatted_values
|
||
|
|
||
|
# we need to convert to engineering format if some values are too small
|
||
|
# and would appear as 0, or if some values are too big and take too
|
||
|
# much space
|
||
|
|
||
|
if len(formatted_values) > 0:
|
||
|
maxlen = max(len(x) for x in formatted_values)
|
||
|
too_long = maxlen > self.digits + 6
|
||
|
else:
|
||
|
too_long = False
|
||
|
|
||
|
with np.errstate(invalid="ignore"):
|
||
|
abs_vals = np.abs(self.values)
|
||
|
# this is pretty arbitrary for now
|
||
|
# large values: more than 8 characters including decimal symbol
|
||
|
# and first digit, hence > 1e6
|
||
|
has_large_values = (abs_vals > 1e6).any()
|
||
|
has_small_values = (
|
||
|
(abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)
|
||
|
).any()
|
||
|
|
||
|
if has_small_values or (too_long and has_large_values):
|
||
|
if self.leading_space is True:
|
||
|
fmt_str = "{value: .{digits:d}e}"
|
||
|
else:
|
||
|
fmt_str = "{value:.{digits:d}e}"
|
||
|
float_format = partial(fmt_str.format, digits=self.digits)
|
||
|
formatted_values = format_values_with(float_format)
|
||
|
|
||
|
return formatted_values
|
||
|
|
||
|
def _format_strings(self) -> list[str]:
|
||
|
return list(self.get_result_as_array())
|
||
|
|
||
|
|
||
|
class IntArrayFormatter(GenericArrayFormatter):
|
||
|
def _format_strings(self) -> list[str]:
|
||
|
if self.leading_space is False:
|
||
|
formatter_str = lambda x: f"{x:d}".format(x=x)
|
||
|
else:
|
||
|
formatter_str = lambda x: f"{x: d}".format(x=x)
|
||
|
formatter = self.formatter or formatter_str
|
||
|
fmt_values = [formatter(x) for x in self.values]
|
||
|
return fmt_values
|
||
|
|
||
|
|
||
|
class Datetime64Formatter(GenericArrayFormatter):
|
||
|
def __init__(
|
||
|
self,
|
||
|
values: np.ndarray | Series | DatetimeIndex | DatetimeArray,
|
||
|
nat_rep: str = "NaT",
|
||
|
date_format: None = None,
|
||
|
**kwargs,
|
||
|
) -> None:
|
||
|
super().__init__(values, **kwargs)
|
||
|
self.nat_rep = nat_rep
|
||
|
self.date_format = date_format
|
||
|
|
||
|
def _format_strings(self) -> list[str]:
|
||
|
"""we by definition have DO NOT have a TZ"""
|
||
|
values = self.values
|
||
|
|
||
|
if not isinstance(values, DatetimeIndex):
|
||
|
values = DatetimeIndex(values)
|
||
|
|
||
|
if self.formatter is not None and callable(self.formatter):
|
||
|
return [self.formatter(x) for x in values]
|
||
|
|
||
|
fmt_values = values._data._format_native_types(
|
||
|
na_rep=self.nat_rep, date_format=self.date_format
|
||
|
)
|
||
|
return fmt_values.tolist()
|
||
|
|
||
|
|
||
|
class ExtensionArrayFormatter(GenericArrayFormatter):
|
||
|
def _format_strings(self) -> list[str]:
|
||
|
values = extract_array(self.values, extract_numpy=True)
|
||
|
|
||
|
formatter = self.formatter
|
||
|
if formatter is None:
|
||
|
formatter = values._formatter(boxed=True)
|
||
|
|
||
|
if isinstance(values, Categorical):
|
||
|
# Categorical is special for now, so that we can preserve tzinfo
|
||
|
array = values._internal_get_values()
|
||
|
else:
|
||
|
array = np.asarray(values)
|
||
|
|
||
|
fmt_values = format_array(
|
||
|
array,
|
||
|
formatter,
|
||
|
float_format=self.float_format,
|
||
|
na_rep=self.na_rep,
|
||
|
digits=self.digits,
|
||
|
space=self.space,
|
||
|
justify=self.justify,
|
||
|
decimal=self.decimal,
|
||
|
leading_space=self.leading_space,
|
||
|
quoting=self.quoting,
|
||
|
)
|
||
|
return fmt_values
|
||
|
|
||
|
|
||
|
def format_percentiles(
|
||
|
percentiles: (np.ndarray | Sequence[float]),
|
||
|
) -> list[str]:
|
||
|
"""
|
||
|
Outputs rounded and formatted percentiles.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
percentiles : list-like, containing floats from interval [0,1]
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
formatted : list of strings
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
Rounding precision is chosen so that: (1) if any two elements of
|
||
|
``percentiles`` differ, they remain different after rounding
|
||
|
(2) no entry is *rounded* to 0% or 100%.
|
||
|
Any non-integer is always rounded to at least 1 decimal place.
|
||
|
|
||
|
Examples
|
||
|
--------
|
||
|
Keeps all entries different after rounding:
|
||
|
|
||
|
>>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
|
||
|
['1.999%', '2.001%', '50%', '66.667%', '99.99%']
|
||
|
|
||
|
No element is rounded to 0% or 100% (unless already equal to it).
|
||
|
Duplicates are allowed:
|
||
|
|
||
|
>>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
|
||
|
['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
|
||
|
"""
|
||
|
percentiles = np.asarray(percentiles)
|
||
|
|
||
|
# It checks for np.NaN as well
|
||
|
with np.errstate(invalid="ignore"):
|
||
|
if (
|
||
|
not is_numeric_dtype(percentiles)
|
||
|
or not np.all(percentiles >= 0)
|
||
|
or not np.all(percentiles <= 1)
|
||
|
):
|
||
|
raise ValueError("percentiles should all be in the interval [0,1]")
|
||
|
|
||
|
percentiles = 100 * percentiles
|
||
|
|
||
|
int_idx = np.isclose(percentiles.astype(int), percentiles)
|
||
|
|
||
|
if np.all(int_idx):
|
||
|
out = percentiles.astype(int).astype(str)
|
||
|
return [i + "%" for i in out]
|
||
|
|
||
|
unique_pcts = np.unique(percentiles)
|
||
|
to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None
|
||
|
to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None
|
||
|
|
||
|
# Least precision that keeps percentiles unique after rounding
|
||
|
prec = -np.floor(
|
||
|
np.log10(np.min(np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end)))
|
||
|
).astype(int)
|
||
|
prec = max(1, prec)
|
||
|
out = np.empty_like(percentiles, dtype=object)
|
||
|
out[int_idx] = percentiles[int_idx].astype(int).astype(str)
|
||
|
|
||
|
out[~int_idx] = percentiles[~int_idx].round(prec).astype(str)
|
||
|
return [i + "%" for i in out]
|


def is_dates_only(values: np.ndarray | DatetimeArray | Index | DatetimeIndex) -> bool:
    # Return True only when every value is a pure date (midnight, no timezone)
    if not isinstance(values, Index):
        values = values.ravel()

    if not isinstance(values, (DatetimeArray, DatetimeIndex)):
        values = DatetimeIndex(values)

    if values.tz is not None:
        return False

    values_int = values.asi8
    consider_values = values_int != iNaT
    # error: Argument 1 to "py_get_unit_from_dtype" has incompatible type
    # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
    reso = get_unit_from_dtype(values.dtype)  # type: ignore[arg-type]
    ppd = periods_per_day(reso)

    # TODO: can we reuse is_date_array_normalized? would need a skipna kwd
    even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0
    if even_days:
        return True
    return False
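# Illustrative behaviour (a sketch in comments; the inputs are hypothetical and
# use DatetimeIndex, which is imported at the top of this module):
#   is_dates_only(DatetimeIndex(["2020-01-01", "2020-01-02"]))        -> True
#   is_dates_only(DatetimeIndex(["2020-01-01 12:30", "2020-01-02"]))  -> False
#   is_dates_only(DatetimeIndex(["2020-01-01"], tz="UTC"))            -> False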


def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str:
    if x is NaT:
        return nat_rep

    # Timestamp.__str__ falls back to datetime.datetime.__str__ = isoformat(sep=' ')
    # so it already uses string formatting rather than strftime (faster).
    return str(x)


def _format_datetime64_dateonly(
    x: NaTType | Timestamp,
    nat_rep: str = "NaT",
    date_format: str | None = None,
) -> str:
    if isinstance(x, NaTType):
        return nat_rep

    if date_format:
        return x.strftime(date_format)
    else:
        # Timestamp._date_repr relies on string formatting (faster than strftime)
        return x._date_repr


def get_format_datetime64(
    is_dates_only: bool, nat_rep: str = "NaT", date_format: str | None = None
) -> Callable:
    """Return a formatter callable taking a datetime64 as input and providing
    a string as output"""

    if is_dates_only:
        return lambda x: _format_datetime64_dateonly(
            x, nat_rep=nat_rep, date_format=date_format
        )
    else:
        return lambda x: _format_datetime64(x, nat_rep=nat_rep)
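# Illustrative sketch (comments only; Timestamp is imported at the top of this
# module, and the expected outputs below are assumptions, not assertions):
#   fmt = get_format_datetime64(is_dates_only=True)
#   fmt(Timestamp("2020-01-01"))   -> '2020-01-01'
#   fmt = get_format_datetime64(is_dates_only=False)
#   fmt(Timestamp("2020-01-01"))   -> '2020-01-01 00:00:00'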


def get_format_datetime64_from_values(
    values: np.ndarray | DatetimeArray | DatetimeIndex, date_format: str | None
) -> str | None:
    """given values and a date_format, return a string format"""
    if isinstance(values, np.ndarray) and values.ndim > 1:
        # We don't actually care about the order of values, and DatetimeIndex
        # only accepts 1D values
        values = values.ravel()

    ido = is_dates_only(values)
    if ido:
        # Only dates and no timezone: provide a default format
        return date_format or "%Y-%m-%d"
    return date_format
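# Illustrative sketch (comments only; hypothetical inputs):
#   get_format_datetime64_from_values(DatetimeIndex(["2020-01-01"]), None)
#       -> '%Y-%m-%d'   (date-only values get a default date format)
#   get_format_datetime64_from_values(DatetimeIndex(["2020-01-01 12:00"]), None)
#       -> None         (time components present, so no format is forced)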


class Datetime64TZFormatter(Datetime64Formatter):
    def _format_strings(self) -> list[str]:
        """we by definition have a TZ"""
        values = self.values.astype(object)
        ido = is_dates_only(values)
        formatter = self.formatter or get_format_datetime64(
            ido, date_format=self.date_format
        )
        fmt_values = [formatter(x) for x in values]

        return fmt_values


class Timedelta64Formatter(GenericArrayFormatter):
    def __init__(
        self,
        values: np.ndarray | TimedeltaIndex,
        nat_rep: str = "NaT",
        box: bool = False,
        **kwargs,
    ) -> None:
        super().__init__(values, **kwargs)
        self.nat_rep = nat_rep
        self.box = box

    def _format_strings(self) -> list[str]:
        formatter = self.formatter or get_format_timedelta64(
            self.values, nat_rep=self.nat_rep, box=self.box
        )
        return [formatter(x) for x in self.values]


def get_format_timedelta64(
    values: np.ndarray | TimedeltaIndex | TimedeltaArray,
    nat_rep: str = "NaT",
    box: bool = False,
) -> Callable:
    """
    Return a formatter function for a range of timedeltas.
    All values are rendered with the same format argument.

    If ``box`` is True, the result is wrapped in quotes.
    """
    values_int = values.view(np.int64)

    consider_values = values_int != iNaT

    one_day_nanos = 86400 * 10**9
    # error: Unsupported operand types for % ("ExtensionArray" and "int")
    not_midnight = values_int % one_day_nanos != 0  # type: ignore[operator]
    # error: Argument 1 to "__call__" of "ufunc" has incompatible type
    # "Union[Any, ExtensionArray, ndarray]"; expected
    # "Union[Union[int, float, complex, str, bytes, generic],
    # Sequence[Union[int, float, complex, str, bytes, generic]],
    # Sequence[Sequence[Any]], _SupportsArray]"
    both = np.logical_and(consider_values, not_midnight)  # type: ignore[arg-type]
    even_days = both.sum() == 0

    if even_days:
        format = None
    else:
        format = "long"

    def _formatter(x):
        if x is None or (is_scalar(x) and isna(x)):
            return nat_rep

        if not isinstance(x, Timedelta):
            x = Timedelta(x)

        # Timedelta._repr_base uses string formatting (faster than strftime)
        result = x._repr_base(format=format)
        if box:
            result = f"'{result}'"
        return result

    return _formatter
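# Illustrative sketch (comments only; hypothetical inputs, and the outputs shown
# are what the formatter is expected to produce, not asserted here):
#   fmt = get_format_timedelta64(TimedeltaIndex(["1 days", "2 days"]))
#   fmt(Timedelta("1 days"))   -> '1 days'            (all whole days: short form)
#   fmt = get_format_timedelta64(TimedeltaIndex(["1 days", "2 days 03:00:00"]))
#   fmt(Timedelta("1 days"))   -> '1 days 00:00:00'   (mixed: long form everywhere)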


def _make_fixed_width(
    strings: list[str],
    justify: str = "right",
    minimum: int | None = None,
    adj: TextAdjustment | None = None,
) -> list[str]:

    if len(strings) == 0 or justify == "all":
        return strings

    if adj is None:
        adjustment = get_adjustment()
    else:
        adjustment = adj

    max_len = max(adjustment.len(x) for x in strings)

    if minimum is not None:
        max_len = max(minimum, max_len)

    conf_max = get_option("display.max_colwidth")
    if conf_max is not None and max_len > conf_max:
        max_len = conf_max

    def just(x: str) -> str:
        if conf_max is not None:
            if (conf_max > 3) & (adjustment.len(x) > max_len):
                x = x[: max_len - 3] + "..."
        return x

    strings = [just(x) for x in strings]
    result = adjustment.justify(strings, max_len, mode=justify)
    return result
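# Illustrative sketch (comments only; assumes the default display options, in
# particular display.max_colwidth large enough that no truncation happens):
#   _make_fixed_width(["a", "bb", "ccc"])            -> ['  a', ' bb', 'ccc']
#   _make_fixed_width(["a", "bb"], justify="left")   -> ['a ', 'bb']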


def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[str]:
    """
    Separates the real and imaginary parts from the complex number, and
    executes the _trim_zeros_float method on each of those.
    """
    trimmed = [
        "".join(_trim_zeros_float(re.split(r"([j+-])", x), decimal))
        for x in str_complexes
    ]

    # pad strings to the length of the longest trimmed string for alignment
    lengths = [len(s) for s in trimmed]
    max_length = max(lengths)
    padded = [
        s[: -((k - 1) // 2 + 1)]  # real part
        + (max_length - k) // 2 * "0"
        + s[-((k - 1) // 2 + 1) : -((k - 1) // 2)]  # + / -
        + s[-((k - 1) // 2) : -1]  # imaginary part
        + (max_length - k) // 2 * "0"
        + s[-1]
        for s, k in zip(trimmed, lengths)
    ]
    return padded


def _trim_zeros_single_float(str_float: str) -> str:
    """
    Trims trailing zeros after a decimal point,
    leaving just one if necessary.
    """
    str_float = str_float.rstrip("0")
    if str_float.endswith("."):
        str_float += "0"

    return str_float
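# Illustrative sketch (comments only; the input is assumed to contain a decimal
# point, as it does on the float-formatting paths that call this helper):
#   _trim_zeros_single_float(" 3.1400")  -> ' 3.14'
#   _trim_zeros_single_float(" 3.0000")  -> ' 3.0'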


def _trim_zeros_float(
    str_floats: np.ndarray | list[str], decimal: str = "."
) -> list[str]:
    """
    Trims the maximum number of trailing zeros equally from
    all numbers containing decimals, leaving just one if
    necessary.
    """
    trimmed = str_floats
    number_regex = re.compile(rf"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$")

    def is_number_with_decimal(x):
        return re.match(number_regex, x) is not None

    def should_trim(values: np.ndarray | list[str]) -> bool:
        """
        Determine if an array of strings should be trimmed.

        Returns True if all numbers containing decimals (defined by the
        above regular expression) within the array end in a zero, otherwise
        returns False.
        """
        numbers = [x for x in values if is_number_with_decimal(x)]
        return len(numbers) > 0 and all(x.endswith("0") for x in numbers)

    while should_trim(trimmed):
        trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed]

    # leave one 0 after the decimal points if need be.
    result = [
        x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x
        for x in trimmed
    ]
    return result
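# Illustrative sketch (comments only; hypothetical inputs): zeros are removed
# column-wise, so alignment across values is preserved and at least one digit is
# kept after the decimal point:
#   _trim_zeros_float(["1.500", "2.370", "4.000"])  -> ['1.50', '2.37', '4.00']
#   _trim_zeros_float(["1.00", "2.00"])             -> ['1.0', '2.0']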


def _has_names(index: Index) -> bool:
    if isinstance(index, MultiIndex):
        return com.any_not_none(*index.names)
    else:
        return index.name is not None


class EngFormatter:
    """
    Formats float values according to engineering format.

    Based on matplotlib.ticker.EngFormatter
    """

    # The SI engineering prefixes
    ENG_PREFIXES = {
        -24: "y",
        -21: "z",
        -18: "a",
        -15: "f",
        -12: "p",
        -9: "n",
        -6: "u",
        -3: "m",
        0: "",
        3: "k",
        6: "M",
        9: "G",
        12: "T",
        15: "P",
        18: "E",
        21: "Z",
        24: "Y",
    }

    def __init__(
        self, accuracy: int | None = None, use_eng_prefix: bool = False
    ) -> None:
        self.accuracy = accuracy
        self.use_eng_prefix = use_eng_prefix

    def __call__(self, num: float) -> str:
        """
        Formats a number in engineering notation, appending a letter
        representing the power of 1000 of the original number. Some examples:
        >>> format_eng = EngFormatter(accuracy=0, use_eng_prefix=True)
        >>> format_eng(0)
        ' 0'
        >>> format_eng = EngFormatter(accuracy=1, use_eng_prefix=True)
        >>> format_eng(1_000_000)
        ' 1.0M'
        >>> format_eng = EngFormatter(accuracy=2, use_eng_prefix=False)
        >>> format_eng("-1e-6")
        '-1.00E-06'

        @param num: the value to represent
        @type num: either a numeric value or a string that can be converted to
                   a numeric value (as per decimal.Decimal constructor)

        @return: engineering formatted string
        """
        dnum = decimal.Decimal(str(num))

        if decimal.Decimal.is_nan(dnum):
            return "NaN"

        if decimal.Decimal.is_infinite(dnum):
            return "inf"

        sign = 1

        if dnum < 0:  # pragma: no cover
            sign = -1
            dnum = -dnum

        if dnum != 0:
            pow10 = decimal.Decimal(int(math.floor(dnum.log10() / 3) * 3))
        else:
            pow10 = decimal.Decimal(0)

        pow10 = pow10.min(max(self.ENG_PREFIXES.keys()))
        pow10 = pow10.max(min(self.ENG_PREFIXES.keys()))
        int_pow10 = int(pow10)

        if self.use_eng_prefix:
            prefix = self.ENG_PREFIXES[int_pow10]
        else:
            if int_pow10 < 0:
                prefix = f"E-{-int_pow10:02d}"
            else:
                prefix = f"E+{int_pow10:02d}"

        mant = sign * dnum / (10**pow10)

        if self.accuracy is None:  # pragma: no cover
            format_str = "{mant: g}{prefix}"
        else:
            format_str = f"{{mant: .{self.accuracy:d}f}}{{prefix}}"

        formatted = format_str.format(mant=mant, prefix=prefix)

        return formatted


def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None:
    """
    Alter default behavior on how float is formatted in DataFrame.
    Format float in engineering format. By accuracy, we mean the number of
    decimal digits after the decimal point.

    See also EngFormatter.
    """
    set_option("display.float_format", EngFormatter(accuracy, use_eng_prefix))
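# Illustrative usage (a sketch in comments; this changes the global display
# option, and the rendered value shown is an assumption, not asserted here):
#   set_eng_float_format(accuracy=3, use_eng_prefix=True)
#   # a float like 1_500_000.0 is then displayed as ' 1.500M'
#   set_option("display.float_format", None)  # restore the default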


def get_level_lengths(
    levels: Any, sentinel: bool | object | str = ""
) -> list[dict[int, int]]:
    """
    For each index in each level the function returns lengths of indexes.

    Parameters
    ----------
    levels : list of lists
        List of values for each level.
    sentinel : string, optional
        Value which signals that no new index starts at that position.

    Returns
    -------
    Returns list of maps. For each level returns map of indexes (key is index
    in row and value is length of index).
    """
    if len(levels) == 0:
        return []

    control = [True] * len(levels[0])

    result = []
    for level in levels:
        last_index = 0

        lengths = {}
        for i, key in enumerate(level):
            if control[i] and key == sentinel:
                pass
            else:
                control[i] = False
                lengths[last_index] = i - last_index
                last_index = i

        lengths[last_index] = len(level) - last_index

        result.append(lengths)

    return result
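# Illustrative sketch (comments only; hypothetical input): a sentinel entry
# ("" here) means "continue the previous label", so each map records where a
# run starts and how many rows it spans:
#   get_level_lengths([["a", "", "", "b", ""], [1, 2, 3, 4, 5]])
#     -> [{0: 3, 3: 2}, {0: 1, 1: 1, 2: 1, 3: 1, 4: 1}]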


def buffer_put_lines(buf: WriteBuffer[str], lines: list[str]) -> None:
    """
    Appends lines to a buffer.

    Parameters
    ----------
    buf
        The buffer to write to
    lines
        The lines to append.
    """
    if any(isinstance(x, str) for x in lines):
        lines = [str(x) for x in lines]
    buf.write("\n".join(lines))
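# Illustrative usage (a minimal sketch in comments; StringIO is imported at the
# top of this module, and the input lines are hypothetical):
#   buf = StringIO()
#   buffer_put_lines(buf, ["<table>", "</table>"])
#   buf.getvalue()  -> '<table>\n</table>'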