""" Module for formatting output data in console (to string). """ from __future__ import annotations from shutil import get_terminal_size from typing import ( TYPE_CHECKING, Iterable, ) import numpy as np from pandas.io.formats.printing import pprint_thing if TYPE_CHECKING: from pandas.io.formats.format import DataFrameFormatter class StringFormatter: """Formatter for string representation of a dataframe.""" def __init__(self, fmt: DataFrameFormatter, line_width: int | None = None) -> None: self.fmt = fmt self.adj = fmt.adj self.frame = fmt.frame self.line_width = line_width def to_string(self) -> str: text = self._get_string_representation() if self.fmt.should_show_dimensions: text = "".join([text, self.fmt.dimensions_info]) return text def _get_strcols(self) -> list[list[str]]: strcols = self.fmt.get_strcols() if self.fmt.is_truncated: strcols = self._insert_dot_separators(strcols) return strcols def _get_string_representation(self) -> str: if self.fmt.frame.empty: return self._empty_info_line strcols = self._get_strcols() if self.line_width is None: # no need to wrap around just print the whole frame return self.adj.adjoin(1, *strcols) if self._need_to_wrap_around: return self._join_multiline(strcols) return self._fit_strcols_to_terminal_width(strcols) @property def _empty_info_line(self) -> str: return ( f"Empty {type(self.frame).__name__}\n" f"Columns: {pprint_thing(self.frame.columns)}\n" f"Index: {pprint_thing(self.frame.index)}" ) @property def _need_to_wrap_around(self) -> bool: return bool(self.fmt.max_cols is None or self.fmt.max_cols > 0) def _insert_dot_separators(self, strcols: list[list[str]]) -> list[list[str]]: str_index = self.fmt._get_formatted_index(self.fmt.tr_frame) index_length = len(str_index) if self.fmt.is_truncated_horizontally: strcols = self._insert_dot_separator_horizontal(strcols, index_length) if self.fmt.is_truncated_vertically: strcols = self._insert_dot_separator_vertical(strcols, index_length) return strcols @property def _adjusted_tr_col_num(self) -> int: return self.fmt.tr_col_num + 1 if self.fmt.index else self.fmt.tr_col_num def _insert_dot_separator_horizontal( self, strcols: list[list[str]], index_length: int ) -> list[list[str]]: strcols.insert(self._adjusted_tr_col_num, [" ..."] * index_length) return strcols def _insert_dot_separator_vertical( self, strcols: list[list[str]], index_length: int ) -> list[list[str]]: n_header_rows = index_length - len(self.fmt.tr_frame) row_num = self.fmt.tr_row_num for ix, col in enumerate(strcols): cwidth = self.adj.len(col[row_num]) if self.fmt.is_truncated_horizontally: is_dot_col = ix == self._adjusted_tr_col_num else: is_dot_col = False if cwidth > 3 or is_dot_col: dots = "..." else: dots = ".." if ix == 0 and self.fmt.index: dot_mode = "left" elif is_dot_col: cwidth = 4 dot_mode = "right" else: dot_mode = "right" dot_str = self.adj.justify([dots], cwidth, mode=dot_mode)[0] col.insert(row_num + n_header_rows, dot_str) return strcols def _join_multiline(self, strcols_input: Iterable[list[str]]) -> str: lwidth = self.line_width adjoin_width = 1 strcols = list(strcols_input) if self.fmt.index: idx = strcols.pop(0) lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width col_widths = [ np.array([self.adj.len(x) for x in col]).max() if len(col) > 0 else 0 for col in strcols ] assert lwidth is not None col_bins = _binify(col_widths, lwidth) nbins = len(col_bins) if self.fmt.is_truncated_vertically: assert self.fmt.max_rows_fitted is not None nrows = self.fmt.max_rows_fitted + 1 else: nrows = len(self.frame) str_lst = [] start = 0 for i, end in enumerate(col_bins): row = strcols[start:end] if self.fmt.index: row.insert(0, idx) if nbins > 1: if end <= len(strcols) and i < nbins - 1: row.append([" \\"] + [" "] * (nrows - 1)) else: row.append([" "] * nrows) str_lst.append(self.adj.adjoin(adjoin_width, *row)) start = end return "\n\n".join(str_lst) def _fit_strcols_to_terminal_width(self, strcols: list[list[str]]) -> str: from pandas import Series lines = self.adj.adjoin(1, *strcols).split("\n") max_len = Series(lines).str.len().max() # plus truncate dot col width, _ = get_terminal_size() dif = max_len - width # '+ 1' to avoid too wide repr (GH PR #17023) adj_dif = dif + 1 col_lens = Series([Series(ele).apply(len).max() for ele in strcols]) n_cols = len(col_lens) counter = 0 while adj_dif > 0 and n_cols > 1: counter += 1 mid = round(n_cols / 2) mid_ix = col_lens.index[mid] col_len = col_lens[mid_ix] # adjoin adds one adj_dif -= col_len + 1 col_lens = col_lens.drop(mid_ix) n_cols = len(col_lens) # subtract index column max_cols_fitted = n_cols - self.fmt.index # GH-21180. Ensure that we print at least two. max_cols_fitted = max(max_cols_fitted, 2) self.fmt.max_cols_fitted = max_cols_fitted # Call again _truncate to cut frame appropriately # and then generate string representation self.fmt.truncate() strcols = self._get_strcols() return self.adj.adjoin(1, *strcols) def _binify(cols: list[int], line_width: int) -> list[int]: adjoin_width = 1 bins = [] curr_width = 0 i_last_column = len(cols) - 1 for i, w in enumerate(cols): w_adjoined = w + adjoin_width curr_width += w_adjoined if i_last_column == i: wrap = curr_width + 1 > line_width and i > 0 else: wrap = curr_width + 2 > line_width and i > 0 if wrap: bins.append(i) curr_width = w_adjoined bins.append(len(cols)) return bins