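# Wrapper around the Cython ``TextReader`` in ``pandas._libs.parsers``: this is
# the engine="c" code path of ``read_csv``, adapted to the common ``ParserBase``
# interface shared by the parser engines.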
from __future__ import annotations

from collections import defaultdict
from typing import (
    TYPE_CHECKING,
    Hashable,
    Mapping,
    Sequence,
)
import warnings

import numpy as np

import pandas._libs.parsers as parsers
from pandas._typing import (
    ArrayLike,
    DtypeArg,
    DtypeObj,
    ReadCsvBuffer,
)
from pandas.errors import DtypeWarning
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.common import (
    is_categorical_dtype,
    pandas_dtype,
)
from pandas.core.dtypes.concat import union_categoricals
from pandas.core.dtypes.dtypes import ExtensionDtype

from pandas.core.indexes.api import ensure_index_from_sequences

from pandas.io.parsers.base_parser import (
    ParserBase,
    is_index_col,
)

if TYPE_CHECKING:
    from pandas import (
        Index,
        MultiIndex,
    )


class CParserWrapper(ParserBase):
    low_memory: bool
    _reader: parsers.TextReader

    def __init__(self, src: ReadCsvBuffer[str], **kwds) -> None:
        super().__init__(kwds)
        self.kwds = kwds
        kwds = kwds.copy()

        self.low_memory = kwds.pop("low_memory", False)
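        # With low_memory=True, read() below pulls the file in chunks via
        # TextReader.read_low_memory and stitches the pieces back together with
        # _concatenate_chunks, trading some speed for lower peak memory use.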

        # #2442
        # error: Cannot determine type of 'index_col'
        kwds["allow_leading_cols"] = (
            self.index_col is not False  # type: ignore[has-type]
        )
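        # Roughly speaking, allow_leading_cols lets the C reader treat extra
        # leading columns in the data rows as an implicit index; it is turned
        # off only when the caller explicitly passed index_col=False.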

        # GH20529, validate usecol arg before TextReader
        kwds["usecols"] = self.usecols

        # Have to pass int, would break tests using TextReader directly otherwise :(
        kwds["on_bad_lines"] = self.on_bad_lines.value

        for key in (
            "storage_options",
            "encoding",
            "memory_map",
            "compression",
            "error_bad_lines",
            "warn_bad_lines",
        ):
            kwds.pop(key, None)
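        # The keys above are handled by the Python-level IO and error-handling
        # layers (file opening / on_bad_lines translation), so they are dropped
        # here rather than forwarded to TextReader, which does not accept them.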

        kwds["dtype"] = ensure_dtype_objs(kwds.get("dtype", None))
        self._reader = parsers.TextReader(src, **kwds)

        self.unnamed_cols = self._reader.unnamed_cols

        # error: Cannot determine type of 'names'
        passed_names = self.names is None  # type: ignore[has-type]

        if self._reader.header is None:
            self.names = None
        else:
            # error: Cannot determine type of 'names'
            # error: Cannot determine type of 'index_names'
            (
                self.names,  # type: ignore[has-type]
                self.index_names,
                self.col_names,
                passed_names,
            ) = self._extract_multi_indexer_columns(
                self._reader.header,
                self.index_names,  # type: ignore[has-type]
                passed_names,
            )

        # error: Cannot determine type of 'names'
        if self.names is None:  # type: ignore[has-type]
            if self.prefix:
                # error: Cannot determine type of 'names'
                self.names = [  # type: ignore[has-type]
                    f"{self.prefix}{i}" for i in range(self._reader.table_width)
                ]
            else:
                # error: Cannot determine type of 'names'
                self.names = list(  # type: ignore[has-type]
                    range(self._reader.table_width)
                )

        # gh-9755
        #
        # need to set orig_names here first
        # so that proper indexing can be done
        # with _set_noconvert_columns
        #
        # once names has been filtered, we will
        # then set orig_names again to names
        # error: Cannot determine type of 'names'
        self.orig_names = self.names[:]  # type: ignore[has-type]

        if self.usecols:
            usecols = self._evaluate_usecols(self.usecols, self.orig_names)

            # GH 14671
            # assert for mypy, orig_names is List or None, None would error in issubset
            assert self.orig_names is not None
            if self.usecols_dtype == "string" and not set(usecols).issubset(
                self.orig_names
            ):
                self._validate_usecols_names(usecols, self.orig_names)

            # error: Cannot determine type of 'names'
            if len(self.names) > len(usecols):  # type: ignore[has-type]
                # error: Cannot determine type of 'names'
                self.names = [  # type: ignore[has-type]
                    n
                    # error: Cannot determine type of 'names'
                    for i, n in enumerate(self.names)  # type: ignore[has-type]
                    if (i in usecols or n in usecols)
                ]

            # error: Cannot determine type of 'names'
            if len(self.names) < len(usecols):  # type: ignore[has-type]
                # error: Cannot determine type of 'names'
                self._validate_usecols_names(
                    usecols,
                    self.names,  # type: ignore[has-type]
                )

        # error: Cannot determine type of 'names'
        self._validate_parse_dates_presence(self.names)  # type: ignore[has-type]
        self._set_noconvert_columns()

        # error: Cannot determine type of 'names'
        self.orig_names = self.names  # type: ignore[has-type]

        if not self._has_complex_date_col:
            # error: Cannot determine type of 'index_col'
            if self._reader.leading_cols == 0 and is_index_col(
                self.index_col  # type: ignore[has-type]
            ):

                self._name_processed = True
                (
                    index_names,
                    # error: Cannot determine type of 'names'
                    self.names,  # type: ignore[has-type]
                    self.index_col,
                ) = self._clean_index_names(
                    # error: Cannot determine type of 'names'
                    self.names,  # type: ignore[has-type]
                    # error: Cannot determine type of 'index_col'
                    self.index_col,  # type: ignore[has-type]
                )

                if self.index_names is None:
                    self.index_names = index_names

            if self._reader.header is None and not passed_names:
                assert self.index_names is not None
                self.index_names = [None] * len(self.index_names)

        self._implicit_index = self._reader.leading_cols > 0

    def close(self) -> None:
        # close handles opened by C parser
        try:
            self._reader.close()
        except ValueError:
            pass

    def _set_noconvert_columns(self) -> None:
        """
        Set the columns that should not undergo dtype conversions.

        Currently, any column that is involved with date parsing will not
        undergo such conversions.
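
        For example, a column listed in ``parse_dates`` is left as raw object
        values by the C reader so that the later date-conversion step sees the
        unmodified input rather than already-inferred numeric values.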
        """
        assert self.orig_names is not None
        # error: Cannot determine type of 'names'

        # much faster than using orig_names.index(x) xref GH#44106
        names_dict = {x: i for i, x in enumerate(self.orig_names)}
        col_indices = [names_dict[x] for x in self.names]  # type: ignore[has-type]
        # error: Cannot determine type of 'names'
        noconvert_columns = self._set_noconvert_dtype_columns(
            col_indices,
            self.names,  # type: ignore[has-type]
        )
        for col in noconvert_columns:
            self._reader.set_noconvert(col)

    def read(
        self,
        nrows: int | None = None,
    ) -> tuple[
        Index | MultiIndex | None,
        Sequence[Hashable] | MultiIndex,
        Mapping[Hashable, ArrayLike],
    ]:
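        # Read at most ``nrows`` rows from the underlying TextReader and return
        # a (row index, column labels, {column label -> values array}) triple.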
        index: Index | MultiIndex | None
        column_names: Sequence[Hashable] | MultiIndex
        try:
            if self.low_memory:
                chunks = self._reader.read_low_memory(nrows)
                # destructive to chunks
                data = _concatenate_chunks(chunks)

            else:
                data = self._reader.read(nrows)
        except StopIteration:
            if self._first_chunk:
                self._first_chunk = False
                names = self._maybe_dedup_names(self.orig_names)
                index, columns, col_dict = self._get_empty_meta(
                    names,
                    self.index_col,
                    self.index_names,
                    dtype=self.kwds.get("dtype"),
                )
                columns = self._maybe_make_multi_index_columns(columns, self.col_names)

                if self.usecols is not None:
                    columns = self._filter_usecols(columns)

                col_dict = {k: v for k, v in col_dict.items() if k in columns}

                return index, columns, col_dict

            else:
                self.close()
                raise

        # Done with first read, next time raise StopIteration
        self._first_chunk = False

        # error: Cannot determine type of 'names'
        names = self.names  # type: ignore[has-type]

        if self._reader.leading_cols:
            if self._has_complex_date_col:
                raise NotImplementedError("file structure not yet supported")

            # implicit index, no index names
            arrays = []

            for i in range(self._reader.leading_cols):
                if self.index_col is None:
                    values = data.pop(i)
                else:
                    values = data.pop(self.index_col[i])

                values = self._maybe_parse_dates(values, i, try_parse_dates=True)
                arrays.append(values)

            index = ensure_index_from_sequences(arrays)

            if self.usecols is not None:
                names = self._filter_usecols(names)

            names = self._maybe_dedup_names(names)

            # rename dict keys
            data_tups = sorted(data.items())
            data = {k: v for k, (i, v) in zip(names, data_tups)}

            column_names, date_data = self._do_date_conversions(names, data)

            # maybe create a mi on the columns
            column_names = self._maybe_make_multi_index_columns(
                column_names, self.col_names
            )

        else:
            # rename dict keys
            data_tups = sorted(data.items())

            # ugh, mutation

            # assert for mypy, orig_names is List or None, None would error in list(...)
            assert self.orig_names is not None
            names = list(self.orig_names)
            names = self._maybe_dedup_names(names)

            if self.usecols is not None:
                names = self._filter_usecols(names)

            # columns as list
            alldata = [x[1] for x in data_tups]
            if self.usecols is None:
                self._check_data_length(names, alldata)

            data = {k: v for k, (i, v) in zip(names, data_tups)}

            names, date_data = self._do_date_conversions(names, data)
            index, column_names = self._make_index(date_data, alldata, names)

        return index, column_names, date_data

    def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]:
        # hackish
        usecols = self._evaluate_usecols(self.usecols, names)
        if usecols is not None and len(names) != len(usecols):
            names = [
                name for i, name in enumerate(names) if i in usecols or name in usecols
            ]
        return names

    def _get_index_names(self):
        names = list(self._reader.header[0])
        idx_names = None

        if self._reader.leading_cols == 0 and self.index_col is not None:
            (idx_names, names, self.index_col) = self._clean_index_names(
                names, self.index_col
            )

        return names, idx_names

    def _maybe_parse_dates(self, values, index: int, try_parse_dates: bool = True):
        if try_parse_dates and self._should_parse_dates(index):
            values = self._date_conv(values)
        return values


def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict:
    """
    Concatenate chunks of data read with low_memory=True.

    The tricky part is handling Categoricals, where different chunks
    may have different inferred categories.
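
    For example (made-up chunk values), concatenating
    ``[{0: array([1, 2])}, {0: array([3.0, 4.0])}]`` gives
    ``{0: array([1., 2., 3., 4.])}``: int64 and float64 share a non-object
    common type, so no warning is raised, whereas a column that mixes numbers
    and strings falls back to ``object`` and triggers a ``DtypeWarning``.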
    """
    names = list(chunks[0].keys())
    warning_columns = []

    result: dict = {}
    for name in names:
        arrs = [chunk.pop(name) for chunk in chunks]
        # Check each arr for consistent types.
        dtypes = {a.dtype for a in arrs}
        # TODO: shouldn't we exclude all EA dtypes here?
        numpy_dtypes = {x for x in dtypes if not is_categorical_dtype(x)}
        if len(numpy_dtypes) > 1:
            # error: Argument 1 to "find_common_type" has incompatible type
            # "Set[Any]"; expected "Sequence[Union[dtype[Any], None, type,
            # _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any,
            # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]]"
            common_type = np.find_common_type(
                numpy_dtypes,  # type: ignore[arg-type]
                [],
            )
            if common_type == np.dtype(object):
                warning_columns.append(str(name))

        dtype = dtypes.pop()
        if is_categorical_dtype(dtype):
            result[name] = union_categoricals(arrs, sort_categories=False)
        else:
            if isinstance(dtype, ExtensionDtype):
                # TODO: concat_compat?
                array_type = dtype.construct_array_type()
                # error: Argument 1 to "_concat_same_type" of "ExtensionArray"
                # has incompatible type "List[Union[ExtensionArray, ndarray]]";
                # expected "Sequence[ExtensionArray]"
                result[name] = array_type._concat_same_type(
                    arrs  # type: ignore[arg-type]
                )
            else:
                # error: Argument 1 to "concatenate" has incompatible
                # type "List[Union[ExtensionArray, ndarray[Any, Any]]]"
                # ; expected "Union[_SupportsArray[dtype[Any]],
                # Sequence[_SupportsArray[dtype[Any]]],
                # Sequence[Sequence[_SupportsArray[dtype[Any]]]],
                # Sequence[Sequence[Sequence[_SupportsArray[dtype[Any]]]]]
                # , Sequence[Sequence[Sequence[Sequence[
                # _SupportsArray[dtype[Any]]]]]]]"
                result[name] = np.concatenate(arrs)  # type: ignore[arg-type]

    if warning_columns:
        warning_names = ",".join(warning_columns)
        warning_message = " ".join(
            [
                f"Columns ({warning_names}) have mixed types. "
                f"Specify dtype option on import or set low_memory=False."
            ]
        )
        warnings.warn(warning_message, DtypeWarning, stacklevel=find_stack_level())
    return result


def ensure_dtype_objs(
    dtype: DtypeArg | dict[Hashable, DtypeArg] | None
) -> DtypeObj | dict[Hashable, DtypeObj] | None:
    """
    Ensure we have either None, a dtype object, or a dictionary mapping to
    dtype objects.
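
    For example, ``"int64"`` becomes ``dtype('int64')``, ``{"a": "category"}``
    becomes ``{"a": CategoricalDtype(...)}``, and ``None`` passes through
    unchanged.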
    """
    if isinstance(dtype, defaultdict):
        # "None" not callable  [misc]
        default_dtype = pandas_dtype(dtype.default_factory())  # type: ignore[misc]
        dtype_converted: defaultdict = defaultdict(lambda: default_dtype)
        for key in dtype.keys():
            dtype_converted[key] = pandas_dtype(dtype[key])
        return dtype_converted
    elif isinstance(dtype, dict):
        return {k: pandas_dtype(dtype[k]) for k in dtype}
    elif dtype is not None:
        return pandas_dtype(dtype)
    return dtype