""" timezone conversion """ cimport cython from cpython.datetime cimport ( PyDelta_Check, datetime, datetime_new, import_datetime, timedelta, tzinfo, ) from cython cimport Py_ssize_t import_datetime() import numpy as np import pytz cimport numpy as cnp from numpy cimport ( int64_t, intp_t, ndarray, uint8_t, ) cnp.import_array() from pandas._libs.tslibs.dtypes cimport ( periods_per_day, periods_per_second, ) from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, npy_datetimestruct, pandas_datetime_to_datetimestruct, ) from pandas._libs.tslibs.timezones cimport ( get_dst_info, is_fixed_offset, is_tzlocal, is_utc, is_zoneinfo, utc_pytz, ) cdef const int64_t[::1] _deltas_placeholder = np.array([], dtype=np.int64) @cython.freelist(16) @cython.final cdef class Localizer: # cdef: # tzinfo tz # NPY_DATETIMEUNIT _reso # bint use_utc, use_fixed, use_tzlocal, use_dst, use_pytz # ndarray trans # Py_ssize_t ntrans # const int64_t[::1] deltas # int64_t delta # int64_t* tdata @cython.initializedcheck(False) @cython.boundscheck(False) def __cinit__(self, tzinfo tz, NPY_DATETIMEUNIT reso): self.tz = tz self._reso = reso self.use_utc = self.use_tzlocal = self.use_fixed = False self.use_dst = self.use_pytz = False self.ntrans = -1 # placeholder self.delta = -1 # placeholder self.deltas = _deltas_placeholder self.tdata = NULL if is_utc(tz) or tz is None: self.use_utc = True elif is_tzlocal(tz) or is_zoneinfo(tz): self.use_tzlocal = True else: trans, deltas, typ = get_dst_info(tz) if reso != NPY_DATETIMEUNIT.NPY_FR_ns: # NB: using floordiv here is implicitly assuming we will # never see trans or deltas that are not an integer number # of seconds. # TODO: avoid these np.array calls if reso == NPY_DATETIMEUNIT.NPY_FR_us: trans = np.array(trans) // 1_000 deltas = np.array(deltas) // 1_000 elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: trans = np.array(trans) // 1_000_000 deltas = np.array(deltas) // 1_000_000 elif reso == NPY_DATETIMEUNIT.NPY_FR_s: trans = np.array(trans) // 1_000_000_000 deltas = np.array(deltas) // 1_000_000_000 else: raise NotImplementedError(reso) self.trans = trans self.ntrans = self.trans.shape[0] self.deltas = deltas if typ != "pytz" and typ != "dateutil": # static/fixed; in this case we know that len(delta) == 1 self.use_fixed = True self.delta = deltas[0] else: self.use_dst = True if typ == "pytz": self.use_pytz = True self.tdata = cnp.PyArray_DATA(trans) @cython.boundscheck(False) cdef inline int64_t utc_val_to_local_val( self, int64_t utc_val, Py_ssize_t* pos, bint* fold=NULL ) except? -1: if self.use_utc: return utc_val elif self.use_tzlocal: return utc_val + _tz_localize_using_tzinfo_api( utc_val, self.tz, to_utc=False, reso=self._reso, fold=fold ) elif self.use_fixed: return utc_val + self.delta else: pos[0] = bisect_right_i8(self.tdata, utc_val, self.ntrans) - 1 if fold is not NULL: fold[0] = _infer_dateutil_fold( utc_val, self.trans, self.deltas, pos[0] ) return utc_val + self.deltas[pos[0]] cdef int64_t tz_localize_to_utc_single( int64_t val, tzinfo tz, object ambiguous=None, object nonexistent=None, NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, ) except? -1: """See tz_localize_to_utc.__doc__""" cdef: int64_t delta int64_t[::1] deltas if val == NPY_NAT: return val elif is_utc(tz) or tz is None: # TODO: test with non-nano return val elif is_tzlocal(tz) or is_zoneinfo(tz): return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True, reso=reso) elif is_fixed_offset(tz): _, deltas, _ = get_dst_info(tz) delta = deltas[0] # TODO: de-duplicate with Localizer.__init__ if reso != NPY_DATETIMEUNIT.NPY_FR_ns: if reso == NPY_DATETIMEUNIT.NPY_FR_us: delta = delta // 1000 elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: delta = delta // 1_000_000 elif reso == NPY_DATETIMEUNIT.NPY_FR_s: delta = delta // 1_000_000_000 return val - delta else: return tz_localize_to_utc( np.array([val], dtype="i8"), tz, ambiguous=ambiguous, nonexistent=nonexistent, reso=reso, )[0] @cython.boundscheck(False) @cython.wraparound(False) def tz_localize_to_utc( ndarray[int64_t] vals, tzinfo tz, object ambiguous=None, object nonexistent=None, NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, ): """ Localize tzinfo-naive i8 to given time zone (using pytz). If there are ambiguities in the values, raise AmbiguousTimeError. Parameters ---------- vals : ndarray[int64_t] tz : tzinfo or None ambiguous : str, bool, or arraylike When clocks moved backward due to DST, ambiguous times may arise. For example in Central European Time (UTC+01), when going from 03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter dictates how ambiguous times should be handled. - 'infer' will attempt to infer fall dst-transition hours based on order - bool-ndarray where True signifies a DST time, False signifies a non-DST time (note that this flag is only applicable for ambiguous times, but the array must have the same length as vals) - bool if True, treat all vals as DST. If False, treat them as non-DST - 'NaT' will return NaT where there are ambiguous times nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise", \ timedelta-like} How to handle non-existent times when converting wall times to UTC reso : NPY_DATETIMEUNIT, default NPY_FR_ns Returns ------- localized : ndarray[int64_t] """ cdef: ndarray[uint8_t, cast=True] ambiguous_array Py_ssize_t i, idx, pos, n = vals.shape[0] Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right int64_t v, left, right, val, new_local, remaining_mins int64_t first_delta, delta int64_t shift_delta = 0 ndarray[int64_t] result_a, result_b, dst_hours int64_t[::1] result npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False bint shift_forward = False, shift_backward = False bint fill_nonexist = False str stamp Localizer info = Localizer(tz, reso=reso) int64_t pph = periods_per_day(reso) // 24 # Vectorized version of DstTzInfo.localize if info.use_utc: return vals.copy() result = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) if info.use_tzlocal: for i in range(n): v = vals[i] if v == NPY_NAT: result[i] = NPY_NAT else: result[i] = v - _tz_localize_using_tzinfo_api( v, tz, to_utc=True, reso=reso ) return result.base # to return underlying ndarray elif info.use_fixed: delta = info.delta for i in range(n): v = vals[i] if v == NPY_NAT: result[i] = NPY_NAT else: result[i] = v - delta return result.base # to return underlying ndarray # silence false-positive compiler warning ambiguous_array = np.empty(0, dtype=bool) if isinstance(ambiguous, str): if ambiguous == 'infer': infer_dst = True elif ambiguous == 'NaT': fill = True elif isinstance(ambiguous, bool): is_dst = True if ambiguous: ambiguous_array = np.ones(len(vals), dtype=bool) else: ambiguous_array = np.zeros(len(vals), dtype=bool) elif hasattr(ambiguous, '__iter__'): is_dst = True if len(ambiguous) != len(vals): raise ValueError("Length of ambiguous bool-array must be " "the same size as vals") ambiguous_array = np.asarray(ambiguous, dtype=bool) if nonexistent == 'NaT': fill_nonexist = True elif nonexistent == 'shift_forward': shift_forward = True elif nonexistent == 'shift_backward': shift_backward = True elif PyDelta_Check(nonexistent): from .timedeltas import delta_to_nanoseconds shift_delta = delta_to_nanoseconds(nonexistent, reso=reso) elif nonexistent not in ('raise', None): msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', " "shift_backwards} or a timedelta object") raise ValueError(msg) # Determine whether each date lies left of the DST transition (store in # result_a) or right of the DST transition (store in result_b) result_a, result_b =_get_utc_bounds( vals, info.tdata, info.ntrans, info.deltas, reso=reso ) # silence false-positive compiler warning dst_hours = np.empty(0, dtype=np.int64) if infer_dst: dst_hours = _get_dst_hours(vals, result_a, result_b, reso=reso) # Pre-compute delta_idx_offset that will be used if we go down non-existent # paths. # Shift the delta_idx by if the UTC offset of # the target tz is greater than 0 and we're moving forward # or vice versa first_delta = info.deltas[0] if (shift_forward or shift_delta > 0) and first_delta > 0: delta_idx_offset = 1 elif (shift_backward or shift_delta < 0) and first_delta < 0: delta_idx_offset = 1 else: delta_idx_offset = 0 for i in range(n): val = vals[i] left = result_a[i] right = result_b[i] if val == NPY_NAT: # TODO: test with non-nano result[i] = val elif left != NPY_NAT and right != NPY_NAT: if left == right: # TODO: test with non-nano result[i] = left else: if infer_dst and dst_hours[i] != NPY_NAT: # TODO: test with non-nano result[i] = dst_hours[i] elif is_dst: if ambiguous_array[i]: result[i] = left else: result[i] = right elif fill: # TODO: test with non-nano; parametrize test_dt_round_tz_ambiguous result[i] = NPY_NAT else: stamp = _render_tstamp(val, reso=reso) raise pytz.AmbiguousTimeError( f"Cannot infer dst time from {stamp}, try using the " "'ambiguous' argument" ) elif left != NPY_NAT: result[i] = left elif right != NPY_NAT: # TODO: test with non-nano result[i] = right else: # Handle nonexistent times if shift_forward or shift_backward or shift_delta != 0: # Shift the nonexistent time to the closest existing time remaining_mins = val % pph if shift_delta != 0: # Validate that we don't relocalize on another nonexistent # time if -1 < shift_delta + remaining_mins < pph: raise ValueError( "The provided timedelta will relocalize on a " f"nonexistent time: {nonexistent}" ) new_local = val + shift_delta elif shift_forward: new_local = val + (pph - remaining_mins) else: # Subtract 1 since the beginning hour is _inclusive_ of # nonexistent times new_local = val - remaining_mins - 1 delta_idx = bisect_right_i8(info.tdata, new_local, info.ntrans) delta_idx = delta_idx - delta_idx_offset result[i] = new_local - info.deltas[delta_idx] elif fill_nonexist: result[i] = NPY_NAT else: stamp = _render_tstamp(val, reso=reso) raise pytz.NonExistentTimeError(stamp) return result.base # .base to get underlying ndarray cdef inline Py_ssize_t bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n): # Caller is responsible for checking n > 0 # This looks very similar to local_search_right in the ndarray.searchsorted # implementation. cdef: Py_ssize_t pivot, left = 0, right = n # edge cases if val > data[n - 1]: return n # Caller is responsible for ensuring 'val >= data[0]'. This is # ensured by the fact that 'data' comes from get_dst_info where data[0] # is *always* NPY_NAT+1. If that ever changes, we will need to restore # the following disabled check. # if val < data[0]: # return 0 while left < right: pivot = left + (right - left) // 2 if data[pivot] <= val: left = pivot + 1 else: right = pivot return left cdef inline str _render_tstamp(int64_t val, NPY_DATETIMEUNIT reso): """ Helper function to render exception messages""" from pandas._libs.tslibs.timestamps import Timestamp ts = Timestamp._from_value_and_reso(val, reso, None) return str(ts) cdef _get_utc_bounds( ndarray vals, int64_t* tdata, Py_ssize_t ntrans, const int64_t[::1] deltas, NPY_DATETIMEUNIT reso, ): # Determine whether each date lies left of the DST transition (store in # result_a) or right of the DST transition (store in result_b) cdef: ndarray result_a, result_b Py_ssize_t i, n = vals.size int64_t val, v_left, v_right Py_ssize_t isl, isr, pos_left, pos_right int64_t ppd = periods_per_day(reso) result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0) for i in range(n): # This loops resembles the "Find the two best possibilities" block # in pytz's DstTZInfo.localize method. result_a[i] = NPY_NAT result_b[i] = NPY_NAT val = vals[i] if val == NPY_NAT: continue # TODO: be careful of overflow in val-ppd isl = bisect_right_i8(tdata, val - ppd, ntrans) - 1 if isl < 0: isl = 0 v_left = val - deltas[isl] pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1 # timestamp falls to the left side of the DST transition if v_left + deltas[pos_left] == val: result_a[i] = v_left # TODO: be careful of overflow in val+ppd isr = bisect_right_i8(tdata, val + ppd, ntrans) - 1 if isr < 0: isr = 0 v_right = val - deltas[isr] pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1 # timestamp falls to the right side of the DST transition if v_right + deltas[pos_right] == val: result_b[i] = v_right return result_a, result_b @cython.boundscheck(False) cdef ndarray[int64_t] _get_dst_hours( # vals, reso only needed here to potential render an exception message const int64_t[:] vals, ndarray[int64_t] result_a, ndarray[int64_t] result_b, NPY_DATETIMEUNIT reso, ): cdef: Py_ssize_t i, n = vals.shape[0] ndarray[uint8_t, cast=True] mismatch ndarray[int64_t] delta, dst_hours ndarray[intp_t] switch_idxs, trans_idx, grp, a_idx, b_idx, one_diff list trans_grp intp_t switch_idx int64_t left, right dst_hours = cnp.PyArray_EMPTY(result_a.ndim, result_a.shape, cnp.NPY_INT64, 0) dst_hours[:] = NPY_NAT mismatch = cnp.PyArray_ZEROS(result_a.ndim, result_a.shape, cnp.NPY_BOOL, 0) for i in range(n): left = result_a[i] right = result_b[i] # Get the ambiguous hours (given the above, these are the hours # where result_a != result_b and neither of them are NAT) if left != right and left != NPY_NAT and right != NPY_NAT: mismatch[i] = 1 trans_idx = mismatch.nonzero()[0] if trans_idx.size == 1: # see test_tz_localize_to_utc_ambiguous_infer stamp = _render_tstamp(vals[trans_idx[0]], reso=reso) raise pytz.AmbiguousTimeError( f"Cannot infer dst time from {stamp} as there " "are no repeated times" ) # Split the array into contiguous chunks (where the difference between # indices is 1). These are effectively dst transitions in different # years which is useful for checking that there is not an ambiguous # transition in an individual year. if trans_idx.size > 0: one_diff = np.where(np.diff(trans_idx) != 1)[0] + 1 trans_grp = np.array_split(trans_idx, one_diff) # Iterate through each day, if there are no hours where the # delta is negative (indicates a repeat of hour) the switch # cannot be inferred for grp in trans_grp: delta = np.diff(result_a[grp]) if grp.size == 1 or np.all(delta > 0): # see test_tz_localize_to_utc_ambiguous_infer stamp = _render_tstamp(vals[grp[0]], reso=reso) raise pytz.AmbiguousTimeError(stamp) # Find the index for the switch and pull from a for dst and b # for standard switch_idxs = (delta <= 0).nonzero()[0] if switch_idxs.size > 1: # see test_tz_localize_to_utc_ambiguous_infer raise pytz.AmbiguousTimeError( f"There are {switch_idxs.size} dst switches when " "there should only be 1." ) switch_idx = switch_idxs[0] + 1 # Pull the only index and adjust a_idx = grp[:switch_idx] b_idx = grp[switch_idx:] dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx])) return dst_hours # ---------------------------------------------------------------------- # Timezone Conversion cpdef int64_t tz_convert_from_utc_single( int64_t utc_val, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns ) except? -1: """ Convert the val (in i8) from UTC to tz This is a single value version of tz_convert_from_utc. Parameters ---------- utc_val : int64 tz : tzinfo reso : NPY_DATETIMEUNIT, default NPY_FR_ns Returns ------- converted: int64 """ cdef: Localizer info = Localizer(tz, reso=reso) Py_ssize_t pos # Note: caller is responsible for ensuring utc_val != NPY_NAT return info.utc_val_to_local_val(utc_val, &pos) # OSError may be thrown by tzlocal on windows at or close to 1970-01-01 # see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241 cdef int64_t _tz_localize_using_tzinfo_api( int64_t val, tzinfo tz, bint to_utc=True, NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, bint* fold=NULL, ) except? -1: """ Convert the i8 representation of a datetime from a general-case timezone to UTC, or vice-versa using the datetime/tzinfo API. Private, not intended for use outside of tslibs.tzconversion. Parameters ---------- val : int64_t tz : tzinfo to_utc : bint True if converting _to_ UTC, False if going the other direction. reso : NPY_DATETIMEUNIT fold : bint*, default NULL pointer to fold: whether datetime ends up in a fold or not after adjustment. Only passed with to_utc=False. Returns ------- delta : int64_t Value to add when converting from utc, subtract when converting to utc. Notes ----- Sets fold by pointer """ cdef: npy_datetimestruct dts datetime dt int64_t delta timedelta td int64_t pps = periods_per_second(reso) pandas_datetime_to_datetimestruct(val, reso, &dts) # datetime_new is cython-optimized constructor if not to_utc: # tz.utcoffset only makes sense if datetime # is _wall time_, so if val is a UTC timestamp convert to wall time dt = _astimezone(dts, tz) if fold is not NULL: # NB: fold is only passed with to_utc=False fold[0] = dt.fold else: dt = datetime_new(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, None) td = tz.utcoffset(dt) delta = int(td.total_seconds() * pps) return delta cdef datetime _astimezone(npy_datetimestruct dts, tzinfo tz): """ Optimized equivalent to: dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, utc_pytz) dt = dt.astimezone(tz) Derived from the datetime.astimezone implementation at https://github.com/python/cpython/blob/main/Modules/_datetimemodule.c#L6187 NB: we are assuming tz is not None. """ cdef: datetime result result = datetime_new(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) return tz.fromutc(result) # NB: relies on dateutil internals, subject to change. @cython.boundscheck(False) @cython.wraparound(False) cdef bint _infer_dateutil_fold( int64_t value, const int64_t[::1] trans, const int64_t[::1] deltas, Py_ssize_t pos, ): """ Infer _TSObject fold property from value by assuming 0 and then setting to 1 if necessary. Parameters ---------- value : int64_t trans : ndarray[int64_t] ndarray of offset transition points in nanoseconds since epoch. deltas : int64_t[:] array of offsets corresponding to transition points in trans. pos : Py_ssize_t Position of the last transition point before taking fold into account. Returns ------- bint Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time References ---------- .. [1] "PEP 495 - Local Time Disambiguation" https://www.python.org/dev/peps/pep-0495/#the-fold-attribute """ cdef: bint fold = 0 int64_t fold_delta if pos > 0: fold_delta = deltas[pos - 1] - deltas[pos] if value - fold_delta < trans[pos]: fold = 1 return fold