726 lines
23 KiB
Cython
726 lines
23 KiB
Cython
|
"""
|
||
|
timezone conversion
|
||
|
"""
|
||
|
cimport cython
|
||
|
from cpython.datetime cimport (
|
||
|
PyDelta_Check,
|
||
|
datetime,
|
||
|
datetime_new,
|
||
|
import_datetime,
|
||
|
timedelta,
|
||
|
tzinfo,
|
||
|
)
|
||
|
from cython cimport Py_ssize_t
|
||
|
|
||
|
import_datetime()
|
||
|
|
||
|
import numpy as np
|
||
|
import pytz
|
||
|
|
||
|
cimport numpy as cnp
|
||
|
from numpy cimport (
|
||
|
int64_t,
|
||
|
intp_t,
|
||
|
ndarray,
|
||
|
uint8_t,
|
||
|
)
|
||
|
|
||
|
cnp.import_array()
|
||
|
|
||
|
from pandas._libs.tslibs.dtypes cimport (
|
||
|
periods_per_day,
|
||
|
periods_per_second,
|
||
|
)
|
||
|
from pandas._libs.tslibs.nattype cimport NPY_NAT
|
||
|
from pandas._libs.tslibs.np_datetime cimport (
|
||
|
NPY_DATETIMEUNIT,
|
||
|
npy_datetimestruct,
|
||
|
pandas_datetime_to_datetimestruct,
|
||
|
)
|
||
|
from pandas._libs.tslibs.timezones cimport (
|
||
|
get_dst_info,
|
||
|
is_fixed_offset,
|
||
|
is_tzlocal,
|
||
|
is_utc,
|
||
|
is_zoneinfo,
|
||
|
utc_pytz,
|
||
|
)
|
||
|
|
||
|
|
||
|
cdef const int64_t[::1] _deltas_placeholder = np.array([], dtype=np.int64)
|
||
|
|
||
|
|
||
|
@cython.freelist(16)
|
||
|
@cython.final
|
||
|
cdef class Localizer:
|
||
|
# cdef:
|
||
|
# tzinfo tz
|
||
|
# NPY_DATETIMEUNIT _reso
|
||
|
# bint use_utc, use_fixed, use_tzlocal, use_dst, use_pytz
|
||
|
# ndarray trans
|
||
|
# Py_ssize_t ntrans
|
||
|
# const int64_t[::1] deltas
|
||
|
# int64_t delta
|
||
|
# int64_t* tdata
|
||
|
|
||
|
@cython.initializedcheck(False)
|
||
|
@cython.boundscheck(False)
|
||
|
def __cinit__(self, tzinfo tz, NPY_DATETIMEUNIT reso):
|
||
|
self.tz = tz
|
||
|
self._reso = reso
|
||
|
self.use_utc = self.use_tzlocal = self.use_fixed = False
|
||
|
self.use_dst = self.use_pytz = False
|
||
|
self.ntrans = -1 # placeholder
|
||
|
self.delta = -1 # placeholder
|
||
|
self.deltas = _deltas_placeholder
|
||
|
self.tdata = NULL
|
||
|
|
||
|
if is_utc(tz) or tz is None:
|
||
|
self.use_utc = True
|
||
|
|
||
|
elif is_tzlocal(tz) or is_zoneinfo(tz):
|
||
|
self.use_tzlocal = True
|
||
|
|
||
|
else:
|
||
|
trans, deltas, typ = get_dst_info(tz)
|
||
|
if reso != NPY_DATETIMEUNIT.NPY_FR_ns:
|
||
|
# NB: using floordiv here is implicitly assuming we will
|
||
|
# never see trans or deltas that are not an integer number
|
||
|
# of seconds.
|
||
|
# TODO: avoid these np.array calls
|
||
|
if reso == NPY_DATETIMEUNIT.NPY_FR_us:
|
||
|
trans = np.array(trans) // 1_000
|
||
|
deltas = np.array(deltas) // 1_000
|
||
|
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
|
||
|
trans = np.array(trans) // 1_000_000
|
||
|
deltas = np.array(deltas) // 1_000_000
|
||
|
elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
|
||
|
trans = np.array(trans) // 1_000_000_000
|
||
|
deltas = np.array(deltas) // 1_000_000_000
|
||
|
else:
|
||
|
raise NotImplementedError(reso)
|
||
|
|
||
|
self.trans = trans
|
||
|
self.ntrans = self.trans.shape[0]
|
||
|
self.deltas = deltas
|
||
|
|
||
|
if typ != "pytz" and typ != "dateutil":
|
||
|
# static/fixed; in this case we know that len(delta) == 1
|
||
|
self.use_fixed = True
|
||
|
self.delta = deltas[0]
|
||
|
else:
|
||
|
self.use_dst = True
|
||
|
if typ == "pytz":
|
||
|
self.use_pytz = True
|
||
|
self.tdata = <int64_t*>cnp.PyArray_DATA(trans)
|
||
|
|
||
|
@cython.boundscheck(False)
|
||
|
cdef inline int64_t utc_val_to_local_val(
|
||
|
self, int64_t utc_val, Py_ssize_t* pos, bint* fold=NULL
|
||
|
) except? -1:
|
||
|
if self.use_utc:
|
||
|
return utc_val
|
||
|
elif self.use_tzlocal:
|
||
|
return utc_val + _tz_localize_using_tzinfo_api(
|
||
|
utc_val, self.tz, to_utc=False, reso=self._reso, fold=fold
|
||
|
)
|
||
|
elif self.use_fixed:
|
||
|
return utc_val + self.delta
|
||
|
else:
|
||
|
pos[0] = bisect_right_i8(self.tdata, utc_val, self.ntrans) - 1
|
||
|
if fold is not NULL:
|
||
|
fold[0] = _infer_dateutil_fold(
|
||
|
utc_val, self.trans, self.deltas, pos[0]
|
||
|
)
|
||
|
|
||
|
return utc_val + self.deltas[pos[0]]
|
||
|
|
||
|
|
||
|
cdef int64_t tz_localize_to_utc_single(
|
||
|
int64_t val,
|
||
|
tzinfo tz,
|
||
|
object ambiguous=None,
|
||
|
object nonexistent=None,
|
||
|
NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns,
|
||
|
) except? -1:
|
||
|
"""See tz_localize_to_utc.__doc__"""
|
||
|
cdef:
|
||
|
int64_t delta
|
||
|
int64_t[::1] deltas
|
||
|
|
||
|
if val == NPY_NAT:
|
||
|
return val
|
||
|
|
||
|
elif is_utc(tz) or tz is None:
|
||
|
# TODO: test with non-nano
|
||
|
return val
|
||
|
|
||
|
elif is_tzlocal(tz) or is_zoneinfo(tz):
|
||
|
return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True, reso=reso)
|
||
|
|
||
|
elif is_fixed_offset(tz):
|
||
|
_, deltas, _ = get_dst_info(tz)
|
||
|
delta = deltas[0]
|
||
|
# TODO: de-duplicate with Localizer.__init__
|
||
|
if reso != NPY_DATETIMEUNIT.NPY_FR_ns:
|
||
|
if reso == NPY_DATETIMEUNIT.NPY_FR_us:
|
||
|
delta = delta // 1000
|
||
|
elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
|
||
|
delta = delta // 1_000_000
|
||
|
elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
|
||
|
delta = delta // 1_000_000_000
|
||
|
|
||
|
return val - delta
|
||
|
|
||
|
else:
|
||
|
return tz_localize_to_utc(
|
||
|
np.array([val], dtype="i8"),
|
||
|
tz,
|
||
|
ambiguous=ambiguous,
|
||
|
nonexistent=nonexistent,
|
||
|
reso=reso,
|
||
|
)[0]
|
||
|
|
||
|
|
||
|
@cython.boundscheck(False)
|
||
|
@cython.wraparound(False)
|
||
|
def tz_localize_to_utc(
|
||
|
ndarray[int64_t] vals,
|
||
|
tzinfo tz,
|
||
|
object ambiguous=None,
|
||
|
object nonexistent=None,
|
||
|
NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns,
|
||
|
):
|
||
|
"""
|
||
|
Localize tzinfo-naive i8 to given time zone (using pytz). If
|
||
|
there are ambiguities in the values, raise AmbiguousTimeError.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
vals : ndarray[int64_t]
|
||
|
tz : tzinfo or None
|
||
|
ambiguous : str, bool, or arraylike
|
||
|
When clocks moved backward due to DST, ambiguous times may arise.
|
||
|
For example in Central European Time (UTC+01), when going from 03:00
|
||
|
DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC
|
||
|
and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter
|
||
|
dictates how ambiguous times should be handled.
|
||
|
|
||
|
- 'infer' will attempt to infer fall dst-transition hours based on
|
||
|
order
|
||
|
- bool-ndarray where True signifies a DST time, False signifies a
|
||
|
non-DST time (note that this flag is only applicable for ambiguous
|
||
|
times, but the array must have the same length as vals)
|
||
|
- bool if True, treat all vals as DST. If False, treat them as non-DST
|
||
|
- 'NaT' will return NaT where there are ambiguous times
|
||
|
|
||
|
nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise", \
|
||
|
timedelta-like}
|
||
|
How to handle non-existent times when converting wall times to UTC
|
||
|
reso : NPY_DATETIMEUNIT, default NPY_FR_ns
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
localized : ndarray[int64_t]
|
||
|
"""
|
||
|
cdef:
|
||
|
ndarray[uint8_t, cast=True] ambiguous_array
|
||
|
Py_ssize_t i, idx, pos, n = vals.shape[0]
|
||
|
Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right
|
||
|
int64_t v, left, right, val, new_local, remaining_mins
|
||
|
int64_t first_delta, delta
|
||
|
int64_t shift_delta = 0
|
||
|
ndarray[int64_t] result_a, result_b, dst_hours
|
||
|
int64_t[::1] result
|
||
|
npy_datetimestruct dts
|
||
|
bint infer_dst = False, is_dst = False, fill = False
|
||
|
bint shift_forward = False, shift_backward = False
|
||
|
bint fill_nonexist = False
|
||
|
str stamp
|
||
|
Localizer info = Localizer(tz, reso=reso)
|
||
|
int64_t pph = periods_per_day(reso) // 24
|
||
|
|
||
|
# Vectorized version of DstTzInfo.localize
|
||
|
if info.use_utc:
|
||
|
return vals.copy()
|
||
|
|
||
|
result = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
|
||
|
|
||
|
if info.use_tzlocal:
|
||
|
for i in range(n):
|
||
|
v = vals[i]
|
||
|
if v == NPY_NAT:
|
||
|
result[i] = NPY_NAT
|
||
|
else:
|
||
|
result[i] = v - _tz_localize_using_tzinfo_api(
|
||
|
v, tz, to_utc=True, reso=reso
|
||
|
)
|
||
|
return result.base # to return underlying ndarray
|
||
|
|
||
|
elif info.use_fixed:
|
||
|
delta = info.delta
|
||
|
for i in range(n):
|
||
|
v = vals[i]
|
||
|
if v == NPY_NAT:
|
||
|
result[i] = NPY_NAT
|
||
|
else:
|
||
|
result[i] = v - delta
|
||
|
return result.base # to return underlying ndarray
|
||
|
|
||
|
# silence false-positive compiler warning
|
||
|
ambiguous_array = np.empty(0, dtype=bool)
|
||
|
if isinstance(ambiguous, str):
|
||
|
if ambiguous == 'infer':
|
||
|
infer_dst = True
|
||
|
elif ambiguous == 'NaT':
|
||
|
fill = True
|
||
|
elif isinstance(ambiguous, bool):
|
||
|
is_dst = True
|
||
|
if ambiguous:
|
||
|
ambiguous_array = np.ones(len(vals), dtype=bool)
|
||
|
else:
|
||
|
ambiguous_array = np.zeros(len(vals), dtype=bool)
|
||
|
elif hasattr(ambiguous, '__iter__'):
|
||
|
is_dst = True
|
||
|
if len(ambiguous) != len(vals):
|
||
|
raise ValueError("Length of ambiguous bool-array must be "
|
||
|
"the same size as vals")
|
||
|
ambiguous_array = np.asarray(ambiguous, dtype=bool)
|
||
|
|
||
|
if nonexistent == 'NaT':
|
||
|
fill_nonexist = True
|
||
|
elif nonexistent == 'shift_forward':
|
||
|
shift_forward = True
|
||
|
elif nonexistent == 'shift_backward':
|
||
|
shift_backward = True
|
||
|
elif PyDelta_Check(nonexistent):
|
||
|
from .timedeltas import delta_to_nanoseconds
|
||
|
shift_delta = delta_to_nanoseconds(nonexistent, reso=reso)
|
||
|
elif nonexistent not in ('raise', None):
|
||
|
msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', "
|
||
|
"shift_backwards} or a timedelta object")
|
||
|
raise ValueError(msg)
|
||
|
|
||
|
# Determine whether each date lies left of the DST transition (store in
|
||
|
# result_a) or right of the DST transition (store in result_b)
|
||
|
result_a, result_b =_get_utc_bounds(
|
||
|
vals, info.tdata, info.ntrans, info.deltas, reso=reso
|
||
|
)
|
||
|
|
||
|
# silence false-positive compiler warning
|
||
|
dst_hours = np.empty(0, dtype=np.int64)
|
||
|
if infer_dst:
|
||
|
dst_hours = _get_dst_hours(vals, result_a, result_b, reso=reso)
|
||
|
|
||
|
# Pre-compute delta_idx_offset that will be used if we go down non-existent
|
||
|
# paths.
|
||
|
# Shift the delta_idx by if the UTC offset of
|
||
|
# the target tz is greater than 0 and we're moving forward
|
||
|
# or vice versa
|
||
|
first_delta = info.deltas[0]
|
||
|
if (shift_forward or shift_delta > 0) and first_delta > 0:
|
||
|
delta_idx_offset = 1
|
||
|
elif (shift_backward or shift_delta < 0) and first_delta < 0:
|
||
|
delta_idx_offset = 1
|
||
|
else:
|
||
|
delta_idx_offset = 0
|
||
|
|
||
|
for i in range(n):
|
||
|
val = vals[i]
|
||
|
left = result_a[i]
|
||
|
right = result_b[i]
|
||
|
if val == NPY_NAT:
|
||
|
# TODO: test with non-nano
|
||
|
result[i] = val
|
||
|
elif left != NPY_NAT and right != NPY_NAT:
|
||
|
if left == right:
|
||
|
# TODO: test with non-nano
|
||
|
result[i] = left
|
||
|
else:
|
||
|
if infer_dst and dst_hours[i] != NPY_NAT:
|
||
|
# TODO: test with non-nano
|
||
|
result[i] = dst_hours[i]
|
||
|
elif is_dst:
|
||
|
if ambiguous_array[i]:
|
||
|
result[i] = left
|
||
|
else:
|
||
|
result[i] = right
|
||
|
elif fill:
|
||
|
# TODO: test with non-nano; parametrize test_dt_round_tz_ambiguous
|
||
|
result[i] = NPY_NAT
|
||
|
else:
|
||
|
stamp = _render_tstamp(val, reso=reso)
|
||
|
raise pytz.AmbiguousTimeError(
|
||
|
f"Cannot infer dst time from {stamp}, try using the "
|
||
|
"'ambiguous' argument"
|
||
|
)
|
||
|
elif left != NPY_NAT:
|
||
|
result[i] = left
|
||
|
elif right != NPY_NAT:
|
||
|
# TODO: test with non-nano
|
||
|
result[i] = right
|
||
|
else:
|
||
|
# Handle nonexistent times
|
||
|
if shift_forward or shift_backward or shift_delta != 0:
|
||
|
# Shift the nonexistent time to the closest existing time
|
||
|
remaining_mins = val % pph
|
||
|
if shift_delta != 0:
|
||
|
# Validate that we don't relocalize on another nonexistent
|
||
|
# time
|
||
|
if -1 < shift_delta + remaining_mins < pph:
|
||
|
raise ValueError(
|
||
|
"The provided timedelta will relocalize on a "
|
||
|
f"nonexistent time: {nonexistent}"
|
||
|
)
|
||
|
new_local = val + shift_delta
|
||
|
elif shift_forward:
|
||
|
new_local = val + (pph - remaining_mins)
|
||
|
else:
|
||
|
# Subtract 1 since the beginning hour is _inclusive_ of
|
||
|
# nonexistent times
|
||
|
new_local = val - remaining_mins - 1
|
||
|
|
||
|
delta_idx = bisect_right_i8(info.tdata, new_local, info.ntrans)
|
||
|
|
||
|
delta_idx = delta_idx - delta_idx_offset
|
||
|
result[i] = new_local - info.deltas[delta_idx]
|
||
|
elif fill_nonexist:
|
||
|
result[i] = NPY_NAT
|
||
|
else:
|
||
|
stamp = _render_tstamp(val, reso=reso)
|
||
|
raise pytz.NonExistentTimeError(stamp)
|
||
|
|
||
|
return result.base # .base to get underlying ndarray
|
||
|
|
||
|
|
||
|
cdef inline Py_ssize_t bisect_right_i8(int64_t *data,
|
||
|
int64_t val, Py_ssize_t n):
|
||
|
# Caller is responsible for checking n > 0
|
||
|
# This looks very similar to local_search_right in the ndarray.searchsorted
|
||
|
# implementation.
|
||
|
cdef:
|
||
|
Py_ssize_t pivot, left = 0, right = n
|
||
|
|
||
|
# edge cases
|
||
|
if val > data[n - 1]:
|
||
|
return n
|
||
|
|
||
|
# Caller is responsible for ensuring 'val >= data[0]'. This is
|
||
|
# ensured by the fact that 'data' comes from get_dst_info where data[0]
|
||
|
# is *always* NPY_NAT+1. If that ever changes, we will need to restore
|
||
|
# the following disabled check.
|
||
|
# if val < data[0]:
|
||
|
# return 0
|
||
|
|
||
|
while left < right:
|
||
|
pivot = left + (right - left) // 2
|
||
|
|
||
|
if data[pivot] <= val:
|
||
|
left = pivot + 1
|
||
|
else:
|
||
|
right = pivot
|
||
|
|
||
|
return left
|
||
|
|
||
|
|
||
|
cdef inline str _render_tstamp(int64_t val, NPY_DATETIMEUNIT reso):
|
||
|
""" Helper function to render exception messages"""
|
||
|
from pandas._libs.tslibs.timestamps import Timestamp
|
||
|
ts = Timestamp._from_value_and_reso(val, reso, None)
|
||
|
return str(ts)
|
||
|
|
||
|
|
||
|
cdef _get_utc_bounds(
|
||
|
ndarray vals,
|
||
|
int64_t* tdata,
|
||
|
Py_ssize_t ntrans,
|
||
|
const int64_t[::1] deltas,
|
||
|
NPY_DATETIMEUNIT reso,
|
||
|
):
|
||
|
# Determine whether each date lies left of the DST transition (store in
|
||
|
# result_a) or right of the DST transition (store in result_b)
|
||
|
|
||
|
cdef:
|
||
|
ndarray result_a, result_b
|
||
|
Py_ssize_t i, n = vals.size
|
||
|
int64_t val, v_left, v_right
|
||
|
Py_ssize_t isl, isr, pos_left, pos_right
|
||
|
int64_t ppd = periods_per_day(reso)
|
||
|
|
||
|
result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
|
||
|
result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
|
||
|
|
||
|
for i in range(n):
|
||
|
# This loops resembles the "Find the two best possibilities" block
|
||
|
# in pytz's DstTZInfo.localize method.
|
||
|
result_a[i] = NPY_NAT
|
||
|
result_b[i] = NPY_NAT
|
||
|
|
||
|
val = vals[i]
|
||
|
if val == NPY_NAT:
|
||
|
continue
|
||
|
|
||
|
# TODO: be careful of overflow in val-ppd
|
||
|
isl = bisect_right_i8(tdata, val - ppd, ntrans) - 1
|
||
|
if isl < 0:
|
||
|
isl = 0
|
||
|
|
||
|
v_left = val - deltas[isl]
|
||
|
pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1
|
||
|
# timestamp falls to the left side of the DST transition
|
||
|
if v_left + deltas[pos_left] == val:
|
||
|
result_a[i] = v_left
|
||
|
|
||
|
# TODO: be careful of overflow in val+ppd
|
||
|
isr = bisect_right_i8(tdata, val + ppd, ntrans) - 1
|
||
|
if isr < 0:
|
||
|
isr = 0
|
||
|
|
||
|
v_right = val - deltas[isr]
|
||
|
pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1
|
||
|
# timestamp falls to the right side of the DST transition
|
||
|
if v_right + deltas[pos_right] == val:
|
||
|
result_b[i] = v_right
|
||
|
|
||
|
return result_a, result_b
|
||
|
|
||
|
|
||
|
@cython.boundscheck(False)
|
||
|
cdef ndarray[int64_t] _get_dst_hours(
|
||
|
# vals, reso only needed here to potential render an exception message
|
||
|
const int64_t[:] vals,
|
||
|
ndarray[int64_t] result_a,
|
||
|
ndarray[int64_t] result_b,
|
||
|
NPY_DATETIMEUNIT reso,
|
||
|
):
|
||
|
cdef:
|
||
|
Py_ssize_t i, n = vals.shape[0]
|
||
|
ndarray[uint8_t, cast=True] mismatch
|
||
|
ndarray[int64_t] delta, dst_hours
|
||
|
ndarray[intp_t] switch_idxs, trans_idx, grp, a_idx, b_idx, one_diff
|
||
|
list trans_grp
|
||
|
intp_t switch_idx
|
||
|
int64_t left, right
|
||
|
|
||
|
dst_hours = cnp.PyArray_EMPTY(result_a.ndim, result_a.shape, cnp.NPY_INT64, 0)
|
||
|
dst_hours[:] = NPY_NAT
|
||
|
|
||
|
mismatch = cnp.PyArray_ZEROS(result_a.ndim, result_a.shape, cnp.NPY_BOOL, 0)
|
||
|
|
||
|
for i in range(n):
|
||
|
left = result_a[i]
|
||
|
right = result_b[i]
|
||
|
|
||
|
# Get the ambiguous hours (given the above, these are the hours
|
||
|
# where result_a != result_b and neither of them are NAT)
|
||
|
if left != right and left != NPY_NAT and right != NPY_NAT:
|
||
|
mismatch[i] = 1
|
||
|
|
||
|
trans_idx = mismatch.nonzero()[0]
|
||
|
|
||
|
if trans_idx.size == 1:
|
||
|
# see test_tz_localize_to_utc_ambiguous_infer
|
||
|
stamp = _render_tstamp(vals[trans_idx[0]], reso=reso)
|
||
|
raise pytz.AmbiguousTimeError(
|
||
|
f"Cannot infer dst time from {stamp} as there "
|
||
|
"are no repeated times"
|
||
|
)
|
||
|
|
||
|
# Split the array into contiguous chunks (where the difference between
|
||
|
# indices is 1). These are effectively dst transitions in different
|
||
|
# years which is useful for checking that there is not an ambiguous
|
||
|
# transition in an individual year.
|
||
|
if trans_idx.size > 0:
|
||
|
one_diff = np.where(np.diff(trans_idx) != 1)[0] + 1
|
||
|
trans_grp = np.array_split(trans_idx, one_diff)
|
||
|
|
||
|
# Iterate through each day, if there are no hours where the
|
||
|
# delta is negative (indicates a repeat of hour) the switch
|
||
|
# cannot be inferred
|
||
|
for grp in trans_grp:
|
||
|
|
||
|
delta = np.diff(result_a[grp])
|
||
|
if grp.size == 1 or np.all(delta > 0):
|
||
|
# see test_tz_localize_to_utc_ambiguous_infer
|
||
|
stamp = _render_tstamp(vals[grp[0]], reso=reso)
|
||
|
raise pytz.AmbiguousTimeError(stamp)
|
||
|
|
||
|
# Find the index for the switch and pull from a for dst and b
|
||
|
# for standard
|
||
|
switch_idxs = (delta <= 0).nonzero()[0]
|
||
|
if switch_idxs.size > 1:
|
||
|
# see test_tz_localize_to_utc_ambiguous_infer
|
||
|
raise pytz.AmbiguousTimeError(
|
||
|
f"There are {switch_idxs.size} dst switches when "
|
||
|
"there should only be 1."
|
||
|
)
|
||
|
|
||
|
switch_idx = switch_idxs[0] + 1
|
||
|
# Pull the only index and adjust
|
||
|
a_idx = grp[:switch_idx]
|
||
|
b_idx = grp[switch_idx:]
|
||
|
dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx]))
|
||
|
|
||
|
return dst_hours
|
||
|
|
||
|
|
||
|
# ----------------------------------------------------------------------
|
||
|
# Timezone Conversion
|
||
|
|
||
|
cpdef int64_t tz_convert_from_utc_single(
|
||
|
int64_t utc_val, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns
|
||
|
) except? -1:
|
||
|
"""
|
||
|
Convert the val (in i8) from UTC to tz
|
||
|
|
||
|
This is a single value version of tz_convert_from_utc.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
utc_val : int64
|
||
|
tz : tzinfo
|
||
|
reso : NPY_DATETIMEUNIT, default NPY_FR_ns
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
converted: int64
|
||
|
"""
|
||
|
cdef:
|
||
|
Localizer info = Localizer(tz, reso=reso)
|
||
|
Py_ssize_t pos
|
||
|
|
||
|
# Note: caller is responsible for ensuring utc_val != NPY_NAT
|
||
|
return info.utc_val_to_local_val(utc_val, &pos)
|
||
|
|
||
|
|
||
|
# OSError may be thrown by tzlocal on windows at or close to 1970-01-01
|
||
|
# see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241
|
||
|
cdef int64_t _tz_localize_using_tzinfo_api(
|
||
|
int64_t val,
|
||
|
tzinfo tz,
|
||
|
bint to_utc=True,
|
||
|
NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns,
|
||
|
bint* fold=NULL,
|
||
|
) except? -1:
|
||
|
"""
|
||
|
Convert the i8 representation of a datetime from a general-case timezone to
|
||
|
UTC, or vice-versa using the datetime/tzinfo API.
|
||
|
|
||
|
Private, not intended for use outside of tslibs.tzconversion.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
val : int64_t
|
||
|
tz : tzinfo
|
||
|
to_utc : bint
|
||
|
True if converting _to_ UTC, False if going the other direction.
|
||
|
reso : NPY_DATETIMEUNIT
|
||
|
fold : bint*, default NULL
|
||
|
pointer to fold: whether datetime ends up in a fold or not
|
||
|
after adjustment.
|
||
|
Only passed with to_utc=False.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
delta : int64_t
|
||
|
Value to add when converting from utc, subtract when converting to utc.
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
Sets fold by pointer
|
||
|
"""
|
||
|
cdef:
|
||
|
npy_datetimestruct dts
|
||
|
datetime dt
|
||
|
int64_t delta
|
||
|
timedelta td
|
||
|
int64_t pps = periods_per_second(reso)
|
||
|
|
||
|
pandas_datetime_to_datetimestruct(val, reso, &dts)
|
||
|
|
||
|
# datetime_new is cython-optimized constructor
|
||
|
if not to_utc:
|
||
|
# tz.utcoffset only makes sense if datetime
|
||
|
# is _wall time_, so if val is a UTC timestamp convert to wall time
|
||
|
dt = _astimezone(dts, tz)
|
||
|
|
||
|
if fold is not NULL:
|
||
|
# NB: fold is only passed with to_utc=False
|
||
|
fold[0] = dt.fold
|
||
|
else:
|
||
|
dt = datetime_new(dts.year, dts.month, dts.day, dts.hour,
|
||
|
dts.min, dts.sec, dts.us, None)
|
||
|
|
||
|
td = tz.utcoffset(dt)
|
||
|
delta = int(td.total_seconds() * pps)
|
||
|
return delta
|
||
|
|
||
|
|
||
|
cdef datetime _astimezone(npy_datetimestruct dts, tzinfo tz):
|
||
|
"""
|
||
|
Optimized equivalent to:
|
||
|
|
||
|
dt = datetime(dts.year, dts.month, dts.day, dts.hour,
|
||
|
dts.min, dts.sec, dts.us, utc_pytz)
|
||
|
dt = dt.astimezone(tz)
|
||
|
|
||
|
Derived from the datetime.astimezone implementation at
|
||
|
https://github.com/python/cpython/blob/main/Modules/_datetimemodule.c#L6187
|
||
|
|
||
|
NB: we are assuming tz is not None.
|
||
|
"""
|
||
|
cdef:
|
||
|
datetime result
|
||
|
|
||
|
result = datetime_new(dts.year, dts.month, dts.day, dts.hour,
|
||
|
dts.min, dts.sec, dts.us, tz)
|
||
|
return tz.fromutc(result)
|
||
|
|
||
|
|
||
|
# NB: relies on dateutil internals, subject to change.
|
||
|
@cython.boundscheck(False)
|
||
|
@cython.wraparound(False)
|
||
|
cdef bint _infer_dateutil_fold(
|
||
|
int64_t value,
|
||
|
const int64_t[::1] trans,
|
||
|
const int64_t[::1] deltas,
|
||
|
Py_ssize_t pos,
|
||
|
):
|
||
|
"""
|
||
|
Infer _TSObject fold property from value by assuming 0 and then setting
|
||
|
to 1 if necessary.
|
||
|
|
||
|
Parameters
|
||
|
----------
|
||
|
value : int64_t
|
||
|
trans : ndarray[int64_t]
|
||
|
ndarray of offset transition points in nanoseconds since epoch.
|
||
|
deltas : int64_t[:]
|
||
|
array of offsets corresponding to transition points in trans.
|
||
|
pos : Py_ssize_t
|
||
|
Position of the last transition point before taking fold into account.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
bint
|
||
|
Due to daylight saving time, one wall clock time can occur twice
|
||
|
when shifting from summer to winter time; fold describes whether the
|
||
|
datetime-like corresponds to the first (0) or the second time (1)
|
||
|
the wall clock hits the ambiguous time
|
||
|
|
||
|
References
|
||
|
----------
|
||
|
.. [1] "PEP 495 - Local Time Disambiguation"
|
||
|
https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
|
||
|
"""
|
||
|
cdef:
|
||
|
bint fold = 0
|
||
|
int64_t fold_delta
|
||
|
|
||
|
if pos > 0:
|
||
|
fold_delta = deltas[pos - 1] - deltas[pos]
|
||
|
if value - fold_delta < trans[pos]:
|
||
|
fold = 1
|
||
|
|
||
|
return fold
|