You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
358 lines
14 KiB
358 lines
14 KiB
From f8bd08e9c2fc6365980f41b846bbae4b40f08b83 Mon Sep 17 00:00:00 2001
|
|
From: Jeff Reback <jeff@reback.net>
|
|
Date: Sat, 12 Nov 2016 10:58:54 -0500
|
|
Subject: [PATCH] BUG: segfault manifesting with dateutil=2.6 w.r.t. replace
|
|
when timezones are present
|
|
|
|
closes #14621
|
|
|
|
Author: Jeff Reback <jeff@reback.net>
|
|
|
|
Closes #14631 from jreback/replace and squashes the following commits:
|
|
|
|
3f95042 [Jeff Reback] BUG: segfault manifesting with dateutil=2.6 w.r.t. replace when timezones are present
|
|
---
|
|
ci/requirements-3.5_OSX.pip | 2 +-
|
|
doc/source/whatsnew/v0.19.2.txt | 3 ++
|
|
pandas/tseries/offsets.py | 1 +
|
|
pandas/tseries/tests/test_offsets.py | 20 ++++---
|
|
pandas/tseries/tests/test_timezones.py | 89 +++++++++++++++++++++++++++++--
|
|
pandas/tseries/tests/test_tslib.py | 5 +-
|
|
pandas/tslib.pyx | 95 ++++++++++++++++++++++++++++------
|
|
7 files changed, 188 insertions(+), 27 deletions(-)
|
|
|
|
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
|
|
index 051cc8aa4..2e3852a7e 100644
|
|
--- a/pandas/tseries/offsets.py
|
|
+++ b/pandas/tseries/offsets.py
|
|
@@ -68,6 +68,7 @@ def apply_wraps(func):
|
|
other = other.tz_localize(None)
|
|
|
|
result = func(self, other)
|
|
+
|
|
if self._adjust_dst:
|
|
result = tslib._localize_pydatetime(result, tz)
|
|
|
|
diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py
|
|
index 1735ac4e2..768e9212e 100644
|
|
--- a/pandas/tseries/tests/test_offsets.py
|
|
+++ b/pandas/tseries/tests/test_offsets.py
|
|
@@ -1,4 +1,5 @@
|
|
import os
|
|
+from distutils.version import LooseVersion
|
|
from datetime import date, datetime, timedelta
|
|
from dateutil.relativedelta import relativedelta
|
|
from pandas.compat import range, iteritems
|
|
@@ -4851,6 +4852,7 @@ class TestDST(tm.TestCase):
|
|
|
|
def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset):
|
|
offset = DateOffset(**{offset_name: offset_n})
|
|
+
|
|
t = tstart + offset
|
|
if expected_utc_offset is not None:
|
|
self.assertTrue(get_utc_offset_hours(t) == expected_utc_offset)
|
|
@@ -4890,17 +4892,23 @@ class TestDST(tm.TestCase):
|
|
return Timestamp(string + offset_string).tz_convert(tz)
|
|
|
|
def test_fallback_plural(self):
|
|
- """test moving from daylight savings to standard time"""
|
|
+ # test moving from daylight savings to standard time
|
|
+ import dateutil
|
|
for tz, utc_offsets in self.timezone_utc_offsets.items():
|
|
hrs_pre = utc_offsets['utc_offset_daylight']
|
|
hrs_post = utc_offsets['utc_offset_standard']
|
|
- self._test_all_offsets(
|
|
- n=3, tstart=self._make_timestamp(self.ts_pre_fallback,
|
|
- hrs_pre, tz),
|
|
- expected_utc_offset=hrs_post)
|
|
+
|
|
+ if dateutil.__version__ != LooseVersion('2.6.0'):
|
|
+ # buggy ambiguous behavior in 2.6.0
|
|
+ # GH 14621
|
|
+ # https://github.com/dateutil/dateutil/issues/321
|
|
+ self._test_all_offsets(
|
|
+ n=3, tstart=self._make_timestamp(self.ts_pre_fallback,
|
|
+ hrs_pre, tz),
|
|
+ expected_utc_offset=hrs_post)
|
|
|
|
def test_springforward_plural(self):
|
|
- """test moving from standard to daylight savings"""
|
|
+ # test moving from standard to daylight savings
|
|
for tz, utc_offsets in self.timezone_utc_offsets.items():
|
|
hrs_pre = utc_offsets['utc_offset_standard']
|
|
hrs_post = utc_offsets['utc_offset_daylight']
|
|
diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py
|
|
index 00e8ee631..db8cda5c7 100644
|
|
--- a/pandas/tseries/tests/test_timezones.py
|
|
+++ b/pandas/tseries/tests/test_timezones.py
|
|
@@ -4,7 +4,7 @@ import nose
|
|
|
|
import numpy as np
|
|
import pytz
|
|
-
|
|
+from distutils.version import LooseVersion
|
|
from pandas.types.dtypes import DatetimeTZDtype
|
|
from pandas import (Index, Series, DataFrame, isnull, Timestamp)
|
|
|
|
@@ -518,8 +518,12 @@ class TestTimeZoneSupportPytz(tm.TestCase):
|
|
|
|
times = date_range("2013-10-26 23:00", "2013-10-27 01:00", freq="H",
|
|
tz=tz, ambiguous='infer')
|
|
- self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz))
|
|
- self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz))
|
|
+ self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz,
|
|
+ freq="H"))
|
|
+ if dateutil.__version__ != LooseVersion('2.6.0'):
|
|
+ # GH 14621
|
|
+ self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz,
|
|
+ freq="H"))
|
|
|
|
def test_ambiguous_nat(self):
|
|
tz = self.tz('US/Eastern')
|
|
@@ -1163,6 +1167,85 @@ class TestTimeZones(tm.TestCase):
|
|
def setUp(self):
|
|
tm._skip_if_no_pytz()
|
|
|
|
+ def test_replace(self):
|
|
+ # GH 14621
|
|
+ # GH 7825
|
|
+ # replacing datetime components with and w/o presence of a timezone
|
|
+ dt = Timestamp('2016-01-01 09:00:00')
|
|
+ result = dt.replace(hour=0)
|
|
+ expected = Timestamp('2016-01-01 00:00:00')
|
|
+ self.assertEqual(result, expected)
|
|
+
|
|
+ for tz in self.timezones:
|
|
+ dt = Timestamp('2016-01-01 09:00:00', tz=tz)
|
|
+ result = dt.replace(hour=0)
|
|
+ expected = Timestamp('2016-01-01 00:00:00', tz=tz)
|
|
+ self.assertEqual(result, expected)
|
|
+
|
|
+ # we preserve nanoseconds
|
|
+ dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
|
|
+ result = dt.replace(hour=0)
|
|
+ expected = Timestamp('2016-01-01 00:00:00.000000123', tz=tz)
|
|
+ self.assertEqual(result, expected)
|
|
+
|
|
+ # test all
|
|
+ dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
|
|
+ result = dt.replace(year=2015, month=2, day=2, hour=0, minute=5,
|
|
+ second=5, microsecond=5, nanosecond=5)
|
|
+ expected = Timestamp('2015-02-02 00:05:05.000005005', tz=tz)
|
|
+ self.assertEqual(result, expected)
|
|
+
|
|
+ # error
|
|
+ def f():
|
|
+ dt.replace(foo=5)
|
|
+ self.assertRaises(ValueError, f)
|
|
+
|
|
+ def f():
|
|
+ dt.replace(hour=0.1)
|
|
+ self.assertRaises(ValueError, f)
|
|
+
|
|
+ # assert conversion to naive is the same as replacing tzinfo with None
|
|
+ dt = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern')
|
|
+ self.assertEqual(dt.tz_localize(None), dt.replace(tzinfo=None))
|
|
+
|
|
+ def test_ambiguous_compat(self):
|
|
+ # validate that pytz and dateutil are compat for dst
|
|
+ # when the transition happens
|
|
+ tm._skip_if_no_dateutil()
|
|
+ tm._skip_if_no_pytz()
|
|
+
|
|
+ pytz_zone = 'Europe/London'
|
|
+ dateutil_zone = 'dateutil/Europe/London'
|
|
+ result_pytz = (Timestamp('2013-10-27 01:00:00')
|
|
+ .tz_localize(pytz_zone, ambiguous=0))
|
|
+ result_dateutil = (Timestamp('2013-10-27 01:00:00')
|
|
+ .tz_localize(dateutil_zone, ambiguous=0))
|
|
+ self.assertEqual(result_pytz.value, result_dateutil.value)
|
|
+ self.assertEqual(result_pytz.value, 1382835600000000000)
|
|
+
|
|
+ # dateutil 2.6 buggy w.r.t. ambiguous=0
|
|
+ if dateutil.__version__ != LooseVersion('2.6.0'):
|
|
+ # GH 14621
|
|
+ # https://github.com/dateutil/dateutil/issues/321
|
|
+ self.assertEqual(result_pytz.to_pydatetime().tzname(),
|
|
+ result_dateutil.to_pydatetime().tzname())
|
|
+ self.assertEqual(str(result_pytz), str(result_dateutil))
|
|
+
|
|
+ # 1 hour difference
|
|
+ result_pytz = (Timestamp('2013-10-27 01:00:00')
|
|
+ .tz_localize(pytz_zone, ambiguous=1))
|
|
+ result_dateutil = (Timestamp('2013-10-27 01:00:00')
|
|
+ .tz_localize(dateutil_zone, ambiguous=1))
|
|
+ self.assertEqual(result_pytz.value, result_dateutil.value)
|
|
+ self.assertEqual(result_pytz.value, 1382832000000000000)
|
|
+
|
|
+ # dateutil < 2.6 is buggy w.r.t. ambiguous timezones
|
|
+ if dateutil.__version__ > LooseVersion('2.5.3'):
|
|
+ # GH 14621
|
|
+ self.assertEqual(str(result_pytz), str(result_dateutil))
|
|
+ self.assertEqual(result_pytz.to_pydatetime().tzname(),
|
|
+ result_dateutil.to_pydatetime().tzname())
|
|
+
|
|
def test_index_equals_with_tz(self):
|
|
left = date_range('1/1/2011', periods=100, freq='H', tz='utc')
|
|
right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern')
|
|
diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py
|
|
index 21cfe84f1..b45f867be 100644
|
|
--- a/pandas/tseries/tests/test_tslib.py
|
|
+++ b/pandas/tseries/tests/test_tslib.py
|
|
@@ -327,8 +327,9 @@ class TestTimestamp(tm.TestCase):
|
|
|
|
# dateutil zone change (only matters for repr)
|
|
import dateutil
|
|
- if dateutil.__version__ >= LooseVersion(
|
|
- '2.3') and dateutil.__version__ <= LooseVersion('2.4.0'):
|
|
+ if (dateutil.__version__ >= LooseVersion('2.3') and
|
|
+ (dateutil.__version__ <= LooseVersion('2.4.0') or
|
|
+ dateutil.__version__ >= LooseVersion('2.6.0'))):
|
|
timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern',
|
|
'dateutil/US/Pacific']
|
|
else:
|
|
diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx
|
|
index d4eaaa0b5..685de214c 100644
|
|
--- a/pandas/tslib.pyx
|
|
+++ b/pandas/tslib.pyx
|
|
@@ -98,6 +98,7 @@ except NameError: # py3
|
|
cdef inline object create_timestamp_from_ts(
|
|
int64_t value, pandas_datetimestruct dts,
|
|
object tz, object freq):
|
|
+ """ convenience routine to construct a Timestamp from its parts """
|
|
cdef _Timestamp ts_base
|
|
ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month,
|
|
dts.day, dts.hour, dts.min,
|
|
@@ -112,6 +113,7 @@ cdef inline object create_timestamp_from_ts(
|
|
cdef inline object create_datetime_from_ts(
|
|
int64_t value, pandas_datetimestruct dts,
|
|
object tz, object freq):
|
|
+ """ convenience routine to construct a datetime.datetime from its parts """
|
|
return datetime(dts.year, dts.month, dts.day, dts.hour,
|
|
dts.min, dts.sec, dts.us, tz)
|
|
|
|
@@ -378,7 +380,6 @@ class Timestamp(_Timestamp):
|
|
# Mixing pydatetime positional and keyword arguments is forbidden!
|
|
|
|
cdef _TSObject ts
|
|
- cdef _Timestamp ts_base
|
|
|
|
if offset is not None:
|
|
# deprecate offset kwd in 0.19.0, GH13593
|
|
@@ -412,17 +413,7 @@ class Timestamp(_Timestamp):
|
|
from pandas.tseries.frequencies import to_offset
|
|
freq = to_offset(freq)
|
|
|
|
- # make datetime happy
|
|
- ts_base = _Timestamp.__new__(cls, ts.dts.year, ts.dts.month,
|
|
- ts.dts.day, ts.dts.hour, ts.dts.min,
|
|
- ts.dts.sec, ts.dts.us, ts.tzinfo)
|
|
-
|
|
- # fill out rest of data
|
|
- ts_base.value = ts.value
|
|
- ts_base.freq = freq
|
|
- ts_base.nanosecond = ts.dts.ps / 1000
|
|
-
|
|
- return ts_base
|
|
+ return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
|
|
|
|
def _round(self, freq, rounder):
|
|
|
|
@@ -660,8 +651,80 @@ class Timestamp(_Timestamp):
|
|
astimezone = tz_convert
|
|
|
|
def replace(self, **kwds):
|
|
- return Timestamp(datetime.replace(self, **kwds),
|
|
- freq=self.freq)
|
|
+ """
|
|
+ implements datetime.replace, handles nanoseconds
|
|
+
|
|
+ Parameters
|
|
+ ----------
|
|
+ kwargs: key-value dict
|
|
+
|
|
+ accepted keywords are:
|
|
+ year, month, day, hour, minute, second, microsecond, nanosecond, tzinfo
|
|
+
|
|
+ values must be integer, or for tzinfo, a tz-convertible
|
|
+
|
|
+ Returns
|
|
+ -------
|
|
+ Timestamp with fields replaced
|
|
+ """
|
|
+
|
|
+ cdef:
|
|
+ pandas_datetimestruct dts
|
|
+ int64_t value
|
|
+ object tzinfo, result, k, v
|
|
+ _TSObject ts
|
|
+
|
|
+ # set to naive if needed
|
|
+ tzinfo = self.tzinfo
|
|
+ value = self.value
|
|
+ if tzinfo is not None:
|
|
+ value = tz_convert_single(value, 'UTC', tzinfo)
|
|
+
|
|
+ # setup components
|
|
+ pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts)
|
|
+ dts.ps = self.nanosecond * 1000
|
|
+
|
|
+ # replace
|
|
+ def validate(k, v):
|
|
+ """ validate integers """
|
|
+ if not isinstance(v, int):
|
|
+ raise ValueError("value must be an integer, received {v} for {k}".format(v=type(v), k=k))
|
|
+ return v
|
|
+
|
|
+ for k, v in kwds.items():
|
|
+ if k == 'year':
|
|
+ dts.year = validate(k, v)
|
|
+ elif k == 'month':
|
|
+ dts.month = validate(k, v)
|
|
+ elif k == 'day':
|
|
+ dts.day = validate(k, v)
|
|
+ elif k == 'hour':
|
|
+ dts.hour = validate(k, v)
|
|
+ elif k == 'minute':
|
|
+ dts.min = validate(k, v)
|
|
+ elif k == 'second':
|
|
+ dts.sec = validate(k, v)
|
|
+ elif k == 'microsecond':
|
|
+ dts.us = validate(k, v)
|
|
+ elif k == 'nanosecond':
|
|
+ dts.ps = validate(k, v) * 1000
|
|
+ elif k == 'tzinfo':
|
|
+ tzinfo = v
|
|
+ else:
|
|
+ raise ValueError("invalid name {} passed".format(k))
|
|
+
|
|
+ # reconstruct & check bounds
|
|
+ value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
|
|
+ if value != NPY_NAT:
|
|
+ _check_dts_bounds(&dts)
|
|
+
|
|
+ # set tz if needed
|
|
+ if tzinfo is not None:
|
|
+ value = tz_convert_single(value, tzinfo, 'UTC')
|
|
+
|
|
+ result = create_timestamp_from_ts(value, dts, tzinfo, self.freq)
|
|
+
|
|
+ return result
|
|
|
|
def isoformat(self, sep='T'):
|
|
base = super(_Timestamp, self).isoformat(sep=sep)
|
|
@@ -5041,7 +5104,9 @@ cpdef normalize_date(object dt):
|
|
-------
|
|
normalized : datetime.datetime or Timestamp
|
|
"""
|
|
- if PyDateTime_Check(dt):
|
|
+ if is_timestamp(dt):
|
|
+ return dt.replace(hour=0, minute=0, second=0, microsecond=0, nanosecond=0)
|
|
+ elif PyDateTime_Check(dt):
|
|
return dt.replace(hour=0, minute=0, second=0, microsecond=0)
|
|
elif PyDate_Check(dt):
|
|
return datetime(dt.year, dt.month, dt.day)
|
|
--
|
|
2.11.0
|
|
|