You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
gentoo-overlay/dev-python/pandas/files/pandas-0.19.1-seqf.patch

358 lines
14 KiB

From f8bd08e9c2fc6365980f41b846bbae4b40f08b83 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sat, 12 Nov 2016 10:58:54 -0500
Subject: [PATCH] BUG: segfault manifesting with dateutil=2.6 w.r.t. replace
when timezones are present
closes #14621
Author: Jeff Reback <jeff@reback.net>
Closes #14631 from jreback/replace and squashes the following commits:
3f95042 [Jeff Reback] BUG: segfault manifesting with dateutil=2.6 w.r.t. replace when timezones are present
---
ci/requirements-3.5_OSX.pip | 2 +-
doc/source/whatsnew/v0.19.2.txt | 3 ++
pandas/tseries/offsets.py | 1 +
pandas/tseries/tests/test_offsets.py | 20 ++++---
pandas/tseries/tests/test_timezones.py | 89 +++++++++++++++++++++++++++++--
pandas/tseries/tests/test_tslib.py | 5 +-
pandas/tslib.pyx | 95 ++++++++++++++++++++++++++++------
7 files changed, 188 insertions(+), 27 deletions(-)
diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py
index 051cc8aa4..2e3852a7e 100644
--- a/pandas/tseries/offsets.py
+++ b/pandas/tseries/offsets.py
@@ -68,6 +68,7 @@ def apply_wraps(func):
other = other.tz_localize(None)
result = func(self, other)
+
if self._adjust_dst:
result = tslib._localize_pydatetime(result, tz)
diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py
index 1735ac4e2..768e9212e 100644
--- a/pandas/tseries/tests/test_offsets.py
+++ b/pandas/tseries/tests/test_offsets.py
@@ -1,4 +1,5 @@
import os
+from distutils.version import LooseVersion
from datetime import date, datetime, timedelta
from dateutil.relativedelta import relativedelta
from pandas.compat import range, iteritems
@@ -4851,6 +4852,7 @@ class TestDST(tm.TestCase):
def _test_offset(self, offset_name, offset_n, tstart, expected_utc_offset):
offset = DateOffset(**{offset_name: offset_n})
+
t = tstart + offset
if expected_utc_offset is not None:
self.assertTrue(get_utc_offset_hours(t) == expected_utc_offset)
@@ -4890,17 +4892,23 @@ class TestDST(tm.TestCase):
return Timestamp(string + offset_string).tz_convert(tz)
def test_fallback_plural(self):
- """test moving from daylight savings to standard time"""
+ # test moving from daylight savings to standard time
+ import dateutil
for tz, utc_offsets in self.timezone_utc_offsets.items():
hrs_pre = utc_offsets['utc_offset_daylight']
hrs_post = utc_offsets['utc_offset_standard']
- self._test_all_offsets(
- n=3, tstart=self._make_timestamp(self.ts_pre_fallback,
- hrs_pre, tz),
- expected_utc_offset=hrs_post)
+
+ if dateutil.__version__ != LooseVersion('2.6.0'):
+ # buggy ambiguous behavior in 2.6.0
+ # GH 14621
+ # https://github.com/dateutil/dateutil/issues/321
+ self._test_all_offsets(
+ n=3, tstart=self._make_timestamp(self.ts_pre_fallback,
+ hrs_pre, tz),
+ expected_utc_offset=hrs_post)
def test_springforward_plural(self):
- """test moving from standard to daylight savings"""
+ # test moving from standard to daylight savings
for tz, utc_offsets in self.timezone_utc_offsets.items():
hrs_pre = utc_offsets['utc_offset_standard']
hrs_post = utc_offsets['utc_offset_daylight']
diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py
index 00e8ee631..db8cda5c7 100644
--- a/pandas/tseries/tests/test_timezones.py
+++ b/pandas/tseries/tests/test_timezones.py
@@ -4,7 +4,7 @@ import nose
import numpy as np
import pytz
-
+from distutils.version import LooseVersion
from pandas.types.dtypes import DatetimeTZDtype
from pandas import (Index, Series, DataFrame, isnull, Timestamp)
@@ -518,8 +518,12 @@ class TestTimeZoneSupportPytz(tm.TestCase):
times = date_range("2013-10-26 23:00", "2013-10-27 01:00", freq="H",
tz=tz, ambiguous='infer')
- self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz))
- self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz))
+ self.assertEqual(times[0], Timestamp('2013-10-26 23:00', tz=tz,
+ freq="H"))
+ if dateutil.__version__ != LooseVersion('2.6.0'):
+ # GH 14621
+ self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz,
+ freq="H"))
def test_ambiguous_nat(self):
tz = self.tz('US/Eastern')
@@ -1163,6 +1167,85 @@ class TestTimeZones(tm.TestCase):
def setUp(self):
tm._skip_if_no_pytz()
+ def test_replace(self):
+ # GH 14621
+ # GH 7825
+ # replacing datetime components with and w/o presence of a timezone
+ dt = Timestamp('2016-01-01 09:00:00')
+ result = dt.replace(hour=0)
+ expected = Timestamp('2016-01-01 00:00:00')
+ self.assertEqual(result, expected)
+
+ for tz in self.timezones:
+ dt = Timestamp('2016-01-01 09:00:00', tz=tz)
+ result = dt.replace(hour=0)
+ expected = Timestamp('2016-01-01 00:00:00', tz=tz)
+ self.assertEqual(result, expected)
+
+ # we preserve nanoseconds
+ dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
+ result = dt.replace(hour=0)
+ expected = Timestamp('2016-01-01 00:00:00.000000123', tz=tz)
+ self.assertEqual(result, expected)
+
+ # test all
+ dt = Timestamp('2016-01-01 09:00:00.000000123', tz=tz)
+ result = dt.replace(year=2015, month=2, day=2, hour=0, minute=5,
+ second=5, microsecond=5, nanosecond=5)
+ expected = Timestamp('2015-02-02 00:05:05.000005005', tz=tz)
+ self.assertEqual(result, expected)
+
+ # error
+ def f():
+ dt.replace(foo=5)
+ self.assertRaises(ValueError, f)
+
+ def f():
+ dt.replace(hour=0.1)
+ self.assertRaises(ValueError, f)
+
+ # assert conversion to naive is the same as replacing tzinfo with None
+ dt = Timestamp('2013-11-03 01:59:59.999999-0400', tz='US/Eastern')
+ self.assertEqual(dt.tz_localize(None), dt.replace(tzinfo=None))
+
+ def test_ambiguous_compat(self):
+ # validate that pytz and dateutil are compat for dst
+ # when the transition happens
+ tm._skip_if_no_dateutil()
+ tm._skip_if_no_pytz()
+
+ pytz_zone = 'Europe/London'
+ dateutil_zone = 'dateutil/Europe/London'
+ result_pytz = (Timestamp('2013-10-27 01:00:00')
+ .tz_localize(pytz_zone, ambiguous=0))
+ result_dateutil = (Timestamp('2013-10-27 01:00:00')
+ .tz_localize(dateutil_zone, ambiguous=0))
+ self.assertEqual(result_pytz.value, result_dateutil.value)
+ self.assertEqual(result_pytz.value, 1382835600000000000)
+
+ # dateutil 2.6 buggy w.r.t. ambiguous=0
+ if dateutil.__version__ != LooseVersion('2.6.0'):
+ # GH 14621
+ # https://github.com/dateutil/dateutil/issues/321
+ self.assertEqual(result_pytz.to_pydatetime().tzname(),
+ result_dateutil.to_pydatetime().tzname())
+ self.assertEqual(str(result_pytz), str(result_dateutil))
+
+ # 1 hour difference
+ result_pytz = (Timestamp('2013-10-27 01:00:00')
+ .tz_localize(pytz_zone, ambiguous=1))
+ result_dateutil = (Timestamp('2013-10-27 01:00:00')
+ .tz_localize(dateutil_zone, ambiguous=1))
+ self.assertEqual(result_pytz.value, result_dateutil.value)
+ self.assertEqual(result_pytz.value, 1382832000000000000)
+
+ # dateutil < 2.6 is buggy w.r.t. ambiguous timezones
+ if dateutil.__version__ > LooseVersion('2.5.3'):
+ # GH 14621
+ self.assertEqual(str(result_pytz), str(result_dateutil))
+ self.assertEqual(result_pytz.to_pydatetime().tzname(),
+ result_dateutil.to_pydatetime().tzname())
+
def test_index_equals_with_tz(self):
left = date_range('1/1/2011', periods=100, freq='H', tz='utc')
right = date_range('1/1/2011', periods=100, freq='H', tz='US/Eastern')
diff --git a/pandas/tseries/tests/test_tslib.py b/pandas/tseries/tests/test_tslib.py
index 21cfe84f1..b45f867be 100644
--- a/pandas/tseries/tests/test_tslib.py
+++ b/pandas/tseries/tests/test_tslib.py
@@ -327,8 +327,9 @@ class TestTimestamp(tm.TestCase):
# dateutil zone change (only matters for repr)
import dateutil
- if dateutil.__version__ >= LooseVersion(
- '2.3') and dateutil.__version__ <= LooseVersion('2.4.0'):
+ if (dateutil.__version__ >= LooseVersion('2.3') and
+ (dateutil.__version__ <= LooseVersion('2.4.0') or
+ dateutil.__version__ >= LooseVersion('2.6.0'))):
timezones = ['UTC', 'Asia/Tokyo', 'US/Eastern',
'dateutil/US/Pacific']
else:
diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx
index d4eaaa0b5..685de214c 100644
--- a/pandas/tslib.pyx
+++ b/pandas/tslib.pyx
@@ -98,6 +98,7 @@ except NameError: # py3
cdef inline object create_timestamp_from_ts(
int64_t value, pandas_datetimestruct dts,
object tz, object freq):
+ """ convenience routine to construct a Timestamp from its parts """
cdef _Timestamp ts_base
ts_base = _Timestamp.__new__(Timestamp, dts.year, dts.month,
dts.day, dts.hour, dts.min,
@@ -112,6 +113,7 @@ cdef inline object create_timestamp_from_ts(
cdef inline object create_datetime_from_ts(
int64_t value, pandas_datetimestruct dts,
object tz, object freq):
+ """ convenience routine to construct a datetime.datetime from its parts """
return datetime(dts.year, dts.month, dts.day, dts.hour,
dts.min, dts.sec, dts.us, tz)
@@ -378,7 +380,6 @@ class Timestamp(_Timestamp):
# Mixing pydatetime positional and keyword arguments is forbidden!
cdef _TSObject ts
- cdef _Timestamp ts_base
if offset is not None:
# deprecate offset kwd in 0.19.0, GH13593
@@ -412,17 +413,7 @@ class Timestamp(_Timestamp):
from pandas.tseries.frequencies import to_offset
freq = to_offset(freq)
- # make datetime happy
- ts_base = _Timestamp.__new__(cls, ts.dts.year, ts.dts.month,
- ts.dts.day, ts.dts.hour, ts.dts.min,
- ts.dts.sec, ts.dts.us, ts.tzinfo)
-
- # fill out rest of data
- ts_base.value = ts.value
- ts_base.freq = freq
- ts_base.nanosecond = ts.dts.ps / 1000
-
- return ts_base
+ return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
def _round(self, freq, rounder):
@@ -660,8 +651,80 @@ class Timestamp(_Timestamp):
astimezone = tz_convert
def replace(self, **kwds):
- return Timestamp(datetime.replace(self, **kwds),
- freq=self.freq)
+ """
+ implements datetime.replace, handles nanoseconds
+
+ Parameters
+ ----------
+ kwds: key-value dict
+
+ accepted keywords are:
+ year, month, day, hour, minute, second, microsecond, nanosecond, tzinfo
+
+ values must be integer, or for tzinfo, a tz-convertible
+
+ Returns
+ -------
+ Timestamp with fields replaced
+ """
+
+ cdef:
+ pandas_datetimestruct dts
+ int64_t value
+ object tzinfo, result, k, v
+ _TSObject ts
+
+ # set to naive if needed
+ tzinfo = self.tzinfo
+ value = self.value
+ if tzinfo is not None:
+ value = tz_convert_single(value, 'UTC', tzinfo)
+
+ # setup components
+ pandas_datetime_to_datetimestruct(value, PANDAS_FR_ns, &dts)
+ dts.ps = self.nanosecond * 1000
+
+ # replace
+ def validate(k, v):
+ """ validate integers """
+ if not isinstance(v, int):
+ raise ValueError("value must be an integer, received {v} for {k}".format(v=type(v), k=k))
+ return v
+
+ for k, v in kwds.items():
+ if k == 'year':
+ dts.year = validate(k, v)
+ elif k == 'month':
+ dts.month = validate(k, v)
+ elif k == 'day':
+ dts.day = validate(k, v)
+ elif k == 'hour':
+ dts.hour = validate(k, v)
+ elif k == 'minute':
+ dts.min = validate(k, v)
+ elif k == 'second':
+ dts.sec = validate(k, v)
+ elif k == 'microsecond':
+ dts.us = validate(k, v)
+ elif k == 'nanosecond':
+ dts.ps = validate(k, v) * 1000
+ elif k == 'tzinfo':
+ tzinfo = v
+ else:
+ raise ValueError("invalid name {} passed".format(k))
+
+ # reconstruct & check bounds
+ value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
+ if value != NPY_NAT:
+ _check_dts_bounds(&dts)
+
+ # set tz if needed
+ if tzinfo is not None:
+ value = tz_convert_single(value, tzinfo, 'UTC')
+
+ result = create_timestamp_from_ts(value, dts, tzinfo, self.freq)
+
+ return result
def isoformat(self, sep='T'):
base = super(_Timestamp, self).isoformat(sep=sep)
@@ -5041,7 +5104,9 @@ cpdef normalize_date(object dt):
-------
normalized : datetime.datetime or Timestamp
"""
- if PyDateTime_Check(dt):
+ if is_timestamp(dt):
+ return dt.replace(hour=0, minute=0, second=0, microsecond=0, nanosecond=0)
+ elif PyDateTime_Check(dt):
return dt.replace(hour=0, minute=0, second=0, microsecond=0)
elif PyDate_Check(dt):
return datetime(dt.year, dt.month, dt.day)
--
2.11.0