time_series¶
In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
matplotlib.style.use("ggplot")
In [2]:
# Calendar-day index for March 2016; both endpoints are included, giving 31 entries.
dates = pd.date_range(start="2016-03-01", end="2016-03-31")
# Display the index together with its length.
dates, len(dates)
Out[2]:
(DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
'2016-03-05', '2016-03-06', '2016-03-07', '2016-03-08',
'2016-03-09', '2016-03-10', '2016-03-11', '2016-03-12',
'2016-03-13', '2016-03-14', '2016-03-15', '2016-03-16',
'2016-03-17', '2016-03-18', '2016-03-19', '2016-03-20',
'2016-03-21', '2016-03-22', '2016-03-23', '2016-03-24',
'2016-03-25', '2016-03-26', '2016-03-27', '2016-03-28',
'2016-03-29', '2016-03-30', '2016-03-31'],
dtype='datetime64[ns]', freq='D'), 31)
In [3]:
# "b" stands for business: bdate_range builds a freq='B' index, so weekend
# days (e.g. 2016-03-05 and 2016-03-06) are left out of the result.
bdates = pd.bdate_range(start="2016-03-01", end="2016-03-31")
# Display the index together with its length.
bdates, len(bdates)
Out[3]:
(DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
'2016-03-07', '2016-03-08', '2016-03-09', '2016-03-10',
'2016-03-11', '2016-03-14', '2016-03-15', '2016-03-16',
'2016-03-17', '2016-03-18', '2016-03-21', '2016-03-22',
'2016-03-23', '2016-03-24', '2016-03-25', '2016-03-28',
'2016-03-29', '2016-03-30', '2016-03-31'],
dtype='datetime64[ns]', freq='B'), 23)
In [4]:
# Timedelta counterpart of date_range: one entry per day from 1 day to 31 days, endpoints included.
pd.timedelta_range(start="1 days", end="31 days")
Out[4]:
TimedeltaIndex([ '1 days', '2 days', '3 days', '4 days', '5 days',
'6 days', '7 days', '8 days', '9 days', '10 days',
'11 days', '12 days', '13 days', '14 days', '15 days',
'16 days', '17 days', '18 days', '19 days', '20 days',
'21 days', '22 days', '23 days', '24 days', '25 days',
'26 days', '27 days', '28 days', '29 days', '30 days',
'31 days'],
dtype='timedelta64[ns]', freq='D')
In [5]:
# Evaluate the bare function object (no call) to see which module implements it.
pd.period_range
Out[5]:
<function pandas.tseries.period.period_range>
In [6]:
# Daily periods covering March 2016; the result is a PeriodIndex with freq='D'.
pindex = pd.period_range(start="2016-03-01", end="2016-03-31")
pindex
Out[6]:
PeriodIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
'2016-03-05', '2016-03-06', '2016-03-07', '2016-03-08',
'2016-03-09', '2016-03-10', '2016-03-11', '2016-03-12',
'2016-03-13', '2016-03-14', '2016-03-15', '2016-03-16',
'2016-03-17', '2016-03-18', '2016-03-19', '2016-03-20',
'2016-03-21', '2016-03-22', '2016-03-23', '2016-03-24',
'2016-03-25', '2016-03-26', '2016-03-27', '2016-03-28',
'2016-03-29', '2016-03-30', '2016-03-31'],
dtype='int64', freq='D')
In [7]:
# Each element of the PeriodIndex is a pandas Period object; show the first one and its type.
pindex[0], type(pindex[0])
Out[7]:
(Period('2016-03-01', 'D'), pandas._period.Period)
In [53]:
# Display the classes behind the public names to see where each one is defined.
pd.Period, pd.DataFrame
Out[53]:
(pandas._period.Period, pandas.core.frame.DataFrame)
In [9]:
# http://docs.python.jp/3/library/io.html
# http://stackoverflow.com/questions/4330812/how-do-i-clear-a-stringio-object
import sys
import io
from contextlib import contextmanager
@contextmanager
def capture():
    """Temporarily redirect ``sys.stdout`` into an in-memory text buffer.

    Yields
    ------
    io.StringIO
        The buffer that receives everything written to ``sys.stdout``
        inside the ``with`` block.  The buffer is closed as soon as the
        block exits, so read it with ``getvalue()`` before leaving the block.

    The previous ``sys.stdout`` is restored on exit, including when the
    body of the ``with`` block raises.
    """
    # Local import: the surrounding cell only imports ``contextmanager``.
    from contextlib import redirect_stdout

    # Context managers exit in reverse order: stdout is restored first,
    # then the StringIO is closed.
    with io.StringIO() as strio, redirect_stdout(strio):
        yield strio
In [33]:
import pandas as pd
with capture() as c:
    # Inside the block, print() writes into the buffer instead of the notebook.
    print(c, c.closed)
    c.flush()
    c_log1 = c.getvalue()
    # truncate() without a size cuts at the current position, which is the
    # end of the buffer here, so the content is unchanged.
    c.truncate()
    c_log2 = c.getvalue()
    # Clear the buffer. truncate(0) does not move the stream position, so
    # without the seek(0) the next write would be padded with NUL characters
    # up to the old position.
    c.seek(0)
    c.truncate(0)
    help(pd.Period)
    # getvalue() must be called before the block exits and closes the buffer.
    help_doc = c.getvalue()
# stdout is restored from here on, so these prints reach the notebook.
print("output")
print(c_log1.strip())
print(c_log2.strip())
# Show only the first 20 lines of the captured help text.
print("\n".join(help_doc.splitlines()[:20]))
# The buffer was closed when the with block exited.
print(c.closed)
output
<_io.StringIO object at 0x11721fdc8> False
<_io.StringIO object at 0x11721fdc8> False
Help on class Period in module pandas._period:
class Period(builtins.object)
| Represents an period of time
|
| Parameters
| ----------
| value : Period or compat.string_types, default None
| The time period represented (e.g., '4Q2005')
| freq : str, default None
| One of pandas period strings or corresponding objects
| year : int, default None
| month : int, default 1
| quarter : int, default None
| day : int, default 1
| hour : int, default 0
| minute : int, default 0
| second : int, default 0
|
| Methods defined here:
True
In [38]:
# IPython-only syntax: a trailing "?" opens the documentation for pd.Period in the pager.
pd.Period?
In [42]:
# Build a daily Period from explicit date fields.
period = pd.Period(year=2016, month=3, day=6, freq="D")
period
Out[42]:
Period('2016-03-06', 'D')
In [41]:
# The same Period built from a date string; no freq is passed and the result has freq 'D'.
pd.Period("2016-03-06")
Out[41]:
Period('2016-03-06', 'D')
In [43]:
# A Period spans an interval: for freq 'D' it runs from midnight to the last nanosecond of that day.
period.start_time, period.end_time
Out[43]:
(Timestamp('2016-03-06 00:00:00'), Timestamp('2016-03-06 23:59:59.999999999'))
In [56]:
import datetime
# Plain datetimes at the first instant and at 23:59:59 of the period's day.
dt_start = datetime.datetime(2016, 3, 6)
dt_end = datetime.datetime(2016, 3, 6, 23, 59, 59)
# Probe how the two datetimes compare against the period's start_time and end_time.
[
# False: dt_start equals start_time, so the strict "<" fails.
period.start_time < dt_start <= period.end_time,
# True: with "<=" the start instant itself is accepted.
period.start_time <= dt_start <= period.end_time,
# True: dt_end lies between start_time and end_time.
period.start_time <= dt_end <= period.end_time,
# True: dt_end (23:59:59) is still earlier than end_time (23:59:59.999999999).
period.start_time <= dt_end < period.end_time,
]
Out[56]:
[False, True, True, True]
In [49]:
# 31 consecutive daily timestamps starting on 2016-03-01.
date_range = pd.date_range("2016-03-01", periods=31)
# shift(1) moves every timestamp forward by one step of the index frequency
# (one day here), giving the exclusive end that pairs with each start date.
end_exclude = date_range.shift(1)
end_exclude
Out[49]:
DatetimeIndex(['2016-03-02', '2016-03-03', '2016-03-04', '2016-03-05',
'2016-03-06', '2016-03-07', '2016-03-08', '2016-03-09',
'2016-03-10', '2016-03-11', '2016-03-12', '2016-03-13',
'2016-03-14', '2016-03-15', '2016-03-16', '2016-03-17',
'2016-03-18', '2016-03-19', '2016-03-20', '2016-03-21',
'2016-03-22', '2016-03-23', '2016-03-24', '2016-03-25',
'2016-03-26', '2016-03-27', '2016-03-28', '2016-03-29',
'2016-03-30', '2016-03-31', '2016-04-01'],
dtype='datetime64[ns]', freq='D')
In [52]:
# Pair each day's start with its exclusive end (the start of the following day).
date_df = pd.DataFrame({
"start_date": date_range,
"end_exclude": end_exclude
})
# Show only the last rows; the final interval ends on 2016-04-01.
date_df.tail()
Out[52]:
end_exclude | start_date | |
---|---|---|
26 | 2016-03-28 | 2016-03-27 |
27 | 2016-03-29 | 2016-03-28 |
28 | 2016-03-30 | 2016-03-29 |
29 | 2016-03-31 | 2016-03-30 |
30 | 2016-04-01 | 2016-03-31 |