time_series

In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

%matplotlib inline
matplotlib.style.use("ggplot")
In [2]:
# Daily timestamps covering March 2016; both the start and the end date are included.
dates = pd.date_range("2016-03-01", "2016-03-31", freq="D")
dates, len(dates)
Out[2]:
(DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
                '2016-03-05', '2016-03-06', '2016-03-07', '2016-03-08',
                '2016-03-09', '2016-03-10', '2016-03-11', '2016-03-12',
                '2016-03-13', '2016-03-14', '2016-03-15', '2016-03-16',
                '2016-03-17', '2016-03-18', '2016-03-19', '2016-03-20',
                '2016-03-21', '2016-03-22', '2016-03-23', '2016-03-24',
                '2016-03-25', '2016-03-26', '2016-03-27', '2016-03-28',
                '2016-03-29', '2016-03-30', '2016-03-31'],
               dtype='datetime64[ns]', freq='D'), 31)
In [3]:
# bdate_range produces business days only (freq "B"), so Saturdays and Sundays are left out.
bdates = pd.bdate_range("2016-03-01", "2016-03-31")
bdates, len(bdates)
Out[3]:
(DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
                '2016-03-07', '2016-03-08', '2016-03-09', '2016-03-10',
                '2016-03-11', '2016-03-14', '2016-03-15', '2016-03-16',
                '2016-03-17', '2016-03-18', '2016-03-21', '2016-03-22',
                '2016-03-23', '2016-03-24', '2016-03-25', '2016-03-28',
                '2016-03-29', '2016-03-30', '2016-03-31'],
               dtype='datetime64[ns]', freq='B'), 23)
In [4]:
# Timedelta counterpart of date_range: an evenly spaced index of durations instead of timestamps.
pd.timedelta_range(start="1 days", end="31 days")
Out[4]:
TimedeltaIndex([ '1 days',  '2 days',  '3 days',  '4 days',  '5 days',
                 '6 days',  '7 days',  '8 days',  '9 days', '10 days',
                '11 days', '12 days', '13 days', '14 days', '15 days',
                '16 days', '17 days', '18 days', '19 days', '20 days',
                '21 days', '22 days', '23 days', '24 days', '25 days',
                '26 days', '27 days', '28 days', '29 days', '30 days',
                '31 days'],
               dtype='timedelta64[ns]', freq='D')
In [5]:
# Display the function object itself (no call) to see which module provides period_range.
pd.period_range
Out[5]:
<function pandas.tseries.period.period_range>
In [6]:
# One Period per calendar day of March 2016 (daily frequency, endpoints included).
pindex = pd.period_range("2016-03-01", "2016-03-31", freq="D")
pindex
Out[6]:
PeriodIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
             '2016-03-05', '2016-03-06', '2016-03-07', '2016-03-08',
             '2016-03-09', '2016-03-10', '2016-03-11', '2016-03-12',
             '2016-03-13', '2016-03-14', '2016-03-15', '2016-03-16',
             '2016-03-17', '2016-03-18', '2016-03-19', '2016-03-20',
             '2016-03-21', '2016-03-22', '2016-03-23', '2016-03-24',
             '2016-03-25', '2016-03-26', '2016-03-27', '2016-03-28',
             '2016-03-29', '2016-03-30', '2016-03-31'],
            dtype='int64', freq='D')
In [7]:
# Indexing a PeriodIndex gives back a scalar Period object.
pindex[0], type(pindex[0])
Out[7]:
(Period('2016-03-01', 'D'), pandas._period.Period)
In [53]:
# Show the classes that the public names pd.Period and pd.DataFrame refer to.
pd.Period, pd.DataFrame
Out[53]:
(pandas._period.Period, pandas.core.frame.DataFrame)
In [9]:
# http://docs.python.jp/3/library/io.html
# http://stackoverflow.com/questions/4330812/how-do-i-clear-a-stringio-object
import sys
import io
from contextlib import contextmanager


@contextmanager
def capture():
    """Temporarily redirect ``sys.stdout`` into an in-memory text buffer.

    Yields the ``io.StringIO`` that receives everything printed inside the
    ``with`` body. On exit -- including when the body raises -- the previous
    ``sys.stdout`` is put back and the buffer is closed, so ``getvalue()``
    has to be called before leaving the ``with`` block.
    """
    buffer = io.StringIO()
    previous_stdout = sys.stdout
    sys.stdout = buffer
    try:
        yield buffer
    finally:
        # Put the real stdout back first, then release the buffer.
        sys.stdout = previous_stdout
        buffer.close()
In [33]:
import pandas as pd

with capture() as c:
    # Inside the block the buffer is open and stands in for sys.stdout.
    print(c, c.closed)
    c.flush()
    c_log1 = c.getvalue()
    # truncate() with no argument cuts at the current position, which is the
    # end of the text just written, so nothing is removed here.
    c.truncate()
    c_log2 = c.getvalue()
    # Clearing the buffer takes two steps: truncate(0) discards the text but
    # leaves the stream position where it was, so rewind with seek(0) as well.
    # Without the rewind the help text would be written at the old offset and
    # help_doc would start with a run of NUL padding characters.
    c.truncate(0)
    c.seek(0)
    help(pd.Period)
    help_doc = c.getvalue()

# The buffer is closed once the with-block exits, so only the strings copied
# out above are still usable here.
print("output")
print(c_log1.strip())
print(c_log2.strip())
print("\n".join(help_doc.splitlines()[:20]))
print(c.closed)

output
<_io.StringIO object at 0x11721fdc8> False
<_io.StringIO object at 0x11721fdc8> False
Help on class Period in module pandas._period:

class Period(builtins.object)
 |  Represents an period of time
 |
 |  Parameters
 |  ----------
 |  value : Period or compat.string_types, default None
 |      The time period represented (e.g., '4Q2005')
 |  freq : str, default None
 |      One of pandas period strings or corresponding objects
 |  year : int, default None
 |  month : int, default 1
 |  quarter : int, default None
 |  day : int, default 1
 |  hour : int, default 0
 |  minute : int, default 0
 |  second : int, default 0
 |
 |  Methods defined here:
True
In [38]:
# IPython shorthand: a trailing "?" shows the docstring for pd.Period.
pd.Period?
In [42]:
# Build a one-day Period from explicit calendar fields rather than from a date string.
period = pd.Period(
    year=2016,
    month=3,
    day=6,
    freq="D",
)
period
Out[42]:
Period('2016-03-06', 'D')
In [41]:
# The same Period built from a date string; the result shows a daily ('D') frequency.
pd.Period("2016-03-06")
Out[41]:
Period('2016-03-06', 'D')
In [43]:
# A Period is a span of time: start_time and end_time are its first and last instants.
period.start_time, period.end_time
Out[43]:
(Timestamp('2016-03-06 00:00:00'), Timestamp('2016-03-06 23:59:59.999999999'))
In [56]:
import datetime
# Probe both edges of the one-day period with plain datetime values.
dt_start = datetime.datetime(year=2016, month=3, day=6)
dt_end = datetime.datetime(year=2016, month=3, day=6, hour=23, minute=59, second=59)
[
    # Strict lower bound: midnight equals start_time, so this one is False.
    period.start_time < dt_start <= period.end_time,
    # Inclusive lower bound: midnight is inside the period.
    period.start_time <= dt_start <= period.end_time,
    # 23:59:59 is inside the period when the upper bound is inclusive ...
    period.start_time <= dt_end <= period.end_time,
    # ... and also when it is strict, because end_time is 23:59:59.999999999.
    period.start_time <= dt_end < period.end_time,
]
Out[56]:
[False, True, True, True]
In [49]:
# 31 daily timestamps starting on 2016-03-01, defined by a count instead of an end date.
date_range = pd.date_range(start="2016-03-01", periods=31, freq="D")
# Shift every timestamp forward by one step of the index frequency (one day).
end_exclude = date_range.shift(1)
end_exclude
Out[49]:
DatetimeIndex(['2016-03-02', '2016-03-03', '2016-03-04', '2016-03-05',
               '2016-03-06', '2016-03-07', '2016-03-08', '2016-03-09',
               '2016-03-10', '2016-03-11', '2016-03-12', '2016-03-13',
               '2016-03-14', '2016-03-15', '2016-03-16', '2016-03-17',
               '2016-03-18', '2016-03-19', '2016-03-20', '2016-03-21',
               '2016-03-22', '2016-03-23', '2016-03-24', '2016-03-25',
               '2016-03-26', '2016-03-27', '2016-03-28', '2016-03-29',
               '2016-03-30', '2016-03-31', '2016-04-01'],
              dtype='datetime64[ns]', freq='D')
In [52]:
# Put each start date next to its shifted counterpart (one day later) in a single frame.
date_df = pd.DataFrame(
    dict(start_date=date_range, end_exclude=end_exclude)
)
date_df.tail()
Out[52]:
end_exclude start_date
26 2016-03-28 2016-03-27
27 2016-03-29 2016-03-28
28 2016-03-30 2016-03-29
29 2016-03-31 2016-03-30
30 2016-04-01 2016-03-31