interpolate

In [1]:
# All resources live on the publisher's site; build each URL from the shared host prefix.
_publisher_root = "http://www.kyoritsu-pub.co.jp"
_contents_root = _publisher_root + "/app/file/goods_contents"

# Book detail page.
site_url = _publisher_root + "/bookdetail/9784320123656"
# Zip archive fetched by the data-loading cell when the sample CSV is missing locally.
zip_url = _contents_root + "/2377.zip"
# Companion PDF (not downloaded by the active code in this notebook).
pdf_url = _contents_root + "/2324.pdf"
In [2]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline
In [3]:
import requests
import io
import shutil
import zipfile
import os
In [4]:
# Location of the sample CSV relative to the current working directory.
data_dir = "suppl_20151001/data/missdata"
filename = "employee_IQ_JP.csv"
file_path = data_dir + "/" + filename

# Download and unpack the archive only when the CSV is not already present,
# so re-running the cell does not hit the network again.
if not os.path.exists(file_path):
    # None lets requests fall back to the HTTP_PROXY / HTTPS_PROXY environment
    # variables. When an explicit proxy is required, set this to a dict such as
    # {"http": "http://host:port", "https": "http://host:port"}.
    proxies = None

    # A timeout keeps the cell from hanging forever on an unresponsive server.
    r = requests.get(zip_url, proxies=proxies, timeout=30)
    # Fail loudly on an HTTP error instead of handing an error page to ZipFile.
    r.raise_for_status()

    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        # Extracts into the current working directory; the archive is assumed
        # to contain the `suppl_20151001/...` tree -- TODO confirm.
        z.extractall()

df = pd.read_csv(file_path)
df.describe()
Out[4]:
IQ JobPerformance MCAR MCAR.is.missing MAR MAR.is.missing MNAR MNAR.is.missing
count 20.000000 20.000000 16.00000 20.000000 15.000000 20.000000 15.00000 20.000000
mean 100.000000 10.350000 10.56250 0.200000 10.666667 0.250000 11.40000 0.250000
std 14.127241 2.680829 2.82769 0.410391 2.794553 0.444262 2.22967 0.444262
min 78.000000 7.000000 7.00000 0.000000 7.000000 0.000000 9.00000 0.000000
25% 90.000000 8.750000 8.75000 0.000000 9.000000 0.000000 10.00000 0.000000
50% 97.500000 10.000000 10.00000 0.000000 10.000000 0.000000 11.00000 0.000000
75% 109.000000 12.000000 12.25000 0.000000 12.000000 0.250000 12.50000 0.250000
max 134.000000 16.000000 16.00000 1.000000 16.000000 1.000000 16.00000 1.000000
In [ ]:
# Dictionary entry for the word "interpolate": http://ejje.weblio.jp/content/interpolate
In [52]:
# Small demo series with a single missing value at position 2; it is the
# input for the interpolation-method comparison further down.
s = pd.Series(data=[0, 1, np.nan, 3])
In [40]:
# Candidate values for the `method` argument of Series.interpolate.
_candidate_methods = [
    'linear', 'time', 'index', 'values', 'nearest', 'zero',
    'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',
    'polynomial', 'spline', 'piecewise_polynomial', 'pchip',
]

# Methods left out of the comparison. Presumably they need a DatetimeIndex,
# an explicit `order`, or more data points than the demo series has -- TODO confirm.
_skipped_methods = {
    "time", "quadratic", "cubic",
    "polynomial", "spline", "piecewise_polynomial",
}

# Keep the original ordering while dropping the skipped names.
methods = [name for name in _candidate_methods if name not in _skipped_methods]

methods
Out[40]:
['linear',
 'index',
 'values',
 'nearest',
 'zero',
 'slinear',
 'barycentric',
 'krogh',
 'pchip']
In [51]:
def _interpolate(m):
    """Interpolate the notebook-level Series `s` with method `m`.

    The result is named after the method so that it can serve as a column
    label when several results are placed side by side.
    """
    filled = s.interpolate(method=m)
    return filled.rename(m)


# One column per interpolation method, aligned on the shared index.
pd.concat([_interpolate(name) for name in methods], axis=1)

Out[51]:
linear index values nearest zero slinear barycentric krogh pchip
0 0 0 0 0 0 0 0 0 0
1 1 1 1 1 1 1 1 1 1
2 2 2 2 1 1 2 2 2 2
3 3 3 3 3 3 3 3 3 3