interpolate
In [1]:
# Book detail page on the kyoritsu-pub.co.jp site (URL path: bookdetail/<id>).
site_url = "http://www.kyoritsu-pub.co.jp/bookdetail/9784320123656"
# Zip archive that the data-loading cell downloads and extracts when the CSV is missing.
zip_url = "http://www.kyoritsu-pub.co.jp/app/file/goods_contents/2377.zip"
# PDF hosted next to the archive; only referenced from commented-out code in this notebook.
pdf_url = "http://www.kyoritsu-pub.co.jp/app/file/goods_contents/2324.pdf"
In [2]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [3]:
import requests
import io
import shutil
import zipfile
import os
In [4]:
# Location of the sample CSV, relative to the notebook's working directory.
data_dir = "suppl_20151001/data/missdata"
filename = "employee_IQ_JP.csv"
file_path = data_dir + "/" + filename

# Download and unpack the archive only when the CSV is not already on disk,
# so that re-running the cell does not hit the network again.
if not os.path.exists(file_path):
    # Leave as None to let requests pick up HTTP_PROXY / HTTPS_PROXY from the
    # environment. To force a proxy, set e.g.
    #   proxies = {"http": "http://host:port", "https": "http://host:port"}
    # (the previous hard-coded value was a masked placeholder and could not work).
    proxies = None
    r = requests.get(zip_url, proxies=proxies, timeout=60)
    # Fail loudly on 4xx/5xx instead of handing an error page to ZipFile.
    r.raise_for_status()
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        # Extracts into the current working directory; the archive is assumed
        # to contain the "suppl_20151001/..." tree -- TODO confirm.
        z.extractall()
    # NOTE: pdf_url can be fetched the same way if the PDF is wanted locally.

df = pd.read_csv(file_path)
df.describe()
Out[4]:
IQ | JobPerformance | MCAR | MCAR.is.missing | MAR | MAR.is.missing | MNAR | MNAR.is.missing | |
---|---|---|---|---|---|---|---|---|
count | 20.000000 | 20.000000 | 16.00000 | 20.000000 | 15.000000 | 20.000000 | 15.00000 | 20.000000 |
mean | 100.000000 | 10.350000 | 10.56250 | 0.200000 | 10.666667 | 0.250000 | 11.40000 | 0.250000 |
std | 14.127241 | 2.680829 | 2.82769 | 0.410391 | 2.794553 | 0.444262 | 2.22967 | 0.444262 |
min | 78.000000 | 7.000000 | 7.00000 | 0.000000 | 7.000000 | 0.000000 | 9.00000 | 0.000000 |
25% | 90.000000 | 8.750000 | 8.75000 | 0.000000 | 9.000000 | 0.000000 | 10.00000 | 0.000000 |
50% | 97.500000 | 10.000000 | 10.00000 | 0.000000 | 10.000000 | 0.000000 | 11.00000 | 0.000000 |
75% | 109.000000 | 12.000000 | 12.25000 | 0.000000 | 12.000000 | 0.250000 | 12.50000 | 0.250000 |
max | 134.000000 | 16.000000 | 16.00000 | 1.000000 | 16.000000 | 1.000000 | 16.00000 | 1.000000 |
In [ ]:
# Dictionary entry for the word "interpolate": http://ejje.weblio.jp/content/interpolate
In [52]:
# Four-point toy series with one missing value (position 2); it is the input
# that every interpolation method below is applied to.
s = pd.Series(np.array([0, 1, np.nan, 3]))
# Tip: help(s.interpolate) prints the full list of supported `method` values.
In [40]:
# Candidate values for Series.interpolate(method=...), filtered down to the
# ones compared in this notebook. The excluded names are dropped up front;
# presumably they need extra arguments or a different index type for the toy
# series -- TODO confirm the reason for each exclusion.
methods = [
    name
    for name in (
        'linear', 'time', 'index', 'values', 'nearest', 'zero',
        'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',
        'polynomial', 'spline', 'piecewise_polynomial', 'pchip',
    )
    if name not in {
        'time', 'quadratic', 'cubic',
        'polynomial', 'spline', 'piecewise_polynomial',
    }
]
methods
Out[40]:
['linear',
'index',
'values',
'nearest',
'zero',
'slinear',
'barycentric',
'krogh',
'pchip']
In [51]:
def _interpolate(m):
_s = s.interpolate(method=m)
_s.name = m
return _s
# One column per interpolation method; column labels come from each series' name.
pd.concat([_interpolate(method_name) for method_name in methods], axis=1)
Out[51]:
linear | index | values | nearest | zero | slinear | barycentric | krogh | pchip | |
---|---|---|---|---|---|---|---|---|---|
0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
2 | 2 | 2 | 2 | 1 | 1 | 2 | 2 | 2 | 2 |
3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |