{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# interpolate"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"site_url = \"http://www.kyoritsu-pub.co.jp/bookdetail/9784320123656\"\n",
"zip_url = \"http://www.kyoritsu-pub.co.jp/app/file/goods_contents/2377.zip\"\n",
"pdf_url = \"http://www.kyoritsu-pub.co.jp/app/file/goods_contents/2324.pdf\""
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import requests\n",
"import io\n",
"import shutil\n",
"import zipfile\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" IQ | \n",
" JobPerformance | \n",
" MCAR | \n",
" MCAR.is.missing | \n",
" MAR | \n",
" MAR.is.missing | \n",
" MNAR | \n",
" MNAR.is.missing | \n",
"
\n",
" \n",
" \n",
" \n",
" count | \n",
" 20.000000 | \n",
" 20.000000 | \n",
" 16.00000 | \n",
" 20.000000 | \n",
" 15.000000 | \n",
" 20.000000 | \n",
" 15.00000 | \n",
" 20.000000 | \n",
"
\n",
" \n",
" mean | \n",
" 100.000000 | \n",
" 10.350000 | \n",
" 10.56250 | \n",
" 0.200000 | \n",
" 10.666667 | \n",
" 0.250000 | \n",
" 11.40000 | \n",
" 0.250000 | \n",
"
\n",
" \n",
" std | \n",
" 14.127241 | \n",
" 2.680829 | \n",
" 2.82769 | \n",
" 0.410391 | \n",
" 2.794553 | \n",
" 0.444262 | \n",
" 2.22967 | \n",
" 0.444262 | \n",
"
\n",
" \n",
" min | \n",
" 78.000000 | \n",
" 7.000000 | \n",
" 7.00000 | \n",
" 0.000000 | \n",
" 7.000000 | \n",
" 0.000000 | \n",
" 9.00000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 25% | \n",
" 90.000000 | \n",
" 8.750000 | \n",
" 8.75000 | \n",
" 0.000000 | \n",
" 9.000000 | \n",
" 0.000000 | \n",
" 10.00000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 50% | \n",
" 97.500000 | \n",
" 10.000000 | \n",
" 10.00000 | \n",
" 0.000000 | \n",
" 10.000000 | \n",
" 0.000000 | \n",
" 11.00000 | \n",
" 0.000000 | \n",
"
\n",
" \n",
" 75% | \n",
" 109.000000 | \n",
" 12.000000 | \n",
" 12.25000 | \n",
" 0.000000 | \n",
" 12.000000 | \n",
" 0.250000 | \n",
" 12.50000 | \n",
" 0.250000 | \n",
"
\n",
" \n",
" max | \n",
" 134.000000 | \n",
" 16.000000 | \n",
" 16.00000 | \n",
" 1.000000 | \n",
" 16.000000 | \n",
" 1.000000 | \n",
" 16.00000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" IQ JobPerformance MCAR MCAR.is.missing MAR \\\n",
"count 20.000000 20.000000 16.00000 20.000000 15.000000 \n",
"mean 100.000000 10.350000 10.56250 0.200000 10.666667 \n",
"std 14.127241 2.680829 2.82769 0.410391 2.794553 \n",
"min 78.000000 7.000000 7.00000 0.000000 7.000000 \n",
"25% 90.000000 8.750000 8.75000 0.000000 9.000000 \n",
"50% 97.500000 10.000000 10.00000 0.000000 10.000000 \n",
"75% 109.000000 12.000000 12.25000 0.000000 12.000000 \n",
"max 134.000000 16.000000 16.00000 1.000000 16.000000 \n",
"\n",
" MAR.is.missing MNAR MNAR.is.missing \n",
"count 20.000000 15.00000 20.000000 \n",
"mean 0.250000 11.40000 0.250000 \n",
"std 0.444262 2.22967 0.444262 \n",
"min 0.000000 9.00000 0.000000 \n",
"25% 0.000000 10.00000 0.000000 \n",
"50% 0.000000 11.00000 0.000000 \n",
"75% 0.250000 12.50000 0.250000 \n",
"max 1.000000 16.00000 1.000000 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_dir = \"suppl_20151001/data/missdata\"\n",
"filename = \"employee_IQ_JP.csv\"\n",
"file_path = data_dir + \"/\" + filename\n",
"if not os.path.exists(file_path):\n",
" # headers = {'user-agent': 'my-agent'}\n",
" # r = requests.get(pdf_url, headers=headers)\n",
" # shutil.copyfileobj(io.BytesIO(r.content), open(pdf_url.split(\"/\")[-1], \"wb\"))\n",
"\n",
" proxies={\n",
" \"http\": \"http://xxx.xxx.xxx.:xxxx\",\n",
" \"https\": \"http://xxx.xxx.xxx.:xxxx\",\n",
" }\n",
" r = requests.get(zip_url, proxies=proxies)\n",
" with zipfile.ZipFile(io.BytesIO(r.content)) as z:\n",
" z.extractall()\n",
"\n",
"df = pd.read_csv(file_path)\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# http://ejje.weblio.jp/content/interpolate"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"s = pd.Series([0, 1, np.nan, 3])\n",
"# help(s.interpolate)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['linear',\n",
" 'index',\n",
" 'values',\n",
" 'nearest',\n",
" 'zero',\n",
" 'slinear',\n",
" 'barycentric',\n",
" 'krogh',\n",
" 'pchip']"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"methods = ['linear', 'time', 'index', 'values', 'nearest', 'zero',\n",
" 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',\n",
" 'polynomial', 'spline', 'piecewise_polynomial', 'pchip']\n",
"methods.remove(\"time\")\n",
"methods.remove(\"quadratic\")\n",
"methods.remove(\"cubic\")\n",
"methods.remove(\"polynomial\")\n",
"methods.remove(\"spline\")\n",
"methods.remove(\"piecewise_polynomial\")\n",
"\n",
"methods"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" linear | \n",
" index | \n",
" values | \n",
" nearest | \n",
" zero | \n",
" slinear | \n",
" barycentric | \n",
" krogh | \n",
" pchip | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" linear index values nearest zero slinear barycentric krogh pchip\n",
"0 0 0 0 0 0 0 0 0 0\n",
"1 1 1 1 1 1 1 1 1 1\n",
"2 2 2 2 1 1 2 2 2 2\n",
"3 3 3 3 3 3 3 3 3 3"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def _interpolate(m):\n",
" _s = s.interpolate(method=m)\n",
" _s.name = m\n",
" return _s\n",
"pd.concat(map(_interpolate, methods), axis=1)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}