{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# interpolate" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "site_url = \"http://www.kyoritsu-pub.co.jp/bookdetail/9784320123656\"\n", "zip_url = \"http://www.kyoritsu-pub.co.jp/app/file/goods_contents/2377.zip\"\n", "pdf_url = \"http://www.kyoritsu-pub.co.jp/app/file/goods_contents/2324.pdf\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import requests\n", "import io\n", "import shutil\n", "import zipfile\n", "import os" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IQJobPerformanceMCARMCAR.is.missingMARMAR.is.missingMNARMNAR.is.missing
count20.00000020.00000016.0000020.00000015.00000020.00000015.0000020.000000
mean100.00000010.35000010.562500.20000010.6666670.25000011.400000.250000
std14.1272412.6808292.827690.4103912.7945530.4442622.229670.444262
min78.0000007.0000007.000000.0000007.0000000.0000009.000000.000000
25%90.0000008.7500008.750000.0000009.0000000.00000010.000000.000000
50%97.50000010.00000010.000000.00000010.0000000.00000011.000000.000000
75%109.00000012.00000012.250000.00000012.0000000.25000012.500000.250000
max134.00000016.00000016.000001.00000016.0000001.00000016.000001.000000
\n", "
" ], "text/plain": [ " IQ JobPerformance MCAR MCAR.is.missing MAR \\\n", "count 20.000000 20.000000 16.00000 20.000000 15.000000 \n", "mean 100.000000 10.350000 10.56250 0.200000 10.666667 \n", "std 14.127241 2.680829 2.82769 0.410391 2.794553 \n", "min 78.000000 7.000000 7.00000 0.000000 7.000000 \n", "25% 90.000000 8.750000 8.75000 0.000000 9.000000 \n", "50% 97.500000 10.000000 10.00000 0.000000 10.000000 \n", "75% 109.000000 12.000000 12.25000 0.000000 12.000000 \n", "max 134.000000 16.000000 16.00000 1.000000 16.000000 \n", "\n", " MAR.is.missing MNAR MNAR.is.missing \n", "count 20.000000 15.00000 20.000000 \n", "mean 0.250000 11.40000 0.250000 \n", "std 0.444262 2.22967 0.444262 \n", "min 0.000000 9.00000 0.000000 \n", "25% 0.000000 10.00000 0.000000 \n", "50% 0.000000 11.00000 0.000000 \n", "75% 0.250000 12.50000 0.250000 \n", "max 1.000000 16.00000 1.000000 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_dir = \"suppl_20151001/data/missdata\"\n", "filename = \"employee_IQ_JP.csv\"\n", "file_path = data_dir + \"/\" + filename\n", "if not os.path.exists(file_path):\n", " # headers = {'user-agent': 'my-agent'}\n", " # r = requests.get(pdf_url, headers=headers)\n", " # shutil.copyfileobj(io.BytesIO(r.content), open(pdf_url.split(\"/\")[-1], \"wb\"))\n", "\n", " proxies={\n", " \"http\": \"http://xxx.xxx.xxx.:xxxx\",\n", " \"https\": \"http://xxx.xxx.xxx.:xxxx\",\n", " }\n", " r = requests.get(zip_url, proxies=proxies)\n", " with zipfile.ZipFile(io.BytesIO(r.content)) as z:\n", " z.extractall()\n", "\n", "df = pd.read_csv(file_path)\n", "df.describe()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# http://ejje.weblio.jp/content/interpolate" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": false }, "outputs": [], "source": [ "s = pd.Series([0, 1, np.nan, 3])\n", "# help(s.interpolate)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['linear',\n", " 'index',\n", " 'values',\n", " 'nearest',\n", " 'zero',\n", " 'slinear',\n", " 'barycentric',\n", " 'krogh',\n", " 'pchip']" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "methods = ['linear', 'time', 'index', 'values', 'nearest', 'zero',\n", " 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',\n", " 'polynomial', 'spline', 'piecewise_polynomial', 'pchip']\n", "methods.remove(\"time\")\n", "methods.remove(\"quadratic\")\n", "methods.remove(\"cubic\")\n", "methods.remove(\"polynomial\")\n", "methods.remove(\"spline\")\n", "methods.remove(\"piecewise_polynomial\")\n", "\n", "methods" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
linearindexvaluesnearestzeroslinearbarycentrickroghpchip
0000000000
1111111111
2222112222
3333333333
\n", "
" ], "text/plain": [ " linear index values nearest zero slinear barycentric krogh pchip\n", "0 0 0 0 0 0 0 0 0 0\n", "1 1 1 1 1 1 1 1 1 1\n", "2 2 2 2 1 1 2 2 2 2\n", "3 3 3 3 3 3 3 3 3 3" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def _interpolate(m):\n", " _s = s.interpolate(method=m)\n", " _s.name = m\n", " return _s\n", "pd.concat(map(_interpolate, methods), axis=1)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 0 }