{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# datasets-R" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2016-06-04T23:22:49.334909", "start_time": "2016-06-04T23:22:48.133422" }, "collapsed": true }, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2016-06-04T23:22:52.165178", "start_time": "2016-06-04T23:22:50.141275" }, "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "(list, 2, pandas.core.frame.DataFrame)" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Scrape every HTML table on the page into a list of DataFrames.\n", "# API reference: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_html.html\n", "# Dependency note: pip install html5lib\n", "url = \"http://d.hatena.ne.jp/hoxo_m/20120214/p1\"\n", "# header=0: take the first row of each table as its column names.\n", "dfs = pd.read_html(io=url, header=0)\n", "# Sanity check: container type, number of tables found, element type.\n", "type(dfs), len(dfs), type(dfs[0])" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2016-06-04T23:23:01.624747", "start_time": "2016-06-04T23:23:01.606662" }, "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
データセット名タイトル説明詳細DL
23USArrests合州国の州別暴力犯罪率このデータセットは 1973 年の合州国の 50 の州毎の、住民10万人あたりの暴行、殺人、...詳細DL
24USJudgeRatings弁護士による合州国最高裁判事の評価弁護士による合州国最高裁判事の評価詳細DL
25USPersonalExpenditure個人消費データこのデータセットは、1940,1945, 1950 そして 1960 年における、 次の各項...詳細DL
26VADeaths死亡率データ1940年代のバージニア州の100人あたりの死亡率。詳細DL
27women米国女性の平均身長と平均体重30歳から39歳のアメリカ人女性の平均身長と体重。詳細DL
\n", "
" ], "text/plain": [ " データセット名 タイトル \\\n", "23 USArrests 合州国の州別暴力犯罪率 \n", "24 USJudgeRatings 弁護士による合州国最高裁判事の評価 \n", "25 USPersonalExpenditure 個人消費データ \n", "26 VADeaths 死亡率データ \n", "27 women 米国女性の平均身長と平均体重 \n", "\n", " 説明 詳細 DL \n", "23 このデータセットは 1973 年の合州国の 50 の州毎の、住民10万人あたりの暴行、殺人、... 詳細 DL \n", "24 弁護士による合州国最高裁判事の評価 詳細 DL \n", "25 このデータセットは、1940,1945, 1950 そして 1960 年における、 次の各項... 詳細 DL \n", "26 1940年代のバージニア州の100人あたりの死亡率。 詳細 DL \n", "27 30歳から39歳のアメリカ人女性の平均身長と体重。 詳細 DL " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The first table on the page is the dataset catalogue.\n", "# Work on a copy: a later cell adds URL columns to dnames, and without the copy\n", "# those columns would also be written into the table held in dfs[0].\n", "dnames = dfs[0].copy()\n", "dnames.tail()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2016-06-04T23:23:05.654319", "start_time": "2016-06-04T23:23:03.912711" }, "collapsed": false, "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
データセット名タイトル説明詳細DL詳細_URLDL_URL
23USArrests合州国の州別暴力犯罪率このデータセットは 1973 年の合州国の 50 の州毎の、住民10万人あたりの暴行、殺人、...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/U...
24USJudgeRatings弁護士による合州国最高裁判事の評価弁護士による合州国最高裁判事の評価詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/U...
25USPersonalExpenditure個人消費データこのデータセットは、1940,1945, 1950 そして 1960 年における、 次の各項...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/U...
26VADeaths死亡率データ1940年代のバージニア州の100人あたりの死亡率。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/V...
27women米国女性の平均身長と平均体重30歳から39歳のアメリカ人女性の平均身長と体重。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/w...
\n", "
" ], "text/plain": [ " データセット名 タイトル \\\n", "23 USArrests 合州国の州別暴力犯罪率 \n", "24 USJudgeRatings 弁護士による合州国最高裁判事の評価 \n", "25 USPersonalExpenditure 個人消費データ \n", "26 VADeaths 死亡率データ \n", "27 women 米国女性の平均身長と平均体重 \n", "\n", " 説明 詳細 DL \\\n", "23 このデータセットは 1973 年の合州国の 50 の州毎の、住民10万人あたりの暴行、殺人、... 詳細 DL \n", "24 弁護士による合州国最高裁判事の評価 詳細 DL \n", "25 このデータセットは、1940,1945, 1950 そして 1960 年における、 次の各項... 詳細 DL \n", "26 1940年代のバージニア州の100人あたりの死亡率。 詳細 DL \n", "27 30歳から39歳のアメリカ人女性の平均身長と体重。 詳細 DL \n", "\n", " 詳細_URL \\\n", "23 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "24 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "25 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "26 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "27 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "\n", " DL_URL \n", "23 http://dl.dropbox.com/u/432512/20120210/data/U... \n", "24 http://dl.dropbox.com/u/432512/20120210/data/U... \n", "25 http://dl.dropbox.com/u/432512/20120210/data/U... \n", "26 http://dl.dropbox.com/u/432512/20120210/data/V... \n", "27 http://dl.dropbox.com/u/432512/20120210/data/w... 
" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Dependency note: pip install pyquery (this run installed pyquery-1.2.13 and cssselect-0.9.1)\n", "# Attribute API reference: http://pyquery.readthedocs.io/en/latest/attributes.html\n", "import pyquery\n", "\n", "\n", "def link_hrefs(doc, column):\n", "    \"\"\"Return the href of every link found directly inside a cell of the given\n", "    1-based column of the first table in doc.\"\"\"\n", "    anchors = doc.find(\"table:first tr td:nth-child({}) > a\".format(column))\n", "    return [pyquery.PyQuery(anchor).attr(\"href\") for anchor in anchors]\n", "\n", "\n", "d = pyquery.PyQuery(url)\n", "# read_html kept only the link text of the last two columns, so the link targets\n", "# are collected separately. Selector derived from Chrome's \"Copy selector\":\n", "# #days > div > div > div.section > table:nth-child(15) > tbody > tr:nth-child(1) > th:nth-child(4)\n", "# Assigning a plain list makes pandas raise when the number of links differs from\n", "# the number of rows, instead of silently pairing URLs with the wrong dataset.\n", "for column, label in [(4, \"詳細_URL\"), (5, \"DL_URL\")]:\n", "    dnames[label] = link_hrefs(d, column)\n", "dnames.tail()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2016-06-04T23:23:13.515728", "start_time": "2016-06-04T23:23:13.509968" }, "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "([], [,
], lxml.html.HtmlElement)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Inspect the parsed page: the PyQuery wrapper, the table elements it matches,\n", "# and the type of the node it wraps.\n", "root_node = d[0]\n", "d, d(\"table\"), type(root_node)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2016-06-04T23:23:19.891182", "start_time": "2016-06-04T23:23:16.908546" }, "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
0回帰anscombe, attitude, cars, esoph, Formaldehyde,...
1分散分析HairEyeColor, infert, InsectSprays, PlantGrowt...
2多変量解析airquality, attenu, attitude, eurodist, iris, ...
3時系列解析airmiles, co2, discoveries, infert
\n", "" ], "text/plain": [ " 0 1\n", "0 回帰 anscombe, attitude, cars, esoph, Formaldehyde,...\n", "1 分散分析 HairEyeColor, infert, InsectSprays, PlantGrowt...\n", "2 多変量解析 airquality, attenu, attitude, eurodist, iris, ...\n", "3 時系列解析 airmiles, co2, discoveries, infert" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# The second table on the same page maps each analysis method to its datasets.\n", "# It is parsed again without header=0 because its first row is data, not a\n", "# header, so the columns come out labelled 0 and 1.\n", "methods = pd.read_html(url)[1]\n", "methods" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2016-06-04T23:23:22.486815", "start_time": "2016-06-04T23:23:22.440736" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789
0anscombeattitudecarsesophFormaldehydeirisLifeCycleSavingsTitanicToothGrowthwomen
1HairEyeColorinfertInsectSpraysPlantGrowthTitanicToothGrowthUCBAdmissionsVADeathsNoneNone
2airqualityattenuattitudeeurodistirisLifeCycleSavingsOrchardSpraysUSArrestsUSJudgeRatingsNone
3airmilesco2discoveriesinfertNoneNoneNoneNoneNoneNone
\n", "
" ], "text/plain": [ " 0 1 2 3 4 \\\n", "0 anscombe attitude cars esoph Formaldehyde \n", "1 HairEyeColor infert InsectSprays PlantGrowth Titanic \n", "2 airquality attenu attitude eurodist iris \n", "3 airmiles co2 discoveries infert None \n", "\n", " 5 6 7 8 9 \n", "0 iris LifeCycleSavings Titanic ToothGrowth women \n", "1 ToothGrowth UCBAdmissions VADeaths None None \n", "2 LifeCycleSavings OrchardSprays USArrests USJudgeRatings None \n", "3 None None None None None " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Name the two scraped columns, then break each comma-separated dataset list\n", "# into one column per dataset; shorter lists are padded with None.\n", "methods.columns = [\"method\", \"dataset_name\"]\n", "dataset_lists = methods[\"dataset_name\"].str.replace(\", \", \",\")\n", "method_expand = dataset_lists.str.split(\",\", expand=True)\n", "method_expand" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2016-06-04T23:23:28.833049", "start_time": "2016-06-04T23:23:28.812299" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
methodvariablevalue
35時系列解析8None
36回帰9women
37分散分析9None
38多変量解析9None
39時系列解析9None
\n", "
" ], "text/plain": [ " method variable value\n", "35 時系列解析 8 None\n", "36 回帰 9 women\n", "37 分散分析 9 None\n", "38 多変量解析 9 None\n", "39 時系列解析 9 None" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Wide to long: put the method label next to the split-out dataset columns,\n", "# then unpivot so that each (method, position, dataset) combination is one row.\n", "method_wide = pd.concat([methods[[\"method\"]], method_expand], axis=1)\n", "melted = pd.melt(method_wide, id_vars=\"method\")\n", "melted.tail()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2016-06-04T23:23:29.876960", "start_time": "2016-06-04T23:23:29.812897" }, "collapsed": false, "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
methodvalue
26多変量解析USJudgeRatings
27時系列解析airmiles
28時系列解析co2
29時系列解析discoveries
30時系列解析infert
\n", "
" ], "text/plain": [ " method value\n", "26 多変量解析 USJudgeRatings\n", "27 時系列解析 airmiles\n", "28 時系列解析 co2\n", "29 時系列解析 discoveries\n", "30 時系列解析 infert" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Drop the None padding left by the ragged split, then list the datasets of each\n", "# method in case-insensitive alphabetical order. Built as one chain so that\n", "# melted itself is left untouched and the cell can be re-run safely.\n", "method_data_map = (\n", "    melted\n", "    .dropna()\n", "    .assign(lower_value=lambda frame: frame[\"value\"].str.lower())\n", "    .sort_values([\"method\", \"lower_value\"])\n", "    .loc[:, [\"method\", \"value\"]]\n", "    .reset_index(drop=True)\n", ")\n", "method_data_map.tail()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2016-06-04T23:23:31.038117", "start_time": "2016-06-04T23:23:31.012299" }, "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
methodvalue分散分析回帰多変量解析時系列解析
0Formaldehyde
1HairEyeColor
2InsectSprays
3LifeCycleSavings
4OrchardSprays
\n", "
" ], "text/plain": [ "method value 分散分析 回帰 多変量解析 時系列解析\n", "0 Formaldehyde ☓ ◯ ☓ ☓\n", "1 HairEyeColor ◯ ☓ ☓ ☓\n", "2 InsectSprays ◯ ☓ ☓ ☓\n", "3 LifeCycleSavings ☓ ◯ ◯ ☓\n", "4 OrchardSprays ☓ ☓ ◯ ☓" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build the dataset x method matrix: \"◯\" where the method is listed for the\n", "# dataset, \"☓\" everywhere else. assign() adds the marker column to a copy, so\n", "# method_data_map itself is not modified.\n", "method_data_map_pivoted = (\n", "    method_data_map\n", "    .assign(**{\"適用可能\": \"◯\"})\n", "    .pivot(index=\"value\", columns=\"method\", values=\"適用可能\")\n", "    .fillna(\"☓\")\n", "    .reset_index()\n", ")\n", "method_data_map_pivoted.head()" ] }, { "cell_type": "code", "execution_count": 117, "metadata": { "ExecuteTime": { "end_time": "2016-06-04T18:32:09.103382", "start_time": "2016-06-04T18:32:09.035226" }, "collapsed": false, "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
データセット名タイトル説明詳細DL詳細_URLDL_URLvalue分散分析回帰多変量解析時系列解析
0airmiles商用航空会社マイレージ1937年から1960年の各年の、合州国の商用航空会社の課税利用者マイル数。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/a...airmiles
1airqualityニューヨークの大気状態観測値ニューヨークの大気状態観測値。1973 年の五月から。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/a...airquality
2anscombe``同じ'' 線形単回帰に対する Anscombe の四つ組同じ通常の統計的性質(平均、分散、相関、回帰直線)を持つが、全く異なる 四つの x-y デー...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/a...anscombe
3attenuJoyner-Boore の地震波の減衰データこのデータはカリフォルニア州の 23 の地震のピーク時加速度を、様々な観測基地で測定したデー...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/a...attenu
4attitude管理者に対する態度(まだ)無い。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/a...attitude
5cars車の停車距離車が停車するまでに必要な距離のデータ。 データは 1920 年代に得られたことを注意せよ。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/c...cars
6co2Mauna Loa 火山の大気中の炭酸ガス濃度大気中の CO2 濃度が百万分の一単位 (ppm) で表され、preliminary 199...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/c...co2
7discoveries重要な発見の数1860年から1959年の各年における ``偉大な'' 発明と科学的発見の数。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/d...discoveries
8esoph喫煙、アルコールと食道ガンフランスの Ile-et-Vilaine における食道ガンの類別研究のデータ。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/e...esoph
9euroヨーロッパの為替レート様々なヨーロッパの通貨の交換比率。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/e...NaNNaNNaNNaNNaN
10eurodistヨーロッパの都市間の距離このデータはヨーロッパの21の都市間の道路距離(km 単位)を与える。 データは ``The...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/e...eurodist
11Formaldehydeホルムアルデヒドの定量これらのデータは、クロム酸と濃縮硫酸を加えた結果生ずる紫色を分光計で 読みとるホルムアルデヒ...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/F...Formaldehyde
12HairEyeColor統計の講義を受講している学生の髪と瞳の色統計の講義を受講している 592 人の学生の髪、瞳の色と性別の分布。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/H...HairEyeColor
13infert自然・人工流産後の不妊症これは条件付きロジスティック回帰が登場する前に行われた対応対照群研究である。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/i...infert
14InsectSprays昆虫への薬剤噴霧の効果異なる農薬を散布した農業実験単位毎の昆虫の計数値。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/I...InsectSprays
15irisEdgar Anderson のあやめのデータこの有名な(Fiher もしくは Anderson の)あやめのデータセットは、三種類のあや...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/i...iris
16islands世界の主要な陸地の面積10,000 平方マイルを越える陸地の千平方マイル単位の面積。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/i...NaNNaNNaNNaNNaN
17LifeCycleSavings各国の世代毎の貯蓄データ1960 - 1970 の貯蓄データ。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/L...LifeCycleSavings
18OrchardSprays果樹園への散布液の効果果樹園への散布液の様々な成分が 蜜蜂を忌避する効果を確かめる実験が、ラテン方陣デザインを用い...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/O...OrchardSprays
19PlantGrowth植物の成長に関する実験の結果対照群と二つの異なった処理条件のもとで得られた、収穫量(乾燥重量で計った)を比較する実験の結果。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/P...PlantGrowth
20Titanicタイタニック号乗客の生存このデータセットは大洋定期船 `Titanic' の破滅的な処女航海の乗客の運命に付いての情...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/T...Titanic
21ToothGrowthギニアピッグの歯の成長に対するビタミン C の効果三種類のビタミン C の投与量(0.5, 1, そして 2mg)、二種類の 摂取法(オレンジ...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/T...ToothGrowth
22UCBAdmissionsカリフォルニア大学バークレイ分校の学生入学1973年のバークレイ校大学院の六つの最大学部の受験生の総合的データで、入学状況と性別で分類...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/U...UCBAdmissions
23USArrests合州国の州別暴力犯罪率このデータセットは 1973 年の合州国の 50 の州毎の、住民10万人あたりの暴行、殺人、...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/U...USArrests
24USJudgeRatings弁護士による合州国最高裁判事の評価弁護士による合州国最高裁判事の評価詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/U...USJudgeRatings
25USPersonalExpenditure個人消費データこのデータセットは、1940,1945, 1950 そして 1960 年における、 次の各項...詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/U...NaNNaNNaNNaNNaN
26VADeaths死亡率データ1940年代のバージニア州の100人あたりの死亡率。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/V...VADeaths
27women米国女性の平均身長と平均体重30歳から39歳のアメリカ人女性の平均身長と体重。詳細DLhttp://www.is.titech.ac.jp/~mase/mase/html.jp/...http://dl.dropbox.com/u/432512/20120210/data/w...women
\n", "
" ], "text/plain": [ " データセット名 タイトル \\\n", "0 airmiles 商用航空会社マイレージ \n", "1 airquality ニューヨークの大気状態観測値 \n", "2 anscombe ``同じ'' 線形単回帰に対する Anscombe の四つ組 \n", "3 attenu Joyner-Boore の地震波の減衰データ \n", "4 attitude 管理者に対する態度 \n", "5 cars 車の停車距離 \n", "6 co2 Mauna Loa 火山の大気中の炭酸ガス濃度 \n", "7 discoveries 重要な発見の数 \n", "8 esoph 喫煙、アルコールと食道ガン \n", "9 euro ヨーロッパの為替レート \n", "10 eurodist ヨーロッパの都市間の距離 \n", "11 Formaldehyde ホルムアルデヒドの定量 \n", "12 HairEyeColor 統計の講義を受講している学生の髪と瞳の色 \n", "13 infert 自然・人工流産後の不妊症 \n", "14 InsectSprays 昆虫への薬剤噴霧の効果 \n", "15 iris Edgar Anderson のあやめのデータ \n", "16 islands 世界の主要な陸地の面積 \n", "17 LifeCycleSavings 各国の世代毎の貯蓄データ \n", "18 OrchardSprays 果樹園への散布液の効果 \n", "19 PlantGrowth 植物の成長に関する実験の結果 \n", "20 Titanic タイタニック号乗客の生存 \n", "21 ToothGrowth ギニアピッグの歯の成長に対するビタミン C の効果 \n", "22 UCBAdmissions カリフォルニア大学バークレイ分校の学生入学 \n", "23 USArrests 合州国の州別暴力犯罪率 \n", "24 USJudgeRatings 弁護士による合州国最高裁判事の評価 \n", "25 USPersonalExpenditure 個人消費データ \n", "26 VADeaths 死亡率データ \n", "27 women 米国女性の平均身長と平均体重 \n", "\n", " 説明 詳細 DL \\\n", "0 1937年から1960年の各年の、合州国の商用航空会社の課税利用者マイル数。 詳細 DL \n", "1 ニューヨークの大気状態観測値。1973 年の五月から。 詳細 DL \n", "2 同じ通常の統計的性質(平均、分散、相関、回帰直線)を持つが、全く異なる 四つの x-y デー... 詳細 DL \n", "3 このデータはカリフォルニア州の 23 の地震のピーク時加速度を、様々な観測基地で測定したデー... 詳細 DL \n", "4 (まだ)無い。 詳細 DL \n", "5 車が停車するまでに必要な距離のデータ。 データは 1920 年代に得られたことを注意せよ。 詳細 DL \n", "6 大気中の CO2 濃度が百万分の一単位 (ppm) で表され、preliminary 199... 詳細 DL \n", "7 1860年から1959年の各年における ``偉大な'' 発明と科学的発見の数。 詳細 DL \n", "8 フランスの Ile-et-Vilaine における食道ガンの類別研究のデータ。 詳細 DL \n", "9 様々なヨーロッパの通貨の交換比率。 詳細 DL \n", "10 このデータはヨーロッパの21の都市間の道路距離(km 単位)を与える。 データは ``The... 詳細 DL \n", "11 これらのデータは、クロム酸と濃縮硫酸を加えた結果生ずる紫色を分光計で 読みとるホルムアルデヒ... 詳細 DL \n", "12 統計の講義を受講している 592 人の学生の髪、瞳の色と性別の分布。 詳細 DL \n", "13 これは条件付きロジスティック回帰が登場する前に行われた対応対照群研究である。 詳細 DL \n", "14 異なる農薬を散布した農業実験単位毎の昆虫の計数値。 詳細 DL \n", "15 この有名な(Fiher もしくは Anderson の)あやめのデータセットは、三種類のあや... 
詳細 DL \n", "16 10,000 平方マイルを越える陸地の千平方マイル単位の面積。 詳細 DL \n", "17 1960 - 1970 の貯蓄データ。 詳細 DL \n", "18 果樹園への散布液の様々な成分が 蜜蜂を忌避する効果を確かめる実験が、ラテン方陣デザインを用い... 詳細 DL \n", "19 対照群と二つの異なった処理条件のもとで得られた、収穫量(乾燥重量で計った)を比較する実験の結果。 詳細 DL \n", "20 このデータセットは大洋定期船 `Titanic' の破滅的な処女航海の乗客の運命に付いての情... 詳細 DL \n", "21 三種類のビタミン C の投与量(0.5, 1, そして 2mg)、二種類の 摂取法(オレンジ... 詳細 DL \n", "22 1973年のバークレイ校大学院の六つの最大学部の受験生の総合的データで、入学状況と性別で分類... 詳細 DL \n", "23 このデータセットは 1973 年の合州国の 50 の州毎の、住民10万人あたりの暴行、殺人、... 詳細 DL \n", "24 弁護士による合州国最高裁判事の評価 詳細 DL \n", "25 このデータセットは、1940,1945, 1950 そして 1960 年における、 次の各項... 詳細 DL \n", "26 1940年代のバージニア州の100人あたりの死亡率。 詳細 DL \n", "27 30歳から39歳のアメリカ人女性の平均身長と体重。 詳細 DL \n", "\n", " 詳細_URL \\\n", "0 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "1 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "2 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "3 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "4 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "5 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "6 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "7 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "8 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "9 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "10 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "11 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "12 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "13 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "14 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "15 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "16 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "17 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "18 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "19 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "20 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "21 http://www.is.titech.ac.jp/~mase/mase/html.jp/... 
\n", "22 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "23 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "24 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "25 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "26 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "27 http://www.is.titech.ac.jp/~mase/mase/html.jp/... \n", "\n", " DL_URL value 分散分析 \\\n", "0 http://dl.dropbox.com/u/432512/20120210/data/a... airmiles ☓ \n", "1 http://dl.dropbox.com/u/432512/20120210/data/a... airquality ☓ \n", "2 http://dl.dropbox.com/u/432512/20120210/data/a... anscombe ☓ \n", "3 http://dl.dropbox.com/u/432512/20120210/data/a... attenu ☓ \n", "4 http://dl.dropbox.com/u/432512/20120210/data/a... attitude ☓ \n", "5 http://dl.dropbox.com/u/432512/20120210/data/c... cars ☓ \n", "6 http://dl.dropbox.com/u/432512/20120210/data/c... co2 ☓ \n", "7 http://dl.dropbox.com/u/432512/20120210/data/d... discoveries ☓ \n", "8 http://dl.dropbox.com/u/432512/20120210/data/e... esoph ☓ \n", "9 http://dl.dropbox.com/u/432512/20120210/data/e... NaN NaN \n", "10 http://dl.dropbox.com/u/432512/20120210/data/e... eurodist ☓ \n", "11 http://dl.dropbox.com/u/432512/20120210/data/F... Formaldehyde ☓ \n", "12 http://dl.dropbox.com/u/432512/20120210/data/H... HairEyeColor ◯ \n", "13 http://dl.dropbox.com/u/432512/20120210/data/i... infert ◯ \n", "14 http://dl.dropbox.com/u/432512/20120210/data/I... InsectSprays ◯ \n", "15 http://dl.dropbox.com/u/432512/20120210/data/i... iris ☓ \n", "16 http://dl.dropbox.com/u/432512/20120210/data/i... NaN NaN \n", "17 http://dl.dropbox.com/u/432512/20120210/data/L... LifeCycleSavings ☓ \n", "18 http://dl.dropbox.com/u/432512/20120210/data/O... OrchardSprays ☓ \n", "19 http://dl.dropbox.com/u/432512/20120210/data/P... PlantGrowth ◯ \n", "20 http://dl.dropbox.com/u/432512/20120210/data/T... Titanic ◯ \n", "21 http://dl.dropbox.com/u/432512/20120210/data/T... ToothGrowth ◯ \n", "22 http://dl.dropbox.com/u/432512/20120210/data/U... 
UCBAdmissions ◯ \n", "23 http://dl.dropbox.com/u/432512/20120210/data/U... USArrests ☓ \n", "24 http://dl.dropbox.com/u/432512/20120210/data/U... USJudgeRatings ☓ \n", "25 http://dl.dropbox.com/u/432512/20120210/data/U... NaN NaN \n", "26 http://dl.dropbox.com/u/432512/20120210/data/V... VADeaths ◯ \n", "27 http://dl.dropbox.com/u/432512/20120210/data/w... women ☓ \n", "\n", " 回帰 多変量解析 時系列解析 \n", "0 ☓ ☓ ◯ \n", "1 ☓ ◯ ☓ \n", "2 ◯ ☓ ☓ \n", "3 ☓ ◯ ☓ \n", "4 ◯ ◯ ☓ \n", "5 ◯ ☓ ☓ \n", "6 ☓ ☓ ◯ \n", "7 ☓ ☓ ◯ \n", "8 ◯ ☓ ☓ \n", "9 NaN NaN NaN \n", "10 ☓ ◯ ☓ \n", "11 ◯ ☓ ☓ \n", "12 ☓ ☓ ☓ \n", "13 ☓ ☓ ◯ \n", "14 ☓ ☓ ☓ \n", "15 ◯ ◯ ☓ \n", "16 NaN NaN NaN \n", "17 ◯ ◯ ☓ \n", "18 ☓ ◯ ☓ \n", "19 ☓ ☓ ☓ \n", "20 ◯ ☓ ☓ \n", "21 ◯ ☓ ☓ \n", "22 ☓ ☓ ☓ \n", "23 ☓ ◯ ☓ \n", "24 ☓ ◯ ☓ \n", "25 NaN NaN NaN \n", "26 ☓ ☓ ☓ \n", "27 ◯ ☓ ☓ " ] }, "execution_count": 117, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Attach the method matrix to the dataset catalogue.\n", "# Left join: keep every catalogued dataset exactly once, in catalogue order\n", "# (an outer join may reorder rows by key, depending on the pandas version).\n", "method_columns = method_data_map_pivoted.columns.drop(\"value\")\n", "dnames_detail = (\n", "    dnames\n", "    .merge(method_data_map_pivoted, left_on=\"データセット名\", right_on=\"value\", how=\"left\")\n", "    # Datasets listed under no method come out of the join as NaN; mark them\n", "    # \"☓\" like every other non-applicable pair. \"value\" stays NaN for them.\n", "    .fillna({column: \"☓\" for column in method_columns})\n", ")\n", "dnames_detail" ] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" }, "toc": { "toc_cell": false, "toc_number_sections": true, "toc_threshold": 6, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 0 }