diff --git a/Titanic_report.pdf b/Titanic_report.pdf new file mode 100644 index 000000000..671a20894 Binary files /dev/null and b/Titanic_report.pdf differ diff --git a/titanic - SVM.ipynb b/titanic - SVM.ipynb new file mode 100644 index 000000000..efc68101b --- /dev/null +++ b/titanic - SVM.ipynb @@ -0,0 +1,7964 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "_cell_guid": "9e319230-1cc2-4b19-ad66-06a07e07fc60", + "_execution_state": "idle", + "_uuid": "03de849fb872e11b3db2a2c99563783ce6ba784f" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(891, 12) (418, 11) ['PassengerId' 'Survived' 'Pclass' 'Name' 'Sex' 'Age' 'SibSp' 'Parch'\n", + " 'Ticket' 'Fare' 'Cabin' 'Embarked']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\HP-PC\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py:6692: FutureWarning: Sorting because non-concatenation axis is not aligned. 
A future version\n", + "of pandas will change to not sort by default.\n", + "\n", + "To accept the future behavior, pass 'sort=False'.\n", + "\n", + "To retain the current behavior and silence the warning, pass 'sort=True'.\n", + "\n", + " sort=sort)\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "%matplotlib inline\n", + "\n", + "# ignore Deprecation Warning\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\", category=DeprecationWarning) \n", + "\n", + "# Random Forest Regressor\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "\n", + "# Neural Network\n", + "import keras \n", + "from keras.models import Sequential \n", + "from keras.layers import Dense\n", + "\n", + "# load the data\n", + "df_train = pd.read_csv('data/train.csv')\n", + "df_test = pd.read_csv('data/test.csv')\n", + "df = df_train.append(df_test , ignore_index = True)\n", + "\n", + "# some quick inspections\n", + "print(df_train.shape, df_test.shape, df_train.columns.values)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
44244303Petterson, Mr. Johan Emilmale25.0103470767.7750NaNS
54354412Beane, Mr. Edwardmale32.010290826.0000NaNS
54754812Padro y Manent, Mr. JulianmaleNaN00SC/PARIS 214613.8625NaNC
79279303Sage, Miss. Stella AnnafemaleNaN82CA. 234369.5500NaNS
12712813Madsen, Mr. Fridtjof Arnemale24.000C 173697.1417NaNS
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass Name Sex Age \\\n", + "442 443 0 3 Petterson, Mr. Johan Emil male 25.0 \n", + "543 544 1 2 Beane, Mr. Edward male 32.0 \n", + "547 548 1 2 Padro y Manent, Mr. Julian male NaN \n", + "792 793 0 3 Sage, Miss. Stella Anna female NaN \n", + "127 128 1 3 Madsen, Mr. Fridtjof Arne male 24.0 \n", + "\n", + " SibSp Parch Ticket Fare Cabin Embarked \n", + "442 1 0 347076 7.7750 NaN S \n", + "543 1 0 2908 26.0000 NaN S \n", + "547 0 0 SC/PARIS 2146 13.8625 NaN C \n", + "792 8 2 CA. 2343 69.5500 NaN S \n", + "127 0 0 C 17369 7.1417 NaN S " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_train.sample(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pclass" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "_cell_guid": "8ac53d3d-bf63-4b0a-b4dd-cf69ebb1b189", + "_execution_state": "idle", + "_uuid": "7a92a95fd0642b65500aa0fe2d5453f8f9cff24c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there is any NAN\n", + "df['Pclass'].isnull().sum(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "_cell_guid": "366c466d-d5f1-41f2-9568-3abc426bfc75", + "_execution_state": "idle", + "_uuid": "d380019bd6179b6ddcde472ce10771788ae204f4" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassSurvived
010.629630
120.472826
230.242363
\n", + "
" + ], + "text/plain": [ + " Pclass Survived\n", + "0 1 0.629630\n", + "1 2 0.472826\n", + "2 3 0.242363" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the correlation between Pclass and Survived\n", + "df[['Pclass', 'Survived']].groupby(['Pclass'], as_index=False).mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "cf76f8ac-cc51-4ffb-8066-b605c5aa971c", + "_execution_state": "idle", + "_uuid": "617016b19a60d3ae9b2b1dbfefb73f4fb56e13a9" + }, + "source": [ + "### Name" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "_cell_guid": "fa11d6a2-ad3d-443b-8579-de525fdd20c1", + "_execution_state": "idle", + "_uuid": "a40e18c8e9fe2692dec94722cc6ab316d71b5003" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 Braund, Mr. Owen Harris\n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th...\n", + "2 Heikkinen, Miss. Laina\n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel)\n", + "4 Allen, Mr. William Henry\n", + "5 Moran, Mr. James\n", + "6 McCarthy, Mr. Timothy J\n", + "7 Palsson, Master. Gosta Leonard\n", + "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)\n", + "9 Nasser, Mrs. 
Nicholas (Adele Achem)\n", + "Name: Name, dtype: object" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Name.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "_cell_guid": "bb1fc2fe-1947-4f89-99fa-d621711615fc", + "_execution_state": "idle", + "_uuid": "93e6890cecf2d3ae919ec30b24588d780092720d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Mr 757\n", + "Miss 260\n", + "Mrs 197\n", + "Master 61\n", + "Rev 8\n", + "Dr 8\n", + "Col 4\n", + "Mlle 2\n", + "Ms 2\n", + "Major 2\n", + "Jonkheer 1\n", + "Lady 1\n", + "Capt 1\n", + "Dona 1\n", + "Sir 1\n", + "Don 1\n", + "the Countess 1\n", + "Mme 1\n", + "Name: Title, dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Title'] = df.Name.map( lambda x: x.split(',')[1].split( '.' )[0].strip())\n", + "\n", + "# inspect the amount of people for each title\n", + "df['Title'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "_cell_guid": "67b67929-d7c3-4d52-9cde-eb37b4ef256d", + "_execution_state": "idle", + "_uuid": "a7840964bab69268bcc0e2516770f7faf00aad1d" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\HP-PC\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:190: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " self._setitem_with_indexer(indexer, value)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TitleSurvived
0Master0.575000
1Miss0.701087
2Mr0.156673
3Mrs0.796875
4Others0.318182
\n", + "
" + ], + "text/plain": [ + " Title Survived\n", + "0 Master 0.575000\n", + "1 Miss 0.701087\n", + "2 Mr 0.156673\n", + "3 Mrs 0.796875\n", + "4 Others 0.318182" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Title'] = df['Title'].replace('Mlle', 'Miss')\n", + "df['Title'] = df['Title'].replace(['Mme','Lady','Ms'], 'Mrs')\n", + "df.Title.loc[ (df.Title != 'Master') & (df.Title != 'Mr') & (df.Title != 'Miss') \n", + " & (df.Title != 'Mrs')] = 'Others'\n", + "\n", + "# inspect the correlation between Title and Survived\n", + "df[['Title', 'Survived']].groupby(['Title'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "_cell_guid": "18a55b3d-677f-4611-a41e-be3f9ff49c67", + "_execution_state": "idle", + "_uuid": "77a517250b3a9b32da2436b8522e864e4e3e7016" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Mr 757\n", + "Miss 262\n", + "Mrs 201\n", + "Master 61\n", + "Others 28\n", + "Name: Title, dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the amount of people for each title\n", + "df['Title'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "_cell_guid": "516ceeba-2dab-4fff-8f5d-4fe94f4eddc8", + "_execution_state": "idle", + "_uuid": "3f031015c4e518d5b0b1874bac37a0d899eb2a60" + }, + "outputs": [], + "source": [ + "df = pd.concat([df, pd.get_dummies(df['Title'])], axis=1).drop(labels=['Name'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeCabinEmbarkedFareParchPassengerIdPclassSexSibSpSurvivedTicketTitleMasterMissMrMrsOthers
022.0NaNS7.2500013male10.0A/5 21171Mr00100
138.0C85C71.2833021female11.0PC 17599Mrs00010
226.0NaNS7.9250033female01.0STON/O2. 3101282Miss01000
335.0C123S53.1000041female11.0113803Mrs00010
435.0NaNS8.0500053male00.0373450Mr00100
5NaNNaNQ8.4583063male00.0330877Mr00100
654.0E46S51.8625071male00.017463Mr00100
72.0NaNS21.0750183male30.0349909Master10000
827.0NaNS11.1333293female01.0347742Mrs00010
914.0NaNC30.07080102female11.0237736Mrs00010
104.0G6S16.70001113female11.0PP 9549Miss01000
1158.0C103S26.55000121female01.0113783Miss01000
1220.0NaNS8.05000133male00.0A/5. 2151Mr00100
1339.0NaNS31.27505143male10.0347082Mr00100
1414.0NaNS7.85420153female00.0350406Miss01000
1555.0NaNS16.00000162female01.0248706Mrs00010
162.0NaNQ29.12501173male40.0382652Master10000
17NaNNaNS13.00000182male01.0244373Mr00100
1831.0NaNS18.00000193female10.0345763Mrs00010
19NaNNaNC7.22500203female01.02649Mrs00010
2035.0NaNS26.00000212male00.0239865Mr00100
2134.0D56S13.00000222male01.0248698Mr00100
2215.0NaNQ8.02920233female01.0330923Miss01000
2328.0A6S35.50000241male01.0113788Mr00100
248.0NaNS21.07501253female30.0349909Miss01000
2538.0NaNS31.38755263female11.0347077Mrs00010
26NaNNaNC7.22500273male00.02631Mr00100
2719.0C23 C25 C27S263.00002281male30.019950Mr00100
28NaNNaNQ7.87920293female01.0330959Miss01000
29NaNNaNS7.89580303male00.0349216Mr00100
......................................................
127921.0NaNQ7.7500012803male0NaN364858Mr00100
12806.0NaNS21.0750112813male3NaN349909Master10000
128123.0B24S93.5000012821male0NaN12749Mr00100
128251.0D28S39.4000112831female0NaNPC 17592Mrs00010
128313.0NaNS20.2500212843male0NaNC.A. 2673Master10000
128447.0NaNS10.5000012852male0NaNC.A. 30769Mr00100
128529.0NaNS22.0250112863male3NaN315153Mr00100
128618.0C31S60.0000012871female1NaN13695Mrs00010
128724.0NaNQ7.2500012883male0NaN371109Mr00100
128848.0B41C79.2000112891female1NaN13567Mrs00010
128922.0NaNS7.7750012903male0NaN347065Mr00100
129031.0NaNQ7.7333012913male0NaN21332Mr00100
129130.0C7S164.8667012921female0NaN36928Miss01000
129238.0NaNS21.0000012932male1NaN28664Mr00100
129322.0NaNC59.4000112941female0NaN112378Miss01000
129417.0NaNS47.1000012951male0NaN113059Mr00100
129543.0D40C27.7208012961male1NaN17765Mr00100
129620.0D38C13.8625012972male0NaNSC/PARIS 2166Mr00100
129723.0NaNS10.5000012982male1NaN28666Mr00100
129850.0C80C211.5000112991male1NaN113503Mr00100
1299NaNNaNQ7.7208013003female0NaN334915Miss01000
13003.0NaNS13.7750113013female1NaNSOTON/O.Q. 3101315Miss01000
1301NaNNaNQ7.7500013023female0NaN365237Miss01000
130237.0C78Q90.0000013031female1NaN19928Mrs00010
130328.0NaNS7.7750013043female0NaN347086Miss01000
1304NaNNaNS8.0500013053male0NaNA.5. 3236Mr00100
130539.0C105C108.9000013061female0NaNPC 17758Others00001
130638.5NaNS7.2500013073male0NaNSOTON/O.Q. 3101262Mr00100
1307NaNNaNS8.0500013083male0NaN359309Mr00100
1308NaNNaNC22.3583113093male1NaN2668Master10000
\n", + "

1309 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " Age Cabin Embarked Fare Parch PassengerId Pclass \\\n", + "0 22.0 NaN S 7.2500 0 1 3 \n", + "1 38.0 C85 C 71.2833 0 2 1 \n", + "2 26.0 NaN S 7.9250 0 3 3 \n", + "3 35.0 C123 S 53.1000 0 4 1 \n", + "4 35.0 NaN S 8.0500 0 5 3 \n", + "5 NaN NaN Q 8.4583 0 6 3 \n", + "6 54.0 E46 S 51.8625 0 7 1 \n", + "7 2.0 NaN S 21.0750 1 8 3 \n", + "8 27.0 NaN S 11.1333 2 9 3 \n", + "9 14.0 NaN C 30.0708 0 10 2 \n", + "10 4.0 G6 S 16.7000 1 11 3 \n", + "11 58.0 C103 S 26.5500 0 12 1 \n", + "12 20.0 NaN S 8.0500 0 13 3 \n", + "13 39.0 NaN S 31.2750 5 14 3 \n", + "14 14.0 NaN S 7.8542 0 15 3 \n", + "15 55.0 NaN S 16.0000 0 16 2 \n", + "16 2.0 NaN Q 29.1250 1 17 3 \n", + "17 NaN NaN S 13.0000 0 18 2 \n", + "18 31.0 NaN S 18.0000 0 19 3 \n", + "19 NaN NaN C 7.2250 0 20 3 \n", + "20 35.0 NaN S 26.0000 0 21 2 \n", + "21 34.0 D56 S 13.0000 0 22 2 \n", + "22 15.0 NaN Q 8.0292 0 23 3 \n", + "23 28.0 A6 S 35.5000 0 24 1 \n", + "24 8.0 NaN S 21.0750 1 25 3 \n", + "25 38.0 NaN S 31.3875 5 26 3 \n", + "26 NaN NaN C 7.2250 0 27 3 \n", + "27 19.0 C23 C25 C27 S 263.0000 2 28 1 \n", + "28 NaN NaN Q 7.8792 0 29 3 \n", + "29 NaN NaN S 7.8958 0 30 3 \n", + "... ... ... ... ... ... ... ... 
\n", + "1279 21.0 NaN Q 7.7500 0 1280 3 \n", + "1280 6.0 NaN S 21.0750 1 1281 3 \n", + "1281 23.0 B24 S 93.5000 0 1282 1 \n", + "1282 51.0 D28 S 39.4000 1 1283 1 \n", + "1283 13.0 NaN S 20.2500 2 1284 3 \n", + "1284 47.0 NaN S 10.5000 0 1285 2 \n", + "1285 29.0 NaN S 22.0250 1 1286 3 \n", + "1286 18.0 C31 S 60.0000 0 1287 1 \n", + "1287 24.0 NaN Q 7.2500 0 1288 3 \n", + "1288 48.0 B41 C 79.2000 1 1289 1 \n", + "1289 22.0 NaN S 7.7750 0 1290 3 \n", + "1290 31.0 NaN Q 7.7333 0 1291 3 \n", + "1291 30.0 C7 S 164.8667 0 1292 1 \n", + "1292 38.0 NaN S 21.0000 0 1293 2 \n", + "1293 22.0 NaN C 59.4000 1 1294 1 \n", + "1294 17.0 NaN S 47.1000 0 1295 1 \n", + "1295 43.0 D40 C 27.7208 0 1296 1 \n", + "1296 20.0 D38 C 13.8625 0 1297 2 \n", + "1297 23.0 NaN S 10.5000 0 1298 2 \n", + "1298 50.0 C80 C 211.5000 1 1299 1 \n", + "1299 NaN NaN Q 7.7208 0 1300 3 \n", + "1300 3.0 NaN S 13.7750 1 1301 3 \n", + "1301 NaN NaN Q 7.7500 0 1302 3 \n", + "1302 37.0 C78 Q 90.0000 0 1303 1 \n", + "1303 28.0 NaN S 7.7750 0 1304 3 \n", + "1304 NaN NaN S 8.0500 0 1305 3 \n", + "1305 39.0 C105 C 108.9000 0 1306 1 \n", + "1306 38.5 NaN S 7.2500 0 1307 3 \n", + "1307 NaN NaN S 8.0500 0 1308 3 \n", + "1308 NaN NaN C 22.3583 1 1309 3 \n", + "\n", + " Sex SibSp Survived Ticket Title Master Miss Mr \\\n", + "0 male 1 0.0 A/5 21171 Mr 0 0 1 \n", + "1 female 1 1.0 PC 17599 Mrs 0 0 0 \n", + "2 female 0 1.0 STON/O2. 3101282 Miss 0 1 0 \n", + "3 female 1 1.0 113803 Mrs 0 0 0 \n", + "4 male 0 0.0 373450 Mr 0 0 1 \n", + "5 male 0 0.0 330877 Mr 0 0 1 \n", + "6 male 0 0.0 17463 Mr 0 0 1 \n", + "7 male 3 0.0 349909 Master 1 0 0 \n", + "8 female 0 1.0 347742 Mrs 0 0 0 \n", + "9 female 1 1.0 237736 Mrs 0 0 0 \n", + "10 female 1 1.0 PP 9549 Miss 0 1 0 \n", + "11 female 0 1.0 113783 Miss 0 1 0 \n", + "12 male 0 0.0 A/5. 
2151 Mr 0 0 1 \n", + "13 male 1 0.0 347082 Mr 0 0 1 \n", + "14 female 0 0.0 350406 Miss 0 1 0 \n", + "15 female 0 1.0 248706 Mrs 0 0 0 \n", + "16 male 4 0.0 382652 Master 1 0 0 \n", + "17 male 0 1.0 244373 Mr 0 0 1 \n", + "18 female 1 0.0 345763 Mrs 0 0 0 \n", + "19 female 0 1.0 2649 Mrs 0 0 0 \n", + "20 male 0 0.0 239865 Mr 0 0 1 \n", + "21 male 0 1.0 248698 Mr 0 0 1 \n", + "22 female 0 1.0 330923 Miss 0 1 0 \n", + "23 male 0 1.0 113788 Mr 0 0 1 \n", + "24 female 3 0.0 349909 Miss 0 1 0 \n", + "25 female 1 1.0 347077 Mrs 0 0 0 \n", + "26 male 0 0.0 2631 Mr 0 0 1 \n", + "27 male 3 0.0 19950 Mr 0 0 1 \n", + "28 female 0 1.0 330959 Miss 0 1 0 \n", + "29 male 0 0.0 349216 Mr 0 0 1 \n", + "... ... ... ... ... ... ... ... .. \n", + "1279 male 0 NaN 364858 Mr 0 0 1 \n", + "1280 male 3 NaN 349909 Master 1 0 0 \n", + "1281 male 0 NaN 12749 Mr 0 0 1 \n", + "1282 female 0 NaN PC 17592 Mrs 0 0 0 \n", + "1283 male 0 NaN C.A. 2673 Master 1 0 0 \n", + "1284 male 0 NaN C.A. 30769 Mr 0 0 1 \n", + "1285 male 3 NaN 315153 Mr 0 0 1 \n", + "1286 female 1 NaN 13695 Mrs 0 0 0 \n", + "1287 male 0 NaN 371109 Mr 0 0 1 \n", + "1288 female 1 NaN 13567 Mrs 0 0 0 \n", + "1289 male 0 NaN 347065 Mr 0 0 1 \n", + "1290 male 0 NaN 21332 Mr 0 0 1 \n", + "1291 female 0 NaN 36928 Miss 0 1 0 \n", + "1292 male 1 NaN 28664 Mr 0 0 1 \n", + "1293 female 0 NaN 112378 Miss 0 1 0 \n", + "1294 male 0 NaN 113059 Mr 0 0 1 \n", + "1295 male 1 NaN 17765 Mr 0 0 1 \n", + "1296 male 0 NaN SC/PARIS 2166 Mr 0 0 1 \n", + "1297 male 1 NaN 28666 Mr 0 0 1 \n", + "1298 male 1 NaN 113503 Mr 0 0 1 \n", + "1299 female 0 NaN 334915 Miss 0 1 0 \n", + "1300 female 1 NaN SOTON/O.Q. 3101315 Miss 0 1 0 \n", + "1301 female 0 NaN 365237 Miss 0 1 0 \n", + "1302 female 1 NaN 19928 Mrs 0 0 0 \n", + "1303 female 0 NaN 347086 Miss 0 1 0 \n", + "1304 male 0 NaN A.5. 3236 Mr 0 0 1 \n", + "1305 female 0 NaN PC 17758 Others 0 0 0 \n", + "1306 male 0 NaN SOTON/O.Q. 
3101262 Mr 0 0 1 \n", + "1307 male 0 NaN 359309 Mr 0 0 1 \n", + "1308 male 1 NaN 2668 Master 1 0 0 \n", + "\n", + " Mrs Others \n", + "0 0 0 \n", + "1 1 0 \n", + "2 0 0 \n", + "3 1 0 \n", + "4 0 0 \n", + "5 0 0 \n", + "6 0 0 \n", + "7 0 0 \n", + "8 1 0 \n", + "9 1 0 \n", + "10 0 0 \n", + "11 0 0 \n", + "12 0 0 \n", + "13 0 0 \n", + "14 0 0 \n", + "15 1 0 \n", + "16 0 0 \n", + "17 0 0 \n", + "18 1 0 \n", + "19 1 0 \n", + "20 0 0 \n", + "21 0 0 \n", + "22 0 0 \n", + "23 0 0 \n", + "24 0 0 \n", + "25 1 0 \n", + "26 0 0 \n", + "27 0 0 \n", + "28 0 0 \n", + "29 0 0 \n", + "... ... ... \n", + "1279 0 0 \n", + "1280 0 0 \n", + "1281 0 0 \n", + "1282 1 0 \n", + "1283 0 0 \n", + "1284 0 0 \n", + "1285 0 0 \n", + "1286 1 0 \n", + "1287 0 0 \n", + "1288 1 0 \n", + "1289 0 0 \n", + "1290 0 0 \n", + "1291 0 0 \n", + "1292 0 0 \n", + "1293 0 0 \n", + "1294 0 0 \n", + "1295 0 0 \n", + "1296 0 0 \n", + "1297 0 0 \n", + "1298 0 0 \n", + "1299 0 0 \n", + "1300 0 0 \n", + "1301 0 0 \n", + "1302 1 0 \n", + "1303 0 0 \n", + "1304 0 0 \n", + "1305 0 1 \n", + "1306 0 0 \n", + "1307 0 0 \n", + "1308 0 0 \n", + "\n", + "[1309 rows x 17 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "58e43a50-20cc-40d4-bae8-ab455cd52887", + "_execution_state": "idle", + "_uuid": "60486db5b7e05da01f617a8555289670e1ab9c3a" + }, + "source": [ + "### Sex" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "_cell_guid": "79870c7d-e04f-4269-bf4d-ff314bfc1e96", + "_execution_state": "idle", + "_uuid": "c8262045e243302a4d4e84426c4b8c2932a0702b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there is any NAN\n", + "df.Sex.isnull().sum(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + 
"metadata": { + "_cell_guid": "fbf9e34d-01cd-4004-a509-cbc94b66edbc", + "_execution_state": "idle", + "_uuid": "ff1e3b6673013d9ab3d16f113c3bc57520572d4a" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SexSurvived
0female0.742038
1male0.188908
\n", + "
" + ], + "text/plain": [ + " Sex Survived\n", + "0 female 0.742038\n", + "1 male 0.188908" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the correlation between Sex and Survived\n", + "df[['Sex', 'Survived']].groupby(['Sex'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "_cell_guid": "b083a394-d2d4-4449-b745-77854545c825", + "_execution_state": "idle", + "_uuid": "4d5e24fded5c0296f278a2b8100c896c88050635" + }, + "outputs": [], + "source": [ + "# map the two genders to 0 and 1\n", + "df.Sex = df.Sex.map({'male':0, 'female':1})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "a2e1256c-73d3-4f73-b51c-eb7aaca1790a", + "_execution_state": "idle", + "_uuid": "025f3222286d5591c0600ef07ed6ed0c8f1a3309" + }, + "source": [ + "### Age" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "_cell_guid": "8ba1f221-c912-4eac-a944-6b0afa88a382", + "_execution_state": "idle", + "_uuid": "30bc1b17267fb3eb668f4d383b649ecca05051a3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "263" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there is any NAN\n", + "df.Age.isnull().sum(axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "441e1d2e-1b66-4482-ba21-987ac1d96b34", + "_execution_state": "idle", + "_uuid": "b812f4eb80b8b2d4116dafbc70991f6c9a6d8c1e" + }, + "source": [ + "### SibSp and Parch" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "_cell_guid": "a4e895a6-47cf-4641-8228-14ea595a9ea2", + "_execution_state": "idle", + "_uuid": "a6cc2c00d763ac3f55f349f21d5679dfd9b5359b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0, 0)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there 
is any NAN\n", + "df.SibSp.isnull().sum(axis=0), df.Parch.isnull().sum(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "_cell_guid": "cca00b7a-da36-4903-a025-ef4c50dab61e", + "_execution_state": "idle", + "_uuid": "03437dedc00ad11570635e56ae79faee105bd808" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FamilySurvived
010.303538
120.552795
230.578431
340.724138
450.200000
560.136364
670.333333
780.000000
8110.000000
\n", + "
" + ], + "text/plain": [ + " Family Survived\n", + "0 1 0.303538\n", + "1 2 0.552795\n", + "2 3 0.578431\n", + "3 4 0.724138\n", + "4 5 0.200000\n", + "5 6 0.136364\n", + "6 7 0.333333\n", + "7 8 0.000000\n", + "8 11 0.000000" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create a new feature \"Family\" = SibSp + Parch + 1 (the +1 presumably counts the passenger)\n", + "df['Family'] = df['SibSp'] + df['Parch'] + 1\n", + "\n", + "# inspect the mean of Survived (i.e. the survival rate) for each Family size\n", + "df[['Family', 'Survived']].groupby(['Family'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "_cell_guid": "5217e017-3029-4fbe-afc6-3315d5937431", + "_execution_state": "idle", + "_uuid": "5099b2a2783647bce53f71acb07af846641b3c87" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1 790\n", + "2 235\n", + "3 159\n", + "4 43\n", + "6 25\n", + "5 22\n", + "7 16\n", + "11 11\n", + "8 8\n", + "Name: Family, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the number of passengers for each Family size\n", + "df['Family'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "_cell_guid": "88730abd-facd-4842-ba22-6fab5e2ec332", + "_execution_state": "idle", + "_uuid": "2df0bad861e11098213fcbb90a7787504c793a9b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FamilySurvived
000.161290
110.303538
220.552795
330.578431
440.724138
\n", + "
" + ], + "text/plain": [ + " Family Survived\n", + "0 0 0.161290\n", + "1 1 0.303538\n", + "2 2 0.552795\n", + "3 3 0.578431\n", + "4 4 0.724138" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Family = df.Family.map(lambda x: 0 if x > 4 else x)\n", + "df[['Family', 'Survived']].groupby(['Family'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "_cell_guid": "d26de4ea-d9c9-460d-8a2f-f3e388de9b83", + "_execution_state": "idle", + "_uuid": "b4055838fa086398aa33e6804cfc2ec871d72fc3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1 790\n", + "2 235\n", + "3 159\n", + "0 82\n", + "4 43\n", + "Name: Family, dtype: int64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Family'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "380e18ed-ce81-4a4c-ac47-bc8ac4db8122", + "_execution_state": "idle", + "_uuid": "9b02b9181f1224830ad4a93cb0477aa278ec4d38" + }, + "source": [ + "### Ticket" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "_cell_guid": "3e355a5b-65dc-4072-9cf8-2aa9e2c5c311", + "_execution_state": "idle", + "_uuid": "09ad80a90051b181a3ffd1dd7deb7671dbce32b8" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there is any NAN\n", + "df.Ticket.isnull().sum(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "_cell_guid": "dce618df-3f32-45a2-814b-ef392c2248b3", + "_execution_state": "idle", + "_uuid": "c8f76441795cf10e50a38c538da303a488741d08" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 A/5 21171\n", + "1 PC 17599\n", + "2 STON/O2. 
3101282\n", + "3 113803\n", + "4 373450\n", + "5 330877\n", + "6 17463\n", + "7 349909\n", + "8 347742\n", + "9 237736\n", + "10 PP 9549\n", + "11 113783\n", + "12 A/5. 2151\n", + "13 347082\n", + "14 350406\n", + "15 248706\n", + "16 382652\n", + "17 244373\n", + "18 345763\n", + "19 2649\n", + "Name: Ticket, dtype: object" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Ticket.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "_cell_guid": "78913d9b-373b-41ad-bb86-2c4e3b18e183", + "_execution_state": "idle", + "_uuid": "4200860d04ec1319a5fa456a11ffe1f7138b7eb8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TicketSurvived
010.630137
120.464481
230.239203
340.200000
450.000000
560.166667
670.111111
780.000000
891.000000
9A0.068966
10C0.340426
11F0.571429
12L0.250000
13P0.646154
14S0.323077
15W0.153846
\n", + "
" + ], + "text/plain": [ + " Ticket Survived\n", + "0 1 0.630137\n", + "1 2 0.464481\n", + "2 3 0.239203\n", + "3 4 0.200000\n", + "4 5 0.000000\n", + "5 6 0.166667\n", + "6 7 0.111111\n", + "7 8 0.000000\n", + "8 9 1.000000\n", + "9 A 0.068966\n", + "10 C 0.340426\n", + "11 F 0.571429\n", + "12 L 0.250000\n", + "13 P 0.646154\n", + "14 S 0.323077\n", + "15 W 0.153846" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Ticket = df.Ticket.map(lambda x: x[0])\n", + "\n", + "# inspect the mean of Survived (i.e. the survival rate) for each Ticket prefix (first character)\n", + "df[['Ticket', 'Survived']].groupby(['Ticket'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "_cell_guid": "efd9bf4b-03bb-4f87-b709-f9662a1b2e51", + "_execution_state": "idle", + "_uuid": "e0a4426fc762e668c82b46fe9bd41fdb92786fa4" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "3 429\n", + "2 278\n", + "1 210\n", + "P 98\n", + "S 98\n", + "C 77\n", + "A 42\n", + "W 19\n", + "7 13\n", + "F 13\n", + "4 11\n", + "6 9\n", + "L 5\n", + "5 3\n", + "9 2\n", + "8 2\n", + "Name: Ticket, dtype: int64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the number of passengers for each Ticket prefix\n", + "df['Ticket'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "_cell_guid": "7d2ecc98-5a2a-4066-87ca-1fe5ca6594d2", + "_execution_state": "idle", + "_uuid": "7c911250fe2a3ab1ed23f97acce1cdb855cbc181" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TicketFare
0165.771211
1220.235194
2315.380682
3416.764036
4519.262500
5619.153711
679.197438
788.397900
897.750000
9A10.189681
10C28.152273
11F24.677246
12L1.515000
13P119.698253
14S17.542900
15W31.056579
\n", + "
" + ], + "text/plain": [ + " Ticket Fare\n", + "0 1 65.771211\n", + "1 2 20.235194\n", + "2 3 15.380682\n", + "3 4 16.764036\n", + "4 5 19.262500\n", + "5 6 19.153711\n", + "6 7 9.197438\n", + "7 8 8.397900\n", + "8 9 7.750000\n", + "9 A 10.189681\n", + "10 C 28.152273\n", + "11 F 24.677246\n", + "12 L 1.515000\n", + "13 P 119.698253\n", + "14 S 17.542900\n", + "15 W 31.056579" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['Ticket', 'Fare']].groupby(['Ticket'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "_cell_guid": "21e8a215-01a0-45d0-bc41-20f4a25aa509", + "_execution_state": "idle", + "_uuid": "d809da11e565b2524707b4086121c45f880a5554" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TicketPclass
011.147619
122.309353
232.904429
343.000000
452.333333
562.111111
673.000000
783.000000
893.000000
9A3.000000
10C2.558442
11F1.846154
12L3.000000
13P1.102041
14S2.561224
15W2.315789
\n", + "
" + ], + "text/plain": [ + " Ticket Pclass\n", + "0 1 1.147619\n", + "1 2 2.309353\n", + "2 3 2.904429\n", + "3 4 3.000000\n", + "4 5 2.333333\n", + "5 6 2.111111\n", + "6 7 3.000000\n", + "7 8 3.000000\n", + "8 9 3.000000\n", + "9 A 3.000000\n", + "10 C 2.558442\n", + "11 F 1.846154\n", + "12 L 3.000000\n", + "13 P 1.102041\n", + "14 S 2.561224\n", + "15 W 2.315789" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['Ticket', 'Pclass']].groupby(['Ticket'], as_index=False).mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "4eab8333-25fd-46d1-a307-ac60f4c3f277", + "_execution_state": "idle", + "_uuid": "08aba02f0f1b6560033ddfcde7796ea2bdb5da7b" + }, + "source": [ + "### Fare" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "_cell_guid": "c245d17f-4245-4afa-a779-9256ce72057b", + "_execution_state": "idle", + "_uuid": "2f3a3c7dd4140e492f7b56ea21a25b0e54b3af8c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there is any NAN\n", + "df.Fare.isnull().sum(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "_cell_guid": "fcedc1d2-1fc9-4d15-bda6-4e0c7a8c1b79", + "_execution_state": "idle", + "_uuid": "279a4049e39c1350bbfb47815f4dc8bd64d84d76" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1043 3\n", + "Name: Ticket, dtype: object" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Ticket[df.Fare.isnull()]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "_cell_guid": "4809801d-6cae-47d3-bd02-9a9c615f99c6", + "_execution_state": "idle", + "_uuid": "056b56f133204ca80786d24925efbc93c3b7f399" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1043 3\n", + "Name: Pclass, dtype: 
int64" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Pclass[df.Fare.isnull()]" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "_cell_guid": "9a90fe36-97a8-4378-8a1b-a11324d429e5", + "_execution_state": "idle", + "_uuid": "7ca9047deed61252837c0c85afa81ffcac8766c1" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1043 NaN\n", + "Name: Cabin, dtype: object" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Cabin[df.Fare.isnull()]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "_cell_guid": "2d51f02f-bcb3-424d-a18b-a251e63aacc9", + "_execution_state": "idle", + "_uuid": "481f6e7941608bff1133d8adf1354229b1a115fc" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1043 S\n", + "Name: Embarked, dtype: object" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Embarked[df.Fare.isnull()]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "_cell_guid": "6f9d5841-adf8-4c9c-9c59-39715827a0ad", + "_execution_state": "idle", + "_uuid": "0835a6f504f77c8d42245b5a9346e169bdc302a3" + }, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYgAAAEKCAYAAAAIO8L1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFgJJREFUeJzt3X+w3XV95/HnKwEEid2IRsMGqNZkq+isqHeQ6kyGameLbkd+bGVgKKbd1FAGBrPDzm50tI0rbN1uETbVMoSBNWYoFgupGcbRFSqlrhVM5If8qMutVbxCIYg/SAkoyXv/uN+r94Yvyb14v/d7bs7zMXPmfD+f8znf8z73QF7n8/11UlVIkrS3BX0XIEkaTAaEJKmVASFJamVASJJaGRCSpFYGhCSpVWcBkeTQJLcnuSvJvUk+3PS/MsltSR5I8pdJDmn6X9C0R5vHX9FVbZKk/etyBvE08Laqej1wHHBSkhOA/wFcWlUrgB8Aq5vxq4EfVNVy4NJmnCSpJ50FRI3b2TQPbm4FvA34q6Z/E3BKs3xy06Z5/O1J0lV9kqR9O6jLlSdZCGwHlgOfAP4R+GFVPdMMGQOWNcvLgO8CVNUzSX4EvAR4bK91rgHWABx++OFvevWrX93lW5CkA8727dsfq6ol+xvXaUBU1W7guCSLgS3Aa9qGNfdts4VnXQekqjYCGwFGRkZq27Zts1StJA2HJN+Zzrg5OYqpqn4I3AKcACxOMhFMRwEPNctjwNEAzeP/Cnh8LuqTJD1bl0cxLWlmDiQ5DPgN4H7gS8BvN8NWAZ9tlrc2bZrH/6a8kqAk9abLTUxHApua/RALgOuq6sYk9wGfTnIRcAdwVTP+KmBzklHGZw5ndFibJGk/OguIqrobeENL/7eA41v6nwLe3VU9kqSZ8UxqSVIrA0KS1MqAkCS1MiAkSa0MCElSKwNCktTKgJAktTIgJEmtDAhJUisDQpLUyoCQJLUyICRJrQwISVIrA0KS1MqAkCS1MiAkSa0MCElSKwNiQG3evJmVK1dy7bXX9l2KpCFlQAyoK6+8EoDLL7+850okDSsDYgBt3rx5SttZhKQ+GBADaGL2MMFZhKQ+GBCSpFYGhCSplQEhSWplQAygs846a0p71apVPVUiaZgZEAPonHPOmdJevXp1T5VIGmYGxICamEU4e5DUl1RVNytOjgY+BSwF9gAbq+p/JVkPvBfY0Qz9QFV9rnnO+4HVwG7ggqr6wr5eY2RkpLZt29ZJ/ZJ0oEqyvapG9jeuyxnEM8CFVfUa4ATgvCTHNo9dWlXHNbeJcDgWOAN4LXAS8OdJFnZY30C7/fbbOfHEE9m+fXvfpUgaUp0FRFU9XFVfb5afAO4Hlu3jKScDn66qp6vqn4BR4Piu6ht069evZ8+ePXzoQx/quxRJQ2pO9kEkeQXwBuC2puv8JHcnuTrJi5u+ZcB3Jz1tjH0HygHr9ttvZ+fOnQDs3LnTWYSkXnQeEEkWAdcDa6vqx8DlwKuA44CHgUsmhrY8/Vk7SJKsSbItybYdO3a0PGX+W79+/ZS2swhJfeg0IJIczHg4XFNVNwBU1SNVtbuq9gBX8vPNSGPA0ZOefhTw0N7rrKqNVTVSVSNLlizpsvzeTMwenqstSXOhs4BIEuAq4P6q+tik/iMnDTsVuKdZ3gqckeQFSV4JrABu76q+QbZo0aJ9tiVpLnQ5g3grcDbwtiR3Nrd3An+S5BtJ7gZ+HfhPAFV1L3AdcB/weeC8qtrdYX0Da+9NTB/5yEf6KUTSUDuoqxVX1Zdp36/wuX0852Lg4q5qmi+OP/54Fi1axM6dO1m0aBFvetOb+i5J0hDyTOoBtX79ehYsWODsQVJvOptB6Bdz/PHHc8stt/RdhqQh5gxCktTKgJAktTIgJEmtDAhJUisDQpLUyoAYUFu2bGHlypVs3bq171IkDSkDYkBddtllAFxyySX7GSlJ3TAgBtCWLVuY+KW/qnI
WIakXBsQAmpg9THAWIakPBsQA2vt3wrv63XBJ2hcDYgCNXyn9uduSNBcMiAG0du3aKe0LL7ywp0okDTMv1vcL2rBhA6Ojo52+xk033cRNN900a+tbvnw5F1xwwaytT9KByRnEgDr44IMBWLZsWc+VSBpWziB+QV19E59Y74YNGzpZvyTtjzMISVIrA0KS1MqAkCS1MiAkSa0MCElSKwNCktTKgJAktTIgJEmtDAhJUisDQpLUyoCQJLXqLCCSHJ3kS0nuT3Jvkvc1/Uck+WKSB5r7Fzf9SbIhyWiSu5O8savaJEn71+UM4hngwqp6DXACcF6SY4F1wM1VtQK4uWkDvANY0dzWAJd3WJskaT86C4iqeriqvt4sPwHcDywDTgY2NcM2Aac0yycDn6pxXwUWJzmyq/okSfs2J/sgkrwCeANwG/DyqnoYxkMEeFkzbBnw3UlPG2v69l7XmiTbkmzbsWNHl2VL0lDrPCCSLAKuB9ZW1Y/3NbSlr57VUbWxqkaqamTJkiWzVaYkaS+dBkSSgxkPh2uq6oam+5GJTUfN/aNN/xhw9KSnHwU81GV9kqTn1uVRTAGuAu6vqo9NemgrsKpZXgV8dlL/e5qjmU4AfjSxKUqSNPe6/MnRtwJnA99IcmfT9wHgo8B1SVYDDwLvbh77HPBOYBR4Evi9DmuTJO1HZwFRVV+mfb8CwNtbxhdwXlf1SJJmxjOpJUmtDAhJUisDQpLUyoCQJLUyICRJrQwISVIrA0KS1MqAkCS1MiAkSa0MCElSKwNCktTKgJAktTIgJEmtDAhJUisDQpLUyoCQJLUyICRJrQwISVIrA0KS1MqAkCS1MiAkSa0MCElSq2kFRMb9TpI/bNrHJDm+29IkSX2a7gziz4FfA85s2k8An+ikIknSQDhomuPeXFVvTHIHQFX9IMkhHdYlSerZdGcQP02yECiAJEuAPZ1VJUnq3XQDYgOwBXhZkouBLwP/vbOqJEm9m1ZAVNU1wH8B/hh4GDilqj6zr+ckuTrJo0numdS3Psn3ktzZ3N456bH3JxlN8s0kv/n83o4kabbsdx9EkgXA3VX1OuAfZrDuTwIfBz61V/+lVfWne73GscAZwGuBfw3clOTfVNXuGbyeJGkW7XcGUVV7gLuSHDOTFVfVrcDj0xx+MvDpqnq6qv4JGAU8jFaSejTdo5iOBO5NcjvwLxOdVfWu5/Ga5yd5D7ANuLCqfgAsA746acxY0/csSdYAawCOOWZGmSVJmoHpBsSHZ+n1Lgc+wvjRUB8BLgH+I5CWsdW2gqraCGwEGBkZaR0jSfrFTSsgqupvZ+PFquqRieUkVwI3Ns0x4OhJQ48CHpqN15QkPT/TvdTGCUm+lmRnkp8k2Z3kxzN9sSRHTmqeCkwc4bQVOCPJC5K8ElgB3D7T9UuSZs90NzF9nPGjjD4DjADvYfwf8eeU5FrgROClScaAPwJOTHIc45uPvg2cA1BV9ya5DrgPeAY4zyOYJKlf0w0Iqmo0ycLmH+7/neQr+xl/Zkv3VfsYfzFw8XTrkSR1a7oB8WRz7aU7k/wJ4yfLHd5dWZKkvk33UhtnN2PPZ/ww16OB/9BVUZKk/u1zBpHkmKp6sKq+03Q9xewd8ipJGmD7m0H89cRCkus7rkWSNED2FxCTT2D7lS4LkSQNlv0FRD3HsiTpALe/o5he35wQF+CwSSfHBaiq+qVOq5Mk9WafAVFVC+eqEEnSYJnuYa6SpCFjQEiSWhkQkqRWBoQkqZUBIUlqZUBIkloZEJKkVgaEJKmVASFJamVASJJaGRCSpFYGhCSplQEhSWplQEiSWhkQkqRWBoQkqdX+flHugLBhwwZGR0f7LmNGHnjgAQAuuOCCniuZmeXLl8+7miW1G4qAGB0d5Y5v3MeeFx7RdynTlp+M/wT49n/8554rmb4FTz7edwmSZtFQBATAnhcewVPH/lbfZRzQDr3vxr5LkDSLOtsHkeTqJI8muWdS3xFJvpjkgeb+xU1
/kmxIMprk7iRv7KouSdL0dLmT+pPASXv1rQNurqoVwM1NG+AdwIrmtga4vMO6JEnT0FlAVNWtwN4bpU8GNjXLm4BTJvV/qsZ9FVic5MiuapMk7d9cH+b68qp6GKC5f1nTvwz47qRxY03fsyRZk2Rbkm07duzotFhJGmaDch5EWvqqbWBVbayqkaoaWbJkScdlSdLwmuuAeGRi01Fz/2jTPwYcPWncUcBDc1ybJGmSuQ6IrcCqZnkV8NlJ/e9pjmY6AfjRxKYoSVI/OjsPIsm1wInAS5OMAX8EfBS4Lslq4EHg3c3wzwHvBEaBJ4Hf66ouSdL0dBYQVXXmczz09paxBZzXVS2SpJkblJ3UkqQBY0BIkloZEJKkVgaEJKmVASFJamVASJJaGRCSpFYGhCSplQEhSWplQEiSWhkQUgeuuOIKVq5cyVVXXdV3KdLzZkBIHbjmmmsA2LRp035GSoPLgJBm2RVXXDGl7Sxiflm3bh0rV67kgx/8YN+l9M6AkGbZxOxhgrOI+eUrX/kKALfeemvPlfTPgJCkxrp166a0h30WYUBIUmNi9jBh2GcRBoQ0y84666wp7VWrVj3HSGmwGRDSLDvnnHOmtFevXt1TJdIvxoCQZtmWLVumtLdu3dpTJZqpt7zlLVPaK1eu7KmSwWBASLPssssum9K+5JJLeqpEM/XRj350Svuiiy7qqZLBYEBIs6yq9tnWYJuYRQz77AHgoL4LkA40SaaEQpIeq9FM7T2LGGZDERBjY2MsePJHHHrfjX2XckBb8OT3GRt7pu8yerd27VouvfTSn7UvvPDCHqvRTF122WXccMMNnH766Zx//vl9l9MrNzFJs+zUU0/92awhCe9617t6rkgzccMNNwBw3XXX9VxJ/4ZiBnHUUUfxyNMH8dSxv9V3KQe0Q++7kaOOWtp3GQNhYhbh7GF+2fsAg49//ONDPYtwBiF14NRTT+XWW2919jDPTMweJgz7LMKAkCS16mUTU5JvA08Au4FnqmokyRHAXwKvAL4NnF5VP+ijPklSvzOIX6+q46pqpGmvA26uqhXAzU1bkubMaaedNqV9+umn91TJYBikTUwnAxMXzt8EnNJjLZKG0Nq1a6e0h3kHNfQXEAX8nyTbk6xp+l5eVQ8DNPcv66k2SRL9BcRbq+qNwDuA85JM+5z2JGuSbEuybceOHd1VKGnotB3mOsx6CYiqeqi5fxTYAhwPPJLkSIDm/tHneO7GqhqpqpElS5bMVcmShoCHuU415wGR5PAkL5pYBv4dcA+wFZj4ZZVVwGfnujZJ0s/1cZjry4EtzaUIDgL+oqo+n+RrwHVJVgMPAu/uoTZJUmPOA6KqvgW8vqX/+8Db57oeSZpw2mmnTdnMNOyHuQ7FtZikNhs2bGB0dLSTdY+NjQHj1wGbbcuXL+eCCy6Y9fUKDjvssH22h80gnQchHTB27drFrl27+i5DM3TNNddMaW/atOk5Rg4HZxAaWl1+C59Y94YNGzp7DalrziAkSa0MCElqjIyMTGmfcMIJPVUyGAwISWps3759Svu2227rqZLBYEBIUqOq9tkeNgaEJKnV0BzFtODJxzn0vhv7LmPa8tSPAahDf6nnSqZvwZOPA/4mtXSgGIqAWL58ed8lzNgDDzwBwIpXzad/cJfOy7+1pHZDERDz8axTj6OX1LehCAjNb11eEqMrDzzwADC/vpzMt0t4zNV/F7P9N5lPf2cDQgNvdHSU/3fP1zlm0e6+S5m2Q346fvzHU9/+Ws+VTM+DOxf2XYIGkAGheeGYRbv54MjOvss4YF20bVHfJcxYF9/CN2/ezJVXXvmz9rnnnsuZZ545668zX3iYqyQ1zj777CntYQ4HMCAkaYqlS8ePHDz33HN7rqR/bmKSpEmWLl3K0qVLh372AM4gJEnPwYCQJLVyE5MG3tjYGP/yxMJ5eaTNfPGdJxZyePMzqbNtvp3HMh/PYYFuzq8wIDQvPL07fOeJ+XOs/k/3BICDF8y
Pq4E+vTsc3tG6R0dHuePeO2BxRy8w2/aM393xvTv6rWMmftjNag0IDbwTTzxxXn0DhZ9/C12xYkXPlUxfp9fRWgx7TtzT3fqH3IJbutlbYEBo4M23qT54LS0dGNxJLUlq5QxCUqfGxsbg+7Dgr+fJ99GJS37Nn11e8AyM1ewfZGBASOrU4sWL2bVrV99lTNtErYcdcljPlczAIeN/59lmQEjq1NVXX913CTPi/qOfmydzPknSXBu4gEhyUpJvJhlNsq7veiRpWA1UQCRZCHwCeAdwLHBmkmP7rUqShlOqBudMzyS/Bqyvqt9s2u8HqKo/bhs/MjJS27Ztm8MKn62rywh0eaLVfPrJwy51eQkIP7/uzcf/92AwPr8k26tqZL/jBiwgfhs4qap+v2mfDby5qs6fNGYNsKZp/irwzTkvdO68FHis7yL0vPn5zV8H+mf3y1W1ZH+DBu0oprT0TUmwqtoIbJybcvqVZNt0Ul6Dyc9v/vKzGzdQ+yCAMeDoSe2jgId6qkWShtqgBcTXgBVJXpnkEOAMYGvPNUnSUBqoTUxV9UyS84EvMH6i+9VVdW/PZfVpKDalHcD8/OYvPzsGbCe1JGlwDNomJknSgDAgJEmtDIgBlOTqJI8muafvWjQzSY5O8qUk9ye5N8n7+q5J05fk0CS3J7mr+fw+3HdNfXIfxABKshLYCXyqql7Xdz2aviRHAkdW1deTvAjYDpxSVff1XJqmIUmAw6tqZ5KDgS8D76uqr/ZcWi+cQQygqroVeLzvOjRzVfVwVX29WX4CuB9Y1m9Vmq4at7NpHtzchvZbtAEhdSTJK4A3ALf1W4lmIsnCJHcCjwJfrKqh/fwMCKkDSRYB1wNrq+rHfdej6auq3VV1HONXcjg+ydBu5jUgpFnWbLu+Hrimqm7oux49P1X1Q+AW4KSeS+mNASHNomYn51XA/VX1sb7r0cwkWZJkcbN8GPAbwD/0W1V/DIgBlORa4O+BX00ylmR13zVp2t4KnA28Lcmdze2dfRelaTsS+FKSuxm/NtwXq+rGnmvqjYe5SpJaOYOQJLUyICRJrQwISVIrA0KS1MqAkCS1MiCkfUiyuzlU9Z4kn0nywn2MXZ/kP89lfVKXDAhp33ZV1XHNVXV/AvxB3wVJc8WAkKbv74DlAEnek+Tu5ncDNu89MMl7k3ytefz6iZlHknc3s5G7ktza9L22+Q2CO5t1rpjTdyU9B0+Uk/Yhyc6qWpTkIMavr/R54FbgBuCtVfVYkiOq6vEk64GdVfWnSV5SVd9v1nER8EhV/VmSbwAnVdX3kiyuqh8m+TPgq1V1TZJDgIVVtauXNyxN4gxC2rfDmks/bwMeZPw6S28D/qqqHgOoqrbf7nhdkr9rAuEs4LVN//8FPpnkvcDCpu/vgQ8k+a/ALxsOGhQH9V2ANOB2NZd+/pnmgnz7m3p/kvFfkrsrye8CJwJU1R8keTPw74E7kxxXVX+R5Lam7wtJfr+q/maW34c0Y84gpJm7GTg9yUsAkhzRMuZFwMPNpb/PmuhM8qqquq2q/hB4DDg6ya8A36qqDcBW4N92/g6kaXAGIc1QVd2b5GLgb5PsBu4AfnevYR9i/JfkvgN8g/HAAPifzU7oMB40dwHrgN9J8lPgn4H/1vmbkKbBndSSpFZuYpIktTIgJEmtDAhJUisDQpLUyoCQJLUyICRJrQwISVKr/w9pkw+R5PmaqAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# use boxplot to visualize the distribution of Fare for each Pclass\n", + "# (x/y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally)\n", + "sns.boxplot(x='Pclass', y='Fare', data=df)\n", + "plt.ylim(0, 300) # clip the y-axis at 300; hides the outlier(s) with Fare > 500\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "_cell_guid": "137bcdac-da05-49e6-8aed-6b4061f40c16", + "_execution_state": "idle", + "_uuid": "13c768161562d3c2c77a3f0ec8647e9dbf3eb146" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare
Pclass
187.508992
221.179196
313.302889
\n", + "
" + ], + "text/plain": [ + " Fare\n", + "Pclass \n", + "1 87.508992\n", + "2 21.179196\n", + "3 13.302889" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the mean Fare for each Pclass\n", + "df[['Pclass', 'Fare']].groupby(['Pclass']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "_cell_guid": "95b35c28-1a17-4995-8d76-0160b07f4a68", + "_execution_state": "idle", + "_uuid": "a187382bb74abb543ec58f1b56c78a3c7767564b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare
Pclass
10.919302
20.642476
30.864050
\n", + "
" + ], + "text/plain": [ + " Fare\n", + "Pclass \n", + "1 0.919302\n", + "2 0.642476\n", + "3 0.864050" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# divide the standard deviation by the mean. A lower ratio means a tighter \n", + "# distribution of Fare in each Pclass\n", + "df[['Pclass', 'Fare']].groupby(['Pclass']).std() / df[['Pclass', 'Fare']].groupby(['Pclass']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "_cell_guid": "9a40bab3-eee7-43e6-acdc-d70066aa8a34", + "_execution_state": "idle", + "_uuid": "47d8265715f80a20ede4648c53021ad5a509889e" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEKCAYAAAAIO8L1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X2cFNWd7/HPbxhABhQjoDAIGAJ59GZVWGPWXW6I0UGuxHg3rkg2MT4su74wJndXMHJNYu4qPidZDctdVsnirgQ0xg0gEUwkS9xoDPiAKG4cJmJgjMCAKA5PM/PbP6p66B6qp3umu6Zrur/v12te06e66vRvoLt/dU6dc8rcHRERkY6qSh2AiIgkkxKEiIhEUoIQEZFIShAiIhJJCUJERCIpQYiISKTYEoSZHWNmz5rZi2b2spl9O9z+fjP7tZm9ZmbLzKxfuL1/WK4Pnz8lrthERCS3OFsQB4FPu/sfAacBU8zsLOB24LvuPh7YA1wZ7n8lsMfdxwHfDfcTEZESiS1BeGBfWOwb/jjwaeBH4fbFwOfCxxeGZcLnzzEziys+ERHpXHWclZtZH2ADMA6YD2wB3nb3lnCXbcDI8PFI4PcA7t5iZnuBIcCuDnXOBGYCDBw4cMKHP/zhOP8EEZGys2HDhl3uPizXfrEmCHdvBU4zs+OBR4GPRO0W/o5qLRy1Doi7LwQWAkycONHXr19fpGhFRCqDmW3NZ78eGcXk7m8DvwDOAo43s1RiOhloDB9vA0YBhM8PBnb3RHwiInK0OEcxDQtbDpjZAOAzwGZgLfD5cLfLgJ+Ej5eHZcLnn3StJCgiUjJxdjGNABaH1yGqgIfcfaWZvQIsNbObgeeB+8P97wf+1czqCVoO02OMTUREcogtQbj7RuD0iO0NwJkR2w8AF8cVj4iIdI1mUouISCQlCBERiaQEISIikZQgREQkkhKEiIhEUoIQEZFIShAiIhJJCUJERCIpQYiISCQlCBERiaQEISIikZQgREQkkhKEiIhEUoIQEZFIShAiIhJJCUJERCIpQYiISCQliDK2dOlS6urqePjhh0sdioj0QkoQZewHP/gBAPfdd1+JIxGR3kgJokwtXbo0o6xWhIh0lRJEmUq1HlLUihCRrqoudQC9wYIFC2hoaMjYtn37dgBGjhzZvm3s2LFcffXVPRqbiEhclCC66cCBA6UOQUQkVkoQeYhqFcyePRuAO++8s6fDycvll1+e0c101VVXlTAaEemNdA2iTE2fPj2jfPHFF
5coEhHprZQgytjll18OqPUgIt2jLqYyNn369KNaEiIi+YqtBWFmo8xsrZltNrOXzeyr4fabzGy7mb0Q/kxNO+YGM6s3s/8ys7q4YhMRkdzibEG0AH/n7s+Z2bHABjN7Inzuu+5+V/rOZvZRYDrwMaAW+JmZfdDdW2OMUUREsoitBeHub7r7c+Hjd4HNwMhODrkQWOruB939d0A9cGZc8YmISOd65CK1mZ0CnA78Otx0jZltNLNFZva+cNtI4Pdph22j84QiIiIxij1BmNkg4BHga+7+DrAA+ABwGvAmcHdq14jDPaK+mWa23szW79y5M6aoRUQk1gRhZn0JksOD7v5jAHd/y91b3b0N+GeOdCNtA0alHX4y0NixTndf6O4T3X3isGHD4gxfRKSixTmKyYD7gc3u/p207SPSdrsI2BQ+Xg5MN7P+ZvZ+YDzwbFzxiYhI5+IcxXQ28EXgJTN7Idw2F7jUzE4j6D56HfhrAHd/2cweAl4hGAE1SyOYRERKJ7YE4e5PEX1dYVUnx9wC3BJXTCIikj8ttSEiIpGUIEREJJIShIiIRFKCEBGRSEoQIiISSQlCREQiKUGIiEgkJQgREYmkBCEiIpGUIEREJJIShIiIRFKCEBGRSEoQIiISSQlCREQiKUGIiEgkJQgREYmkBCEiIpGUIEREJJIShIiIRFKCKGMrVqygrq6OVauy3gY8EdauXUtdXR3r1q0rdSgikkYJoozNnz8fgHvuuafEkXTurrvuAuD2228vcSQikk4JokytWLECdwfA3RPbili7di0tLS0AtLS0qBUhkiBKEGUq1XpISWorItV6SFErQiQ5lCDKVKr1kK2cFKnWQ7ayiJSOEkSZMrNOy0lRXV3daVlESkcJokzNmjUro3zttdeWKJLOXXfddRnl66+/vkSRiEhHShBlatq0ae2tBjNj6tSpJY4o2uTJk9tbDdXV1UyaNKnEEYlIihJEGUu1IpLaekhJtSLUehBJltg6fM1sFPAAMBxoAxa6+z+Y2QnAMuAU4HXgL9x9jwWnu/8ATAWagS+7+3NxxVcJpk2bxrRp00odRk6TJ09m8uTJpQ5DRDqIswXRAvydu38EOAuYZWYfBb4O/NzdxwM/D8sA5wPjw5+ZwIIYYxMRkRxiSxDu/maqBeDu7wKbgZHAhcDicLfFwOfCxxcCD3jgGeB4MxsRV3wiItK5HrkGYWanAKcDvwZOcvc3IUgiwInhbiOB36cdti3c1rGumWa23szW79y5M86wRUQqWuwJwswGAY8AX3P3dzrbNWLbUbO73H2hu09094nDhg0rVpgiItJBrAnCzPoSJIcH3f3H4ea3Ul1H4e8d4fZtwKi0w08GGuOMT0REsostQYSjku4HNrv7d9KeWg5cFj6+DPhJ2vYvWeAsYG+qK0pERHpenOsanA18EXjJzF4It80FbgMeMrMrgTeAi8PnVhEMca0nGOZ6eYyxiYhIDrElCHd/iujrCgDnROzvwKyIfUVEpAQ0k1pERCIpQYiISCQlCBERiaQEISIikZQgREQkkhKEiIhEUoIQEZFIShAiIhJJCUJERCIpQYiISCQlCBERiaQEISIikZQgREQkkhKEiIhEUoIQEZFIShDSJWvXrqWuro5169aVOhQRiZkShHTJXXfdBcDtt99e4khEJG5KEJK3tWvX0tLSAkBLS4taESJlTglC8pZqPaSoFSFS3pQgJG+p1kO2soiUFyUIyVt1dXWnZREpL0oQkrfrrrsuo3z99deXKBIR6Ql5JQgL/KWZfTMsjzazM+MNTZJm8uTJ7a2G6upqJk2aVOKIRCRO+bYg/hH4JHBpWH4XmB9LRJJoqVaEWg8i5S/fTuRPuPsZZvY8gLvvMbN+McYlCTV58mQmT55c6jBEpAfk24I4bGZ9AAcws2FAW2xRiYhIyeWbIO4BHgVONLNbgKeAebFFJSIiJZdXF5O7P2hmG4BzAAM+5+6bOzvGzBYBFwA73P3UcNtNwF8BO
8Pd5rr7qvC5G4ArgVbgWndf3fU/pzgWLFhAQ0NDp/ts2bIFgNmzZ2fdZ+zYsVx99dVFjU1EpKfkTBBmVgVsDL/kX+1C3f8CfB94oMP277p7xpRcM/soMB34GFAL/MzMPujurV14vaJpaGjgtVc2MXrwgKz79Gs9CMDB7Vsin39j7/5YYutMx8S2fft2AEaOHJmxnxKXiOQjZ4Jw9zYze9HMRrv7G/lW7O7rzOyUPHe/EFjq7geB35lZPXAm8HS+r1dsowcP4IY/Hd/t42996rUiRtM9Bw4cKHUIItKL5TuKaQTwspk9C7yX2ujun+3Ga15jZl8C1gN/5+57gJHAM2n7bAu3HcXMZgIzAUaPHt2Nly9fHVsFqe6vO++8sxThiEgvl2+C+HaRXm8B8PcEo6H+HrgbuILgukZHHlWBuy8EFgJMnDgxch8RESlcvhep/6MYL+bub6Uem9k/AyvD4jZgVNquJwONxXhNERHpnnyX2jjLzH5jZvvM7JCZtZrZO119MTMbkVa8CNgUPl4OTDez/mb2fmA88GxX6xcRkeLJt4vp+wSjjB4GJgJfIvgSz8rMfgh8ChhqZtuAbwGfMrPTCLqPXgf+GsDdXzazh4BXgBZgVqlGMImISCDv9Zrdvd7M+oRf3D8ws1/l2P/SiM33d7L/LcAt+cYjIiLxyjdBNIdrL71gZncAbwID4wtLRERKLd+lNr4Y7nsNwTDXUcCfxxWUiIiUXqctiNTkOHffGm46QPGGvIqISILlakH8e+qBmT0ScywiIpIguRJE+gS2sXEGIiIiyZIrQXiWxyIiUuZyjWL6o3BCnAED0ibHGeDuflys0YmISMl0miDcvU9PBSIiIsmS7zBXERGpMEoQIiISSQlCREQiKUGIiEgkJQgREYmkBCEiIpGUIEREJJIShIiIRFKCEBGRSEoQ0iUbNmzg/PPP5/nnny91KCISMyUI6ZJ58+bR1tbGzTffXOpQRCRmShCStw0bNrBv3z4A9u3bp1aESJlTgpC8zZs3L6OsVoRIeVOCkLylWg/ZyiJSXpQgJG+DBg3qtCwi5UUJQvI2d+7cjPKNN95YokhEpCcoQUjeJkyY0N5qGDRoEKeffnqJIxKROClBSJfMnTuXqqoqtR5EKkCue1KLZJgwYQI//elPSx2GiPSA2FoQZrbIzHaY2aa0bSeY2RNm9lr4+33hdjOze8ys3sw2mtkZccUlIiL5ibOL6V+AKR22fR34ubuPB34elgHOB8aHPzOBBTHGJSIieYgtQbj7OmB3h80XAovDx4uBz6Vtf8ADzwDHm9mIuGITEZHcevoi9Unu/iZA+PvEcPtI4Pdp+20Ltx3FzGaa2XozW79z585YgxURqWRJGcVkEds8akd3X+juE9194rBhw2IOS0SkcvV0gngr1XUU/t4Rbt8GjErb72SgsYdjExGRND2dIJYDl4WPLwN+krb9S+FoprOAvamuKBERKY3Y5kGY2Q+BTwFDzWwb8C3gNuAhM7sSeAO4ONx9FTAVqAeagcvjiktERPITW4Jw90uzPHVOxL4OzIorFhER6bqkXKQWEZGEUYIQEZFIShDSJfX19Vx00UU0NDSUOhQRiZkShHTJTTfdRHNzM9/61reKVqeSjkgyKUFI3urr60nNXt+xY0fRvtDvuOMOmpubue2224pSn4gUhxKE5O2mm27KKBejFVFfX8/WrVsB2Lp1q1oRIgmiBCF567j21Y4dO7Lsmb877rgjo6xWhEhyKEFISaVaD9nKIlI6uqNchMbGRt7bu59bn3qt23W8sXc/Ay3e5aQWLFjQaZfMli1bAJg9e3an9YwdO5arr7465+v179+fgwcPZpQLNWbMmIykMGbMmILrFJHiUILoxRoaGnh180aGHp9lh7bg1643N2atY9fb+b9eTU1NRoIYOHBg/gdnMWfOHGbNOjKJ/utf/3one4tIT1KCiFBbW8tB388Nfzq+23Xc+tRr9K+t7dIxixYtYtmyZcyYM
YPLLrss9wHA0OPhc5/ufk/hvz/Zlve+e/bsySjv3t3xflBdN27cuPZWxJgxYxg7dmzBdYpIcegaRIIsW7YMgCVLlpQ4kp41Z84campqKrL1oDkgkmRKEAmxaNGijPLixYuz7Fk6w4cPzyiPGFGcu8KOGzeORx99tCJbD5oDIkmmBJEQqdZDShJbEd/4xjcyyt/85jdLFEl50BwQSTolCMnbuHHj2lsRI0aMqMgz/mLSHBBJOl2kli75xje+wezZsxPXeug45Hf79u0AjBw5sn1bvsN5e4rmgEjSqQWREJdccklGecaMGSWKpHNxXC9YsWIFdXV1rFq1qmh1HjhwgAMHDhStvjh0nPOhOSCSNGpBJMQVV1yRcR0i32Gu5WD+/PkA3HPPPUydOrVbdXRsGaQmB955552FBRcjzQGRpFMLIkFSrYikth7isGLFCoI7zoK7F7UVkXSpOSCA5oBIIilBJMgVV1zB6tWrK7L1kHLPPfeUKJLSqOQ5IJJ86mKSkkq1HrKVy13qmo5IEilBSEmZWUZSMLMSRhO/3jjaSiqXupikpNIv0gJce+21JYqkNHrDaCupXGpBSElNmzaN+fPn4+6YWbdHMfUWvXG0lVQutSCk5FKtiEprPYgknRKElNy0adNYvXp12bceejOtOluZlCBEJCetOluZSnINwsxeB94FWoEWd59oZicAy4BTgNeBv3D3PdnqkODWqO/s7dpNfzra9TYc8nhvjSq9W9Sqs5rUVxlK2YKY7O6nufvEsPx14OfuPh74eVgWkRLTqrOVK0mjmC4EPhU+Xgz8Ari+VMH0BrW1tfSzXQXfcnToiK7dGlUqi1adrVylShAOrDEzB/7J3RcCJ7n7mwDu/qaZnVii2EQkTeqe4enlcqbJjEeUqovpbHc/AzgfmGVmk/I90Mxmmtl6M1u/c+fO+CIUESBYLypdpa0bVcmTGUvSgnAProq6+w4zexQ4E3jLzEaErYcRwI4sxy4EFgJMnDixshbuESmB1KqzW7durYhVZzWZ8Ygeb0GY2UAzOzb1GDgP2AQsB1LLmF4G/KSnYyu1tWvXUldXx7p160odikgGrTpbmUrRxXQS8JSZvQg8Czzm7o8DtwHnmtlrwLlhuaLcddddANx+++0ljiS7RYsWUVdXx+LFi0sdivSgOO4kKMnX4wnC3Rvc/Y/Cn4+5+y3h9iZ3P8fdx4e/d/d0bKW0du1aWlpaAGhpaUlsKyJ117slS5aUOBIRiZtmUidEqvWQksRWxKJFizLKakWIlDcliIRItR6ylZMg/Z7ZoFaESLlL0kS5ilZVVUVbW1tGuVxpnLlI76AEkRB9+/bl4MGDGeVK0Z0x5h2TTEdbtmwBjgxRzEaJSCQ7JYiESE8OUeUk+OxnP8vy5cvbyxdddFG36inGOPOGhgY2vroJhh6TZY9DAGzcVZ+9kl2VOflJJF9KEAkxaNAg9u3bl1FOmubm5ozyu+++W6JIQkOPoc+F3R922fqT+O9tUIyWjlo5UipKEFm8sXc/tz71Wtbnd7wXnOGfOLB/1uPHj4x8KtLcuXOZO3due/nGG2/M/+Ae8rOf/eyocq4unEoXtHQ2Y0NOiHzePVgM4KWdb0U/31RRo70lYZQgIuQzGehQeObXf+QHIp8fPzK/elImTJjAgAED2L9/PzU1NZx++ul5HyvJZkNOoPqCum4d27JydZGjEcmfEkSEfJrzcazPcuyxx7J///5Edi+JSOWpiASxYcMGbrzxRubNm5fYM/P6+np27AjWJ9yxY0fed+3a9Xb2O8rtDS9pDO4k3+x6G4aO6HK4IlIBKiJBzJs3j7a2Nm6++WYeeeSRUocTKequXQsXLuz0mFwJZG/YDTZ0RHQ3WPBc17rCRKRylH2C2LBhQ/vooH379vH8888nohXRcXRL1F27Zs+e3ekIllxdYUlZplhzFoqr479nY2Mj+/fv7/SYAQMGUFt75M6BHf8t46izN8j13oTCR5pFvUZvmRxa9
gli3rx5GeWktiL69++fMfehf//o0VE9LdcHKPWh6ezN3dDQwKZXNzJgSHQdB8O7emzZuTHr6+xvyi/eShCMjHoVGzIMAG/eDy2HOz3mPYemncE/ojcdfaOtoM7f0mdI0N/Y1nwIb2nttM5mP8SencFQ59amN7v8dyRBQ0MDv91cz8jBo7PuU93aD4D3Gg9FPr997xtdft3ecgOisk8Q6XMLosql0vHLtL6+nlmzZrWXv/e97yWu6+fEE09sv04CcNJJJ+V97IAhMP4C6/Zrv7ayd94bqrGxEX9nb7dHI3nTbhoPH/1FbUOG0f+Ci7tV58GVD0du7zNkBDXTZnarzuYVnXeHJtnIwaOZ9Wdzc++Yxfxfzuv0+agTp6S07nMp+wTRGyagQbDefqoVkaS7dnV8c9fVHRmu+cADD/R0OCLSg8o+QfSGCWgpo0aNoqGhIdF37Uq1Iq666qqSxtHY2AjvHChsNvSuAzQeaixeUBFqa2tp6tunoHkQtcPyb6l1V2NjI61NTbz7L98OX/gwePTouHZWBdXhmmGHD9J4OEsfYoI1Njay7+33crYCOrP97a0MYmB7uSeua/SUsk8QEyZMyCgn4QJ1NjU1NZx66qmJaT1EGT58OMOHD+fii7vXvSHJdNxxx2VclD7Y1kJbW+ddglVVRv++fYJC3xqOO+64OEPsNRoaGqh/5TVGH5t9KYV+LcFX76HfN0c+/8a722OJravKPkHU12cu1pbv/AJJttraWnb1ay54LabaobW5d6wACxYsKHUIJVFbW8t7HCr4GsTA2n4Z20YfO5Ib/vgr3a7z1t/c2+1ji6nsE0THLqW5c+eydOnSEkUjIuWusbGR3Xt28zdPXg/A4dbDtOXqrgOqrIq+fYIuu4MthzihT/T6XT2p7BPEnj17Oi33lN7SL6k5C9k1NTVx6623MnfuXE44ofQf3p5UX1/P7Nmzufvuu9UCz6Fjdx0HWyBHdx0AVUZV/6DLbkD/AYnosiv7BJEUwXjrjQwfnP2NUtUaDOV8p/GlyOf/sDf+oZ4NDQ1s3ryRwe+Lfr41PBFq/EP2OQt7O+TgxsZG9r9T2FDV/U3QeDjeC8q5LFmyhE2bNvHggw/yla90v/ugN7rjjjtobm7Oa4Z/pSun7joliB40fLBx1aTu3ynuvnWdT4YqlsHvg0nndf/4dWuKF0tSNDU1sXr1atyd1atX84UvfKFiWhH19fXtM/23bt1atOt4ldwi6y3KPkFcc801fP/7328vf/WrXy1hNJWptraW3e/syvr8wb3B7/6Dc9eTYVcnw1z3hrNeB/eLfj48nqGdv2bKkiVLOHw4SNCHDx/uUivCm3ZnnSjne4OZyDb42KzH0gPDXDvTnXXC8lHJLbLeouwTxLRp0zISxNSpU7tcR1S/fFRffDn2uxdDrrPNLe8E/5YfGJZ9UUGGZdaTs869YZ1DO6lzaGY9CxYs4IknnmgvHzx4kLa2oE8t9Ttl5cqVrFq1iqqqqqOWRTn33HPb3we5//ZgEucHsiWBYScdVUdjYyPe1MSBxf8YfUxLS/C7OsvH+/BhGg/nf0vbqHXCCtXU1MSaNWtwd9asWVNRLbLepNcniI4f6ubm5va7dEVJzQQ2M2pqatq3p3+o83HMMdnuhRytsbGRpibn75dHr+cCkFr6prpP9POHWmEI8fbDB3HC8mXRz7eGMfbJEiNAawvQdiTOOBYV7ImFCltbWzt9L7W1tXX6PBwdZz6DFXKdaBw1ZyEtkaXiAqhKJQrITGR9+3bpAmgcqxEsWbIkI/nm04ro+FmH3J93yP1Z3773jU4nyu3aF9ztb+ig6CS+fe8bfLB2XKcxFKqr33MphX7P9foE0ROK0SrI9aEG2ofCeVtV+7b0D3Z1WE+ccsXpqQ81VRnHZXwB9Ys/zjhcffXVWVc43bjx6IvyH//4xwtuNXb1RCMVV8dyetIp9kqhLWmJJqrcHU8++WR7PS0tLTz55JMl6WbK51rKH7YEJ
3Ud5zqkfLB2XNmO7LJ8slBSTZw40devX9/pPulrB6WsXl362zj2xBLAxTiLzufLB0ofZ9x1zpw5M6NrZcyYMRUzmufee+9l5cqV7eULLrig4C/ze++9l8cff5yWlhaqq6uZMmVKYq5DdHzPp7qTP/CBI92Vvb072cw2uPvEXPupBVEiveXN1VvijNucOXMyVttN8npZxTZjxoyML/MvfOELRalzzZpguFtVVVVR6oxLd1p55SJxCcLMpgD/APQB7nP32wqp75JLLmHZsiMd6jNmzCgsQEmkbGd9xRpEMG7cOMaMGcPWrVsTtdpuTxgyZAhTpkzhscceY8qUKUW5mDxkyBDOO+88HnvsMc4777xEXaDWSdERVbl36Tlm1geYD5wPfBS41Mw+WkidV1xxRUb5sssuK6Q66SWOOeaYop/5zZkzh5qamopqPaTMmDGDU089tahn+nHUKcWVqGsQZvZJ4CZ3rwvLNwC4+61R++dzDQJg0aJFLFu2jBkzZpR1gsin7xRK339aCX28lS6uEUdSHPleg0hagvg8MMXdrwrLXwQ+4e7XpO0zE0jd9upDwH/lWf1QIPtsre7pDXX2hhhVp+pUnT1b5xh3H5Zrp6Rdg4haqCgjg7n7QqDLw0fMbH0+GbPc6uwNMapO1ak6k1lnoq5BANuAUWnlkyHmmWEiIhIpaQniN8B4M3u/mfUDpgPLSxyTiEhFSlQXk7u3mNk1wGqCYa6L3P3lIlUfx6ym3lBnb4hRdapO1ZnAOhN1kVpERJIjaV1MIiKSEEoQIiISqewThJldZGZuZh8uYp2tZvaCmW0ys4fNrCb3UTnr/L9m9rKZbQzr/kSB9S0ysx1mtqnQ2NLqPMbMnjWzF8NYv12EOkeZ2Voz2xzWWfAdncxsuJktNbMtZvaKma0ysw8WWOeHwv+X1M87Zva1btTz3fTjzGy1md2XVr7bzP62wFj7mNnzZrYy99551dfa4W8/pQh17su9V5fr/Gr4mXy5O/83Weo83sx+ZGavhu/RTxahztfN7KXw3zL3TN/86vw/4d+9ycx+aGbFWUbA3cv6B3gI+CXBDO1i1bkv7fGDwN8WWN8ngaeB/mF5KFBbYJ2TgDOATUX8uw0YFD7uC/waOKvAOkcAZ4SPjwV+C3y0wBifBv4mbdtpwJ8V8d+hD/AHgslGXT32YuCh8HEVsAF4Ou35pwkmhxYS398CS4CVRfp79xWjnjjrBE4FNgE1BINvfgaML0K9i4Grwsf9gOOLUOfrwNAi/u0jgd8BA8LyQ8CXi1F3WbcgzGwQcDZwJcGQ2Tj8Eij0biEjgF3ufhDA3Xe5e0HzP9x9HbC7wLg61ununjrz6xv+FDTKwd3fdPfnwsfvApsJ3vDdNRk47O7/P+01XnD3XxYSZwfnAFvcvTu3VvtP4E/Cxx8j+FJ718zeZ2b9gY8Az3c3MDM7GfhfwH259i0zHwGecfdmd28B/gO4qJAKzew4ghOt+wHc/ZC7v11wpPGoBgaYWTVBkizK/LGyThDA54DH3f23wG4zO6OYlYf/GecDLxVY1RpglJn91sz+0cz+Z+HRxSPsvngB2AE84e6/LmLdpwCnE7RMuutUgrPyOE0HftidA8PE32JmowkSxdMEf+8ngYnARnfPftvB3L4HzAHacu3YBQPSupceLWK9xbQJmGRmQ8Iu36lkTrrtjrHATuAHYZfdfWY2sNBACU6q1pjZhnDpoMIqc98O3AW8AbwJ7HX3NYXWC+WfIC4FloaPl4blYhgQfkmuJ/hPub+QysKz8gkEa0ztBJaZ2ZcLDTIO7t7q7qcRzHI/08xOLUa9YWvvEeBr7v649Pf1AAAEDUlEQVROMeqMQziB87PAwwVUk2pFpBLE02nlXxUQ2wXADncvdoLc7+6nhT8FnZXHxd03A7cDTwCPAy8Chd76rpqgm3aBu58OvAcUYynfs939DIKTy1lmNqmQyszsfcCFwPuBWmCgmf1l4WGWcYIwsyHAp4H7z
Ox1YDZwiZlFrffUVekfmK8UeMYHtH/x/sLdvwVcA/x54WHGJ2xq/wKYUmhdZtaXIDk86O4/LrC6lwmSbVzOB55z97cKqONXBMngfxCc+T5D0IL4E4Lk0V1nA58N3+9LgU+b2b8VUF+v4u73u/sZ7j6JoHv1tQKr3AZsS2sl/4ggYRQk1X3s7juAR4EzC6zyM8Dv3H2nux8GfsyRbsyClG2CAD4PPODuY9z9FHcfRXAh509LHNdRwhEy49M2nQZ0p387VmY2zMyODx8PIHhjvlpgnUbQAtvs7t8pPEqeBPqb2V+lvcYfF7Hb7lK62b2U5j+BC4Dd4YnBbuB4jgxW6BZ3v8HdT3b3Uwi6wZ5096KcSfYGZnZi+Hs08L8p8P/J3f8A/N7MPhRuOgd4pcAYB5rZsanHwHkEJwmFeAM4y8xqws/TOQTX8gpWzgniUoLsnO4RIIm3lBsELA6HZG4kuFnSTYVUaGY/JPiy+ZCZbTOzKwsPkxHA2jDG3xBcgyh0KOXZwBcJznZT/dxTu1uZB8M4LgLODYe5vkzwb1nwRbuwb/tcgjO0QrxEMFLtmQ7b9rp7sZd/Tqqa8H2Z+iloaG/oETN7BVgBzHL3PUWo8yvAg+F7/jRgXoH1nQQ8ZWYvAs8Cj7n744VUGLZwfgQ8R/A+qqJIS25oqQ0REYlUzi0IEREpgBKEiIhEUoIQEZFIShAiIhJJCUJERCIpQYhkES7bkBp6+wcz255W7nTGs5n9wszyvnm8mX3NirAqsEgxJeqWoyJJ4u5NBGPfMbObCFYgvSuml/sa8G9Ac0z1i3SZWhAi3ZB+PwMzmxOu7/+imd3WYb8qM1tsZjeH5fPM7Gkze86Ce4kMMrNrCdbQWWtma3v2LxHJTi0IkQKY2fkEqwZ/wt2bzeyEtKerCe4XssndbzGzocCNwGfc/T0zu57gXiL/L5xJPLmCZlJLL6AEIVKYzwA/cPdmgHBdpZR/Irg50C1h+SyCZVT+M1wzsh8FrL0kEjclCJHCGNlvmvQrYLKZ3e3uB8J9n3D3Yi07LxIrXYMQKcwa4IrUCKQOXUz3A6uAh8ObSz0DnG1m48J9a+zIvbLfJbjlqkhiKEGIFCBciXM5sD68idR1HZ7/DsEqm/8KNAFfBn4Yrg76DPDhcNeFwE91kVqSRKu5iohIJLUgREQkkhKEiIhEUoIQEZFIShAiIhJJCUJERCIpQYiISCQlCBERifTfWXDY2QM0oh8AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# use boxplot to visualize the distribution of Fare for each Ticket\n", + "sns.boxplot('Ticket','Fare',data=df)\n", + "plt.ylim(0, 300) # ignore one data point with Fare > 500\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "_cell_guid": "0c52113f-bf33-4187-9393-49e050b0503c", + "_execution_state": "idle", + "_uuid": "3f7e7fb7ee1879e529ca16e7af2d72f4131f759f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare
Ticket
165.771211
220.235194
315.380682
416.764036
519.262500
619.153711
79.197438
88.397900
97.750000
A10.189681
C28.152273
F24.677246
L1.515000
P119.698253
S17.542900
W31.056579
\n", + "
" + ], + "text/plain": [ + " Fare\n", + "Ticket \n", + "1 65.771211\n", + "2 20.235194\n", + "3 15.380682\n", + "4 16.764036\n", + "5 19.262500\n", + "6 19.153711\n", + "7 9.197438\n", + "8 8.397900\n", + "9 7.750000\n", + "A 10.189681\n", + "C 28.152273\n", + "F 24.677246\n", + "L 1.515000\n", + "P 119.698253\n", + "S 17.542900\n", + "W 31.056579" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the correlation between Ticket and Fare \n", + "# (we saw this earlier)\n", + "df[['Ticket', 'Fare']].groupby(['Ticket']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "_cell_guid": "cbd10230-dda2-4819-b330-3e67a6bef4a4", + "_execution_state": "idle", + "_uuid": "c6407a63d494490385a0024f572f3d3b7c1c5dc1" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare
Ticket
10.817411
21.284139
31.351244
40.497578
50.284367
60.735873
70.080078
80.005961
90.000000
A0.466846
C0.736299
F0.556023
L2.236068
P0.944596
S1.024639
W0.676999
\n", + "
" + ], + "text/plain": [ + " Fare\n", + "Ticket \n", + "1 0.817411\n", + "2 1.284139\n", + "3 1.351244\n", + "4 0.497578\n", + "5 0.284367\n", + "6 0.735873\n", + "7 0.080078\n", + "8 0.005961\n", + "9 0.000000\n", + "A 0.466846\n", + "C 0.736299\n", + "F 0.556023\n", + "L 2.236068\n", + "P 0.944596\n", + "S 1.024639\n", + "W 0.676999" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# divide the standard deviation by the mean. A lower ratio means a tighter \n", + "# distribution of Fare in each Ticket type\n", + "df[['Ticket', 'Fare']].groupby(['Ticket']).std() / df[['Ticket', 'Fare']].groupby(['Ticket']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "_cell_guid": "43b41c45-b149-44da-a031-42b409966f28", + "_execution_state": "idle", + "_uuid": "84e953d33cf72578729f20c54cb8c2e8c29bcb54" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEKCAYAAAAIO8L1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAGfRJREFUeJzt3X90X3Wd5/HnKw2FlmynsoYfkjJlSVHRxUozEdfZQDHOoMcRyugMPW6puyx1WZCDw54VagtxCl3Xs/ijR+3QCmPoskUc6dADCEOhlWVGDakwhYLaiPzIwEILCERa2jTv/SP3W75Jb5M0zc2933xfj3Nyvt/P/d7v/b6bnOaV9/3xuYoIzMzMBqvJuwAzMysmB4SZmaVyQJiZWSoHhJmZpXJAmJlZKgeEmZmlyiwgJB0hqUPSP0vaKukryfITJf1c0jZJP5A0OVl+eDLuSl6fmVVtZmY2vCw7iLeAsyLiA8Bs4GxJpwP/E/hGRMwCXgUuTNa/EHg1IhqBbyTrmZlZTjILiOjXkwwPS74COAv4u2R5O3Bu8vycZEzy+kclKav6zMxsaLVZblzSJGAz0Ah8B/gN8LuI6E1W6QaOT54fDzwHEBG9kl4D/jWwY9A2FwGLAI488sg573nPe7L8J5iZTTibN2/eERH1w62XaUBExF5gtqTpwDrgvWmrJY9p3cJ+84BExCpgFUBTU1N0dnaOUbVmZtVB0jMjWW9czmKKiN8Bm4DTgemSSsHUADyfPO8GZgAkr/8B8Mp41GdmZvvL8iym+qRzQNIUoBV4EtgIfDpZbSFwR/J8fTImef2B8EyCZma5yXIX03FAe3Icoga4LSLulPQEcKuka4FHgBuT9W8E1kjqor9zOD/D2szMbBiZBUREbAE+mLL8KaA5Zfku4DNZ1WNmZgfHV1KbmVkqB4SZmaVyQJiZWSoHhJmZpXJAmJlZKgeEmZmlckCYmVkqB4SZmaVyQJiZWSoHhJmZpXJAmJlZKgeEmZmlckCYmVkqB4SZmaVyQJiZWS
oHhJmZpXJAmJlZKgdEQa1Zs4aWlhbWrl2bdylmVqUcEAW1evVqAFauXJlzJTYaN9xwAy0tLdx4443Dr2xWUA6IAlqzZs2AsbuIynPLLbcA0N7ennMlZqPngCigUvdQ4i6istxwww0Dxu4irFI5IMzGWKl7KHEXYZXKAWFmZqkcEAV00UUXDRhffPHFOVViZtXMAVFACxYsGDCeP39+TpXYaHz2s58dMF64cGFOlZgdGgdEQZW6CHcPlefzn//8gPGFF16YUyVmh8YBUVALFizgwQcfdPdQoUpdhLsHq2SKiGw2LM0AbgaOBfqAVRHxLUltwEXA9mTVxRFxd/Keq4ALgb3AZRFx71Cf0dTUFJ2dnZnUb2Y2UUnaHBFNw62XZQfRC1wREe8FTgcukXRK8to3ImJ28lUKh1OA84H3AWcD35U0KcP6zDLT0dHBmWeeyebNm/MuxWzUMguIiHghIn6RPH8DeBI4foi3nAPcGhFvRcRvgS6gOav6zLLU1tZGX18fS5cuzbsUs1Ebl2MQkmYCHwR+niy6VNIWSTdJekey7HjgubK3dTN0oJgVUkdHBz09PQD09PS4i7CKlXlASKoDfgRcHhGvAyuBk4DZwAvA9aVVU96+3wESSYskdUrq3L59e8pbzPLV1tY2YOwuwipVpgEh6TD6w+GWiLgdICJejIi9EdEHrObt3UjdwIyytzcAzw/eZkSsioimiGiqr6/PsnyzUSl1Dwcam1WKzAJCkoAbgScj4utly48rW20e8HjyfD1wvqTDJZ0IzAI6sqrPLCt1dXVDjs0qRZYdxEeABcBZkh5Nvj4BfE3SY5K2AHOBLwJExFbgNuAJ4B7gkojYm2F9ZpkYvItp2bJl+RRidohqs9pwRDxE+nGFu4d4z3XAdVnVZDYempubqauro6enh7q6OubMmZN3SWaj4iupzTLQ1tZGTU2NuweraJl1EGbVrLm5mU2bNuVdhtkhcQdhZmapHBBmZpbKAWFmZqkcEGZmlsoBYWZmqRwQZhlYt24dLS0trF+/Pu9SzEbNAWGWgW9+85sAXH/99cOsaVZcDgizMbZu3TpKd2qMCHcRVrEcEGZjrNQ9lLiLsErlgCioDRs20NLSwsaNG/MuxQ7S4Pu8Z3Xfd7OsOSAKavny5YBnAq1E/TPdH3hsVikcEAW0YcMGent7Aejt7XUXUWEuv/zyAeMrrrgip0rMDo0DooBK3UOJu4jKMm/evH1dgyQ+9alP5VyR2eg4IAqo1D0caGzFV+oi3D1YJfN03wVUW1s7IBRqa/1jqjTz5s1j3rx5eZdhdkjcQRTQ4sWLB4yXLl2aUyVmVs0cEAXU2tq6r2uora1l7ty5OVdkZtXIAVFQpQOb5513Xs6V2Gh4LiabCBwQBVX6xXL77bfnXImNhudisonAAVFAvg6isnkuJpsoVMnTADQ1NUVnZ2feZYy5s846a7+zmB544IEcK7KDccYZZwyYXkMSP/nJT3KsyGwgSZsjomm49dxBFJCvg6hsnovJJgoHRAENvu7B10FUFs/FZBOFA6KAfB1EZfNcTDZROCAKyNdBVDbPxWQThQOioEpdhLuHyuS5mGwiyOwsJkkzgJuBY4E+YFVEfEvSUcAPgJnA08BfRMSr6v+T61vAJ4A3gc9FxC+G+oyJehaTmVmWinAWUy9wRUS8FzgduETSKcCVwP0RMQu4PxkDfByYlXwtAlZmWJuZmQ0js4CIiBdKHUBEvAE8CRwPnAO0J6u1A+cmz88Bbo5+PwOmSzouq/rMzGxo43IMQtJM4IPAz4FjIuIF6A8R4OhkteOB58re1p0sG7ytRZI6JXVu3749y7LNzKpa5gEhqQ74EXB5RLw+1Kopy/Y7QBIRqyKiKSKa6uvrx6pMMzMbJNOAkHQY/eFwS0SUZp17sbTrKHl8KVneDcwoe3sD8HyW9ZmZ2YFlFhDJWUk3Ak9GxNfLXloPLEyeLwTuKFt+gfqdDrxW2hVlZmbjL8s5HD4CLA
Aek/Rosmwx8FXgNkkXAs8Cn0leu5v+U1y76D/N9T9mWJuZmQ0js4CIiIdIP64A8NGU9QO4JKt6zMzs4PhKajMzS+WAMDOzVA4IMzNL5YAwM7NUDggzM0vlgDAzs1QOCDMzS+WAMDOzVA4IMzNL5YAwM7NUDggzM0vlgDAzs1QOCDMzS+WAMDOzVA4IMzNL5YAoqA0bNtDS0sLGjRvzLsXMqpQDoqCWL18OwLJly3KuxMyqlQOigDZs2EBvby8Avb297iLMLBcOiAIqdQ8l7iLMLA8OiAIqdQ8HGpuZjQcHRAHV1tYOOTYzGw8OiAJavHjxgPHSpUtzqsTMqtmIAkL9/oOkq5PxCZKasy2terW2tu7rGmpra5k7d27OFZlZNRppB/Fd4MPA/GT8BvCdTCoy4O0uwt2DmeVlpDu3PxQRp0l6BCAiXpU0OcO6qt60adOoqalh2rRpeZdiZlVqpB3EHkmTgACQVA/0ZVaVcdVVV9HX18eXvvSlvEsxsyo10oBYAawDjpZ0HfAQsHzot9hodXR0sGfPHgB2797N5s2bc67IzKrRiAIiIm4B/jvwP4AXgHMj4odDvUfSTZJekvR42bI2Sf8i6dHk6xNlr10lqUvSryT96ej+ORPDVVddNWDsLsLM8jDsMQhJNcCWiHg/8MuD2Pb3gW8DNw9a/o2I+F+DPuMU4HzgfcC7gA2STo6IvQfxeRNGqXso2b17d06VmFk1G7aDiIg+4J8lnXAwG46IB4FXRrj6OcCtEfFWRPwW6AJ8Gq2ZWY5GehbTccBWSR3A70sLI+JTo/jMSyVdAHQCV0TEq8DxwM/K1ulOlu1H0iJgEcAJJxxUZpmZ2UEY6UHqrwCfBP4auL7s62CtBE4CZtN/LKO0DaWsG2kbiIhVEdEUEU319fWjKKH4LrroogHjiy++OKdKzKyajaiDiIifjMWHRcSLpeeSVgN3JsNuYEbZqg3A82PxmZVowYIFrF69et94/vz5Q6xto7VixQq6uroy2XZ3dzcADQ0NY77txsZGLrvssjHfrtlgI51q43RJD0vqkbRb0l5Jrx/sh0k6rmw4Dyid4bQeOF/S4ZJOBGYBHQe7/Ymk1EW4e6hMO3fuZOfOnXmXYXZIFJG6J2fgSlIn/WcZ/RBoAi4AZkXE4iHesxY4E3gn8CJwTTKeTf/uo6eBz0fEC8n6Xwb+E9ALXB4RPx6urqampujs7By2frPxVvoLf8WKFTlXYrY/SZsjomm49UY8j3REdEmalJx6+reS/mmY9dP2i9w4xPrXAdeNtB4zM8vWSA9Sv5nMvfSopK9J+iJwZIZ1Vb0dO3bwhS98gZdffjnvUsysSo00IBYk615K/2muM4A/z6oog/b2drZs2UJ7e3vepZhZlRoyIEoXx0XEMxGxKyJej4ivRMRfRUQ2p38YO3bs4K677iIiuOuuu9xFmFkuhusg/r70RNKPMq7FEu3t7fvuQ71nzx53EWaWi+ECovwCtn+TZSH2tnvvvXfA+J577smpEjOrZsMFRBzguWWodLvRA43NzMbDcL95PpBcECdgStnFcQIiIny7swz09PQMOTYzGw9DBkRETBqvQuxtM2fO5Omnnx4wNjMbbyM9zdXG0ZIlSwaMr7766pwqMbNq5oAooJNPPnlf1zBz5kwaGxvzLcjMqpIDoqCWLFnCkUce6e7BzHLj02MK6uSTT+bHPx52vkIzs8y4gyioDRs20NLSwsaNG/MuxcyqlAOioJYvXw7AsmXLcq7EzKqVA6KANmzYsG+qjd7eXncRZpYLB0QBlbqHEncRZpYHB0QBlbqHA43NzMaDA6KAPBeTmRWBA6KAFi8eeKvvpUuX5lSJmVUzB0QBtba27usaamtrmTt3bs4VmVk1ckAUVKmLcPdgZnnxzu2Cam1tpbW1Ne8yzKyKuYMwM7NUDggzM0vlgDAzs1QOCDMzS+WAMDOzVJkFhKSbJL0k6fGyZUdJuk/StuTxHclySVohqUvSFkmnZV
WXmZmNTJYdxPeBswctuxK4PyJmAfcnY4CPA7OSr0XAygzrMjOzEcgsICLiQeCVQYvPAdqT5+3AuWXLb45+PwOmSzouq9rMzGx4430M4piIeAEgeTw6WX488FzZet3Jsv1IWiSpU1Ln9u3bMy3WzKyaFeUgtVKWRdqKEbEqIpoioqm+vj7jsvLjW46aWd7GOyBeLO06Sh5fSpZ3AzPK1msAnh/n2grFtxw1s7yNd0CsBxYmzxcCd5QtvyA5m+l04LXSrqhq5FuOmlkRZHma61rgp8C7JXVLuhD4KvAxSduAjyVjgLuBp4AuYDXwX7OqqxL4lqNm+eno6ODMM89k8+bNeZeSu8xmc42I+Qd46aMp6wZwSVa1VBrfctQsP21tbfT19bF06VLuvvvuvMvJVVEOUluZSZMmDTk2s2x0dHTQ09MDQE9PT9V3EQ6IApo+ffqQYzPLRltb24Bxtd+wywFRQC+//PKQYzPLRql7ONC42jggCkjSkGMzy0ZdXd2Q42rjgCig/mP2Bx6bWTYG72Kq9jMIHRBmZonm5uZ9XUNdXR1z5szJuaJ8OSDMzMq0tbVRU1NT9d0DZHgdhJlZJWpubmbTpk15l1EI7iDMzCyVOwgrvBUrVtDV1ZV3GQdl27ZtAFx22WU5VzJyjY2NFVWvZc8BYYXX1dXFrx//BSfU7c27lBGbvKe/Od/19MM5VzIyz/b4an3bnwPCKsIJdXtZ0lTdFy1l6drO6j7f39L5GEQBHXbYYQPGkydPzqkSM6tmDogC2rNnz4Dx7t27c6rEzKqZA8LMzFI5IMzMLJUDwszMUjkgzMwslQOigHxHOTMrAgdEAe3du3fIsZnZeHBAmJlZKgeEmZmlckCYmVkqB4SZmaXyZH2HKIupqCdPnjxgeo3JkyeP6TTMntbZzEbCHUQBzZw5c8ixmdl4cAdxiLL6S7y1tZXdu3fzrne9i+9973uZfIaZ2VBy6SAkPS3pMUmPSupMlh0l6T5J25LHd+RRW1HMnDmTmpoarr322rxLMbMqlecuprkRMTsimpLxlcD9ETELuD8ZV62pU6dy6qmn0tjYmHcpZlalinQM4hygPXneDpybYy1mZlUvr4AI4B8kbZa0KFl2TES8AJA8Hp1TbWZmRn4HqT8SEc9LOhq4T9IvR/rGJFAWAZxwwglZ1WdmVvVy6SAi4vnk8SVgHdAMvCjpOIDk8aUDvHdVRDRFRFN9ff14lWxmVnXGPSAkHSnpX5WeA38CPA6sBxYmqy0E7hjv2szM7G157GI6BlgnqfT5/yci7pH0MHCbpAuBZ4HP5FCbmZklxj0gIuIp4AMpy18GPjre9ZiZWboineZqZmYF4oAwM7NUDggzM0vlgDAzs1QOCDMzS+WAMDOzVA4IM7Mya9asoaWlhbVr1+ZdSu4cEGZmZVavXg3AypUrc64kfw4IM7PEmjVrBoyrvYvwLUet8Lq7u/n9G5O4trMu71ImrGfemMSR3d15l5G7UvdQsnLlSubPn59TNfmrioBYsWIFXV1deZdxULZt2wZkd8/rrDQ2NlZczWaWrioCoquri0cee4K+qUflXcqIaXcAsPk3/y/nSkau5s1XMtluQ0MDu3pfYElTTybbN7i2s44jGhryLsMKpioCAqBv6lHsOuWTeZcxoR3xxJ15l2B2SCZNmsTevXsHjKuZD1KbmSXKwyFtXG0cEGZmlsoBYWaWmDJlypDjauOAMDNLLFu2bMB4+fLlOVVSDA4IM7NEc3PzgPGcOXNyqqQYHBBmZolrrrlmwHhwR1FtHBBmZomNGzcOGN933305VVIMVXEdRHd3NzVvvubz9DNW8+bLdHf3ZrLtZ3sqa6qNF9/s/9vrmKl9OVcyMs/2TOLkvIuwwqmKgLDK1tjYmHcJB213MlXKETNn5VzJyJxM5X2fx2sKnbGeOqaSpqOpioBoaGjgxbdqfSV1xo544k4aGo4d8+1Wyn+mcqWaV6xYkXMl+cvqF3l3dzc7d+4c8+
0OVpoXbax0d3dn8v3IIniqIiDMLD9dXV08svURmD7GGxYwdYy3ORXYXjauhx7Gdg6wHnrY/i/bh1/xYPxubDdXUjUBUfPmKxV1DEK7XgcgjpiWcyUj1z9Z39h3EFbZuru7oZfMfollRlROzb3J93mMVUVAVNq+VYBt294AYNZJlfQL99iK/F5btqZPn57JrqC33nqLvr6xPwmgj/5t1qgGMpiKqaamhsMPP3xsNzq5//s81qoiILwP2yw/N910UybbzfLYBvQfu8yCD1KbVYAsz4LJ8oZPlfQLJkv+HmSvcBfKSTpb0q8kdUm6Mu96zEZjypQpVT/Rm1W+QnUQkiYB3wE+BnQDD0taHxFP5FuZTUT+C9RsaIUKCKAZ6IqIpwAk3QqcAxQ2ILLaTeFdFGaWN0VE3jXsI+nTwNkR8Z+T8QLgQxFxadk6i4BFyfDdwK/GvdDx805gR95F2Kj551e5JvrP7g8jon64lYrWQShl2YAEi4hVwKrxKSdfkjojoinvOmx0/POrXP7Z9SvaQepuYEbZuAF4PqdazMyqWtEC4mFglqQTJU0GzgfW51yTmVlVKtQupojolXQpcC8wCbgpIrbmXFaeqmJX2gTmn1/l8s+Ogh2kNjOz4ijaLiYzMysIB4SZmaVyQBSQpC9L2ippi6RHJX0o75ps5CQdK+lWSb+R9ISkuyX5jp4VQFKDpDskbZP0lKRvSxrjqVcrhwOiYCR9GPgkcFpEnAq0As/lW5WNlCQB64BNEXFSRJwCLAaOybcyG07ys7sd+PuImAXMAqYAX8u1sBwV6iwmA+A4YEdEvAUQERP5as6JaC6wJyL+prQgIh7NsR4bubOAXRHxtwARsVfSF4FnJH05Isb21nIVwB1E8fwDMEPSryV9V9IZeRdkB+X9wOa8i7BReR+DfnYR8TrwNFCVd8JyQBRM8lfKHPrnm9oO/EDS53Ityqw6iEFT+5Qtr0oOiAKKiL0RsSkirgEuBf4875psxLbSH/BWebYCA+ZfkjSN/uNHE3lS0ANyQBSMpHdLmlW2aDbwTF712EF7ADhc0kWlBZL+yLsKK8L9wFRJF8C++9NcD3w7Isb+ptoVwAFRPHVAe3J65BbgFKAt35JspKJ/aoJ5wMeS01y30v/z86STBVf2s/u0pG3Ay0BfRFyXb2X58VQbZmYpJP07YC1wXkRU5YkHDggzM0vlXUxmZpbKAWFmZqkcEGZmlsoBYWZmqRwQVpUk7U1myi19XXkQ7z1T0p2H+PmbJDUNv2Y2n282Ep6sz6rVzoiYnccHJxdgmRWeOwizMpKelrRc0k8ldUo6TdK9yUVv/6Vs1WmS1iUXNP6NpJrk/SuT922V9JVB271a0kPAZ8qW10hql3RtMv6T5LN/IemHkuqS5WdL+mXy/vPG5ZthVc8BYdVqyqBdTH9Z9tpzEfFh4P8C3wc+DZwO/HXZOs3AFcC/BU7i7V/aX46IJuBU4AxJp5a9Z1dE/HFE3JqMa4FbgF9HxBJJ7wSWAK0RcRrQCfyVpCOA1cCfAf8eOHaMvgdmQ/IuJqtWQ+1iWp88PgbURcQbwBuSdkmanrzWERFPAUhaC/wx8HfAX0haRP//rePonyplS/KeHwz6nBuA28qmcjg9Wf8f++9dw2Tgp8B7gN9GxLbk8/43/bP9mmXKAWG2v7eSx76y56Vx6f/M4CkIQtKJwH8D/igiXpX0feCIsnV+P+g9/wTMlXR9ROyif1rp+yJifvlKkmanfJ5Z5ryLyWx0miWdmBx7+EvgIWAa/SHwmqRjgI8Ps40bgbuBH0qqBX4GfERSI4Ckqcm9rH8JnCjppOR981O3ZjbG3EFYtZoiqfxWoPdExIhPdaV/189X6T8G8SCwLiL6JD1C/30FngL+cbiNRMTXJf0BsAb4LPA5YK2kw5NVlkTEr5PdVndJ2kF/GL3/IGo1GxVP1mdmZqm8i8nMzFI5IMzMLJUDwszMUjkgzMwslQPCzMxSOSDMzCyVA8LMzF
L9f1LBoZmsHDwLAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# use boxplot to visualize the distribution of Fare for each Embarked\n", + "sns.boxplot('Embarked','Fare',data=df)\n", + "plt.ylim(0, 300) # ignore one data point with Fare > 500\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "_cell_guid": "c4eb229d-67fc-423f-8d67-5d415e30ee53", + "_execution_state": "idle", + "_uuid": "c025b7ada92e44d73f768eaa49d39c49fb1dfd0c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare
Embarked
C62.336267
Q12.409012
S27.418824
\n", + "
" + ], + "text/plain": [ + " Fare\n", + "Embarked \n", + "C 62.336267\n", + "Q 12.409012\n", + "S 27.418824" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the correlation between Embarked and Fare\n", + "df[['Embarked', 'Fare']].groupby(['Embarked']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "_cell_guid": "e34bef31-0dea-4c44-877e-98a72bd53036", + "_execution_state": "idle", + "_uuid": "af43b5c1114f3912f0c6b6015e53c12affb86ce5" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare
Embarked
C1.350514
Q1.097278
S1.352954
\n", + "
" + ], + "text/plain": [ + " Fare\n", + "Embarked \n", + "C 1.350514\n", + "Q 1.097278\n", + "S 1.352954" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# divide the standard deviation by the mean. A lower ratio means a tighter \n", + "# distribution of Fare in each Embarked\n", + "df[['Embarked', 'Fare']].groupby(['Embarked']).std() / df[['Embarked', 'Fare']].groupby(['Embarked']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "_cell_guid": "16822339-ab02-4799-b4ba-8c2ae824bb03", + "_execution_state": "idle", + "_uuid": "f277be6f1b7c6e16d77dc0353f885f28906e229a" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SurvivedFare
00.022.117887
11.048.395408
\n", + "
" + ], + "text/plain": [ + " Survived Fare\n", + "0 0.0 22.117887\n", + "1 1.0 48.395408" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "guess_Fare = df.Fare.loc[ (df.Ticket == '3') & (df.Pclass == 3) & (df.Embarked == 'S')].median()\n", + "df.Fare.fillna(guess_Fare , inplace=True)\n", + "\n", + "# inspect the mean Fare values for people who died and survived\n", + "df[['Fare', 'Survived']].groupby(['Survived'],as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "_cell_guid": "e8477ab2-68ca-4632-835f-ba8df5a6ba36", + "_execution_state": "idle", + "_uuid": "2e55f9e20c72a0d8621b838ed15386c7ce3be9b4" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\HP-PC\\Anaconda3\\lib\\site-packages\\seaborn\\axisgrid.py:230: UserWarning: The `size` paramter has been renamed to `height`; please update your code.\n", + " warnings.warn(msg, UserWarning)\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAd4AAAEYCAYAAADyL5dqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFepJREFUeJzt3X+w5XV93/Hnq4AmEcsPWZgVMAt0cUKauOItYmhSHUoEmriaBIM1uk2Yrp2BjlKdBqST0M44wQZldCKYtTAujggYddxpCbIlWpNO+LHQ5ceCwAY2sLJZFkzBn8TFd/8436tnb8/d++vczz3n8nzMnDnf8znf7+e8z/d+97z2+ztVhSRJauMfLXUBkiS9mBi8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDR241AUAnHnmmXXzzTcvdRmSpKWTpS6glZFY43366aeXugRJkpoYieCVJOnFwuCVJKkhg1eSpIYMXkmSGjJ4JUlqyOCVJKkhg1eSpIYMXkmSGjJ4JUlqaCQuGTksV2x+eMF9XHjGiUOoRJKkwVzjlSSpIYNXkqSGDF5JkhoyeCVJasjglSSpIYNXkqSGDF5JkhoyeCVJasjglSSpIYNXkqSGDF5JkhqaMXiTHJvkq0keTLItyXu79kuTfDPJ1u5xdt80FyfZnuShJG9ezC8gSdI4mc1NEvYC76+qu5O8HLgryebuvSuq6vL+kZOcBJwL/DzwSuB/Jjmxql4YZuGSJI2jGdd4q2pXVd3dDX8beBA4ej+TrAWur6rnq+oxYDtwyjCKlSRp3M1pH2+SVcBrgdu7pguS3JvkmiSHdW1HA0/0TbaT/Qe1JEkvGrMO3iQHA18A3ldVzwFXAScAa4BdwEcmRx0weQ3ob32SLUm27NmzZ86FS5I0jmYVvEkOohe6n62qLwJU1e6qeqGqfgR8ip9sTt4JHNs3+THAk1P7rKoNVTVRVRMrVqxYyHeQJGlszOao5gBXAw9W1Uf72lf2jfY24P5ueBNwbpKXJjkOWA3cMbySJUkaX7M5qvk04F3AfUm2dm0fBN6RZA29zcg7gPcAVNW2JDcCD9A7Ivp8j2iWJKlnxuCtqr9i8H7bm/YzzYeADy2gLkmSliWvXCVJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktTQjMGb5NgkX03yYJJtSd7btR+eZHOSR7rnw7r2JPl4ku1J7k1y8mJ/CUmSxsVs1nj3Au+vqp8DTgXOT3IScBFwa1WtBm7tXgOcBazuHuuBq4ZetSRJY2rG4K2qXVV1dzf8beBB4GhgLbCxG20j8NZueC1wbfXcBhyaZOXQK5ckaQzNaR9vklXAa4HbgaOqahf0whk4shvtaOCJvsl2dm2SJL3ozTp4kxwMfAF4X1U9t79RB7TVgP7WJ9mSZMuePXtmW4YkSWNtVsGb5CB6ofvZqvpi17x7chNy9/xU174TOLZv8mOAJ6f2WVUbqmqiqiZWrFgx3/olSRorszmqOcDVwINV9dG+tzYB67rhdcCX+9rf3R3dfCrw7OQmaUmSXuwOnMU4pwHvAu5LsrVr+yBwGXBjkvOAx4FzuvduAs4GtgPfA353qBVLkjTGZgzeqvorBu+3BTh9wPgFnL/AuiRJWpa8cpUkSQ0ZvJIkNWTwSpLUkMErSVJDBq8kSQ0ZvJIkNWTwSpLUkMErSVJDBq8kSQ0ZvJIkNWTwSpLUkMErSVJDBq8kSQ0ZvJIkNWTwSpLUkMErSVJDBq8kSQ0ZvJIkNWTwSpLUkMErSVJ
DBq8kSQ0ZvJIkNWTwSpLUkMErSVJDBq8kSQ0ZvJIkNWTwSpLUkMErSVJDBq8kSQ0ZvJIkNWTwSpLUkMErSVJDMwZvkmuSPJXk/r62S5N8M8nW7nF233sXJ9me5KEkb16swiVJGkezWeP9NHDmgPYrqmpN97gJIMlJwLnAz3fTXJnkgGEVK0nSuJsxeKvq68C3ZtnfWuD6qnq+qh4DtgOnLKA+SZKWlYXs470gyb3dpujDurajgSf6xtnZtUmSJOYfvFcBJwBrgF3AR7r2DBi3BnWQZH2SLUm27NmzZ55lSJI0XuYVvFW1u6peqKofAZ/iJ5uTdwLH9o16DPDkNH1sqKqJqppYsWLFfMqQJL1IJLkkybZuS+vWJK8fQp9vSXLRkOr7zmzHPXCeH7CyqnZ1L98GTB7xvAm4LslHgVcCq4E75vMZkiQBJHkD8GvAyVX1fJIjgJfMctoDq2rvoPeqahO93GpqxuBN8jngjcARSXYCfwi8MckaepuRdwDvAaiqbUluBB4A9gLnV9ULi1O6JOlFYiXwdFU9D1BVTwMk2QFMVNXTSSaAy6vqjUkupbfytwp4OskJwO9V1bZuuq8B7wd+AZgALgHuAY6vqh8l+RngIeB44FXAJ4AVwPeAf1tV30hyHHAdvRy9eS5fZjZHNb+jqlZW1UFVdUxVXV1V76qqX6iqX6yqt/St/VJVH6qqE6rq1VX153MpRpKkAW4Bjk3ycJIrk/yLWUzzOmBtVf1r4Hrg7dDbYgu8sqrumhyxqp6lF7yT/f468JWq+iGwAfj3VfU64APAld04HwOuqqp/BvzdXL6MV66SJI20qvoOvSBdD+wBbkjyb2aYbFNVfb8bvhE4pxt+O/D5AePfAPx2N3xu9xkHA78EfD7JVuBP6a19A5wGfK4b/sxcvs+89vFKktRSt9vya8DXktwHrKO3S3NyBfKnpkzy3b5pv5nkmSS/SC9c3zPgIzYBf5TkcHoh/xfAy4D/W1VrpitrPt/FNV5J0khL8uokq/ua1gB/S+8Yo9d1bb85QzfXA/8ROKSq7pv6ZrdWfQe9Tcj/vTtz5zngsSTndHUkyWu6Sf43vTVjgHfO5fsYvJKkUXcwsDHJA0nuBU4CLgX+M/CxJH8JzHQg75/RC8ob9zPODcDvdM+T3gmcl+QeYBu9KzQCvBc4P8mdwCFz+TKpmtea8lBNTEzUli1bFtzPFZsfXnAfF55x4oL7kCTN2aALMC1LrvFKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJUyQ5M8lDSbYPunVgkpcmuaF7//Ykq2bbt5eMlCSNrFUX/Y9Lh9nfjsv+1Yz9JTmA3h2JzqB3n/k7k2yqqgf6RjsP+Puq+idJzgU+zE+u9bxfrvFKkrSvU4DtVfVoVf0DvctNrp0yzlpgYzf8Z8DpSWZ1ERCDV5KkfR0NPNH3emfXNnCcqtoLPAu8YjadG7ySJO1r0Jrr1Osrz2acgQxeSZL2tRM4tu/1McCT042T5EB6N0r41mw6N3glSdrXncDqJMcleQm9uxptmjLOJnr3BAb4LeAvapZ3HfKoZkmS+lTV3iQXAF8BDgCuqaptSf4LsKWqNgFXA59Jsp3emu650/e4L4NXkjSyZnP6z2KoqpuAm6a0/UHf8A+Ac+bTt5uaJUlqyOCVJKkhg1eSpIYMXkmSGjJ4JUlqyOCVJKkhg1eSpD5JrknyVJL7p3k/ST7e3RLw3iQnz6V/z+OVJI2uSw+5dLj9PTub/j4N/Alw7TTvnwWs7h6vB67qnmfFNV5JkvpU1dfZ/3WX1wLXVs9twKFJVs62f4NXkqS5mc1tA6dl8EqSNDfzviUgGLySJM3VbG4bOK0Zg3fQ0V1JDk+yOckj3fNhXfuCjvSSJGkMbALe3WXeqcCzVbVrthPP5qjmT/P/H911EXBrVV2W5KLu9e+zwCO9RsEVmx8eSj8XnnHiUPqRJLWV5HPAG4EjkuwE/hA4CKCqPkn
vrkVnA9uB7wG/O5f+Zwzeqvp6klVTmtd2RQFsBL5GL3h/fKQXcFuSQ5OsnMv/BCRJ+rHZnf4zVFX1jhneL+D8+fY/3328R02Gafd8ZNe+oCO9JEla7oZ9cNWsj/RKsj7JliRb9uzZM+QyJEkaTfMN3t2TJwt3z0917bM+0quqNlTVRFVNrFixYp5lSJI0XuYbvJuAdd3wOuDLfe3zPtJLkqTlbsaDq6Y5uusy4MYk5wGPA+d0oy/oSC9Jkpa72RzVPN3RXacPGHdBR3pJkrTceeUqSZIaMnglSWrI4JUkqSGDV5KkhgxeSZIaMnglSWrI4JUkqSGDV5KkhgxeSZIaMnglSWrI4JUkqSGDV5KkhgxeSZIamvHuRJqfKzY/vOA+LjzjxCFUIkkaJQbvFKc+vmEo/dz2qvVD6UeStLy4qVmSpIYMXkmSGjJ4JUlqyOCVJKkhg1eSpIYMXkmSGjJ4JUlqaFmdxzusc3AlSVosrvFKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkPL6nSiUTKcU5suH0IfkqRR4hqvJEkNGbySJDVk8EqS1NCC9vEm2QF8G3gB2FtVE0kOB24AVgE7gLdX1d8vrExJkpaHYazxvqmq1lTVRPf6IuDWqloN3Nq9liRJLM6m5rXAxm54I/DWRfgMSZLG0kKDt4BbktyVZH3XdlRV7QLono8cNGGS9Um2JNmyZ8+eBZYhSdJ4WOh5vKdV1ZNJjgQ2J/nGbCesqg3ABoCJiYlaYB2SJI2FBa3xVtWT3fNTwJeAU4DdSVYCdM9PLbRISZKWi3kHb5KXJXn55DDwq8D9wCZgXTfaOuDLCy1SkqTlYiGbmo8CvpRksp/rqurmJHcCNyY5D3gcOGfhZUqStDzMO3ir6lHgNQPanwFOX0hRkiQtV165SpKkhgxeSZIaMnglSWrI4JUkqSGDV5KkhhZ65Sotois2PzyUfi4848Sh9CNJWjjXeCVJasjglSSpIYNXkqSGDF5JkhoyeCVJasjglSSpIU8nGmGnPr5hSD1dPqR+JEkLZfBqdr76R8Pp500XD6cfSRpTbmqWJKkhg1eSpIYMXkmSGjJ4JUlqyOCVJKkhg1eSpIYMXkmSGvI8Xo2nYZxX7DnFkpaAa7ySJDVk8EqS1JCbmvXi5WUwJS0Bg1ez8tePPjOUft7wpqF0M5R63nD8K4ZQiSTNjcH7InDF5ocX3MepQ6gDhlMLDK8eSWrNfbySJDVk8EqS1JCbmqWF8pxiSXNg8KqpUx/fsNQlqBX/QyINZPBKy4lhJ428RQveJGcCHwMOAP5bVV22WJ+l/XMtU3MyrPObR8hfX/2BBffxhvMuH0Il0iIFb5IDgE8AZwA7gTuTbKqqBxbj86T5GNq5yUM4H3gYwTAsI3V+8yj9J8ALrmhIFmuN9xRge1U9CpDkemAtYPBq2RlWgC83XuREGmyxgvdo4Im+1zuB1/ePkGQ9sL57+Z0kDw3hc48Anh5CP62MU73jVCtY72Kz3nn74GxGGqF6Z2UY9d5cVWcOo5hRt1jBmwFttc+Lqg3AUHc+JtlSVRPD7HMxjVO941QrWO9is97FZb3L22JdQGMncGzf62OAJxfpsyRJGhuLFbx3AquTHJfkJcC5wKZF+ixJksbGomxqrqq9SS4AvkLvdKJrqmrbYnzWFON23sw41TtOtYL1LjbrXVzWu4ylqmYeS5IkDYU3SZAkqSGDV5KkhpZF8CY5M8lDSbYnuWip65kqybFJvprkwSTbkry3a780yTeTbO0eZy91rZOS7EhyX1fXlq7t8CSbkzzSPR+21HUCJHl13zzcmuS5JO8bpfmb5JokTyW5v69t4PxMz8e75fneJCePSL1/nOQbXU1fSnJo174qyff75vMnR6Teaf/+SS7u5u9DSd48ArXe0FfnjiRbu/ZRmLfT/X6N7PI78qpqrB/0Dt76G+B44CXAPcBJS13XlBpXAid3wy8HHgZOAi4FPrDU9U1T8w7
giClt/xW4qBu+CPjwUtc5zfLwd8DPjtL8BX4FOBm4f6b5CZwN/Dm98+FPBW4fkXp/FTiwG/5wX72r+scbofk78O/f/du7B3gpcFz3+3HAUtY65f2PAH8wQvN2ut+vkV1+R/2xHNZ4f3x5yqr6B2Dy8pQjo6p2VdXd3fC3gQfpXd1r3KwFNnbDG4G3LmEt0zkd+Juq+tulLqRfVX0d+NaU5unm51rg2uq5DTg0yco2lfYMqreqbqmqvd3L2+idnz8Sppm/01kLXF9Vz1fVY8B2er8jTeyv1iQB3g58rlU9M9nP79fILr+jbjkE76DLU45sqCVZBbwWuL1ruqDbHHPNqGy67RRwS5K70ru8J8BRVbULev8YgSOXrLrpncu+P1qjOn9h+vk5Dsv079Fbq5l0XJL/k+R/JfnlpSpqgEF//1Gev78M7K6qR/raRmbeTvn9Gufld0kth+Cd8fKUoyLJwcAXgPdV1XPAVcAJwBpgF71NTKPitKo6GTgLOD/Jryx1QTNJ72ItbwE+3zWN8vzdn5FeppNcAuwFPts17QJeVVWvBf4DcF2Sf7xU9fWZ7u8/yvP3Hez7H8eRmbcDfr+mHXVA26jM35GwHIJ3LC5PmeQgegvtZ6vqiwBVtbuqXqiqHwGfouHmrplU1ZPd81PAl+jVtntyk1H3/NTSVTjQWcDdVbUbRnv+dqabnyO7TCdZB/wa8M7qduh1m2yf6YbvorfP9MSlq7JnP3//kZy/SQ4EfgO4YbJtVObtoN8vxnD5HRXLIXhH/vKU3X6bq4EHq+qjfe39+z3eBtw/ddqlkORlSV4+OUzvoJr76c3Xdd1o64AvL02F09pnbWFU52+f6ebnJuDd3dGhpwLPTm7SW0pJzgR+H3hLVX2vr31FevfgJsnxwGrg0aWp8if28/ffBJyb5KVJjqNX7x2t6xvgXwLfqKqdkw2jMG+n+/1izJbfkbLUR3cN40HvKLqH6f1v8JKlrmdAff+c3qaWe4Gt3eNs4DPAfV37JmDlUtfa1Xs8vaM+7wG2Tc5T4BXArcAj3fPhS11rX80/AzwDHNLXNjLzl95/CHYBP6S3RnDedPOT3qa6T3TL833AxIjUu53evrvJZfiT3bi/2S0n9wB3A78+IvVO+/cHLunm70PAWUtda9f+aeDfTRl3FObtdL9fI7v8jvrDS0ZKktTQctjULEnS2DB4JUlqyOCVJKkhg1eSpIYMXkmSGjpwqQuQlrMkL9A7pWLSW6tqxxKVI2kEeDqRtIiSfKeqDp7HdAdU1QuLUZOkpeWmZqmx7h6rf5nk7u7xS137G7v7nl5Ht5ac5HeS3NHdi/VPJ69iJGl8ualZWlw/PXlTc+CxqnobvWvanlFVP0iymt6VjCa6cU4B/mlVPZbk54DfpnfDih8muRJ4J3Bt4+8gaYgMXmlxfb+q1kxpOwj4kyRrgBfY96L3d1TvHrHQu7fw64A7e5fL5acZvRtTSJojg1dq70JgN/Aaert7ftD33nf7hgNsrKqLG9YmaZG5j1dq7xBgV/VuV/cuYLr9trcCv5XkSIAkhyf52UY1SlokBq/U3pXAuiS30dvM/N1BI1XVA8B/Am5Jci+wGVg5aFxJ48PTiSRJasg1XkmSGjJ4JUlqyOCVJKkhg1eSpIYMXkmSGjJ4JUlqyOCVJKmh/wdP84s77E+/gAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visualize the distribution of Fare for people who survived and died\n", + "grid = sns.FacetGrid(df, hue='Survived', size=4, aspect=1.5)\n", + "grid.map(plt.hist, 'Fare', alpha=.5, bins=range(0,210,10))\n", + "grid.add_legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "_cell_guid": "0269e823-86a3-442b-b48c-dc659c8b2005", + "_execution_state": "idle", + "_uuid": "61e09e2b4fc7a0a1f31a3b066e80656794d73702" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X903XWd5/Hn+97cJqEJtKS1YpPaasosiG3QLNCpPxDFqegpzrYqCFtnD2fZPQd2HMexwNF1ldnZWYo/Z0CXzizj4HFEtLvQ4bAHHcBR0WJTCZVWkdgCSYulhJQ2Nc3P9/5xv/fbm+Qm997kfu9N7vf1OCcn+X7v98f7c7ncd7+fn+buiIiIACQqHYCIiMwdSgoiIhJSUhARkZCSgoiIhJQUREQkpKQgIiIhJQUREQkpKYiISEhJQUREQjWVDqBYS5Ys8ZUrV1Y6DBGReWXPnj0vu/vSfMfNu6SwcuVKOjo6Kh2GiMi8YmbPF3Kcqo9ERCSkpCAiIiElBRERCSkpiIhISElBRERCSgoiIhJSUhARkVBk4xTM7G7gA8BL7n5BjtcN+CpwBfB74E/c/RdRxZPLXY89y/eePMQblyzk1PAoR0+corE+xcDwGM2L6li5pIFUMsHJoRGWnLGAA72/Z90bzmbV0kYWLkhycmiU5sX1NDXU0ts/SE/fQLid0ds/yL7Dxzk+MMSJUyP09J0EjObFZ7B8cT2vO6uew6+eApzXnVXPyaHR8NoLFyT59e9O8KsXj4XnNNbVcGZ9Kjzv+MBQuJ0dT7Gmij+j68gJOruP0dayiNZljUWdO9NjS6Hc96v0faV6leszFeXgtW8AdwD3TPH6+4DVwc/FwNeD32Vx3mceYmAkvT71sy+dnPT6Uz3HgZcm7f9OR0/4d10q/aD14bc2c9+eHlKJBMNjY2zbtIaNbct5oPMQn7yvk5GxwuOqScDI2OnfxcjEk7l/oR7oPMRNO/ZOij/js/f/knt2vRBub1m3gluvfHNB5xZzn1Ir9/0qfV+pXuX8TEVWfeTuPwJemeaQK4F7PG0XsMjMzokqnmx3PfZsmBBm49TwGKeGx7hn1wucGh7jxOAIp4bH2LpjL11HTrD1e08V/cWeOb7Y87Lj2bpjL739gwWd09s/yE079k6KP3N+15ET4xICwD0/e4GuIyfynlvMfUqt3Per9H2lepX7M1XJNoXlQHfWdk+wbxIzu97MOsys4+jRo7O+8f17X5z1NaaTSiTo7D5G0irz9qYSCXr6Bgo6tqdvgFRifJzZ53d2H8t5Xmf3sbznFnOfUiv3/Sp9X6le5f5MVTIpWI59Of/57u7b3b3d3duXLs0
7n1NeH1wT7QPJ8NgYbS2LGPUZ/HO/RPdvXlxf0LHNi+sZHhsfZ/b5bS2Lcp7X1rIo77nF3KfUyn2/St9Xqle5P1OVTAo9QEvWdjNwuBw3/k/vWk19Ta6cVJy6VIK6VIIt61ZQl0rQWFtDXSrBtk1raF3WyO2b11JT5DucOb7Y87Lj2bZpTcENUU0NtWzbtGZS/JnzW5c1smXdinHnbFm3gtZljXnPLeY+pVbu+1X6vlK9yv2ZMvfZ161PeXGzlcCDU/Q+ej9wI+neRxcDf+PuF+W7Znt7u5dqllT1PjpNvY+q475SvWb7mTKzPe7enve4qJKCmX0buBRYAhwB/huQAnD3/xV0Sb0D2EC6S+p/cPe83/alTAoiInFRaFKIrEuqu1+d53UHbojq/iIiUjyNaBYRkZCSgoiIhJQUREQkpKQgIiIhJQUREQkpKYiISEhJQUREQkoKIiISUlIQEZGQkoKIiISUFEREJKSkICIiISUFEREJKSmIiEhISUFEREJKCiIiElJSEBGRkJKCiIiElBRERCSkpCAiIiElBRERCSkpiIhISElBRERCSgoiIhJSUihSb/8gT3Ufo7d/sNKhiIiUXE2lA5hPHug8xE079pJKJBgeG2PbpjVsbFte6bBEREpGTwoF6u0f5KYdezk1PMaJwRFODY+xdcdePTGISFVRUihQT98AqcT4tyuVSNDTN1ChiERESk9JoUDNi+sZHhsbt294bIzmxfUVikhEpPQiTQpmtsHMnjGzLjO7OcfrK8zsMTN70sz2mtkVUcYzG00NtWzbtIa6VILG2hrqUgm2bVpDU0NtpUMTESmZyBqazSwJ3AlcDvQAu81sp7vvzzrsM8B97v51MzsfeAhYGVVMs7WxbTnnn3Mmnd3HaGtZROuyxkqHJCJSUlH2ProI6HL3AwBmdi9wJZCdFBw4M/j7LOBwhPHMmnofiUi1i7L6aDnQnbXdE+zL9jngWjPrIf2U8F8ijGdW1PtIROIgyqRgOfb5hO2rgW+4ezNwBfBNM5sUk5ldb2YdZtZx9OjRCELNT72PRCQOokwKPUBL1nYzk6uHrgPuA3D3nwF1wJKJF3L37e7e7u7tS5cujSjc6an3kYjEQZRJYTew2sxWmdkC4Cpg54RjXgDeDWBm55FOCpV5FMhDvY9EJA4ia2h29xEzuxF4GEgCd7v7PjO7Fehw953AJ4G/M7NPkK5a+hN3n1jFNGdsbFvO+tYl9PQN0Ly4XglBRKqOzeHv4Jza29u9o6Oj0mGIiMwrZrbH3dvzHacRzSIiElJSEBGRkKbOnoHe/kF6+gZYuCDJyaHR8Hcl2hkysRR672KPjwO9JyKnKSkUKTOq2cecwVGnJgEjY1CXSj90lXOUc7EjrDUiezK9JyLjqfqoCNmjmgdH0w30I8HQhVPDY2Ud5VzsCGuNyJ5M74nIZEoKRcg1qnmico1yLnaEtUZkT6b3RGQyJYUi5BrVPFG5RjkXO8JaI7In03siMpmSQhGyRzXXJtNTO9UE72BdKlHWUc7FjrDWiOzJ9J6ITKbBazOg3kfVRe+JxEGhg9fU+2gGmhpqZ/zlUeovoGJjmU3s1UrvichpSgplpO6PIjLXqU2hTNT9UUTmAyWFMlH3RxGZD5QUykTdH0VkPlBSKBN1fxSR+UANzWWkRXpEZK5TUigzdX8UkblM1UciIhLSk0Jg4qCy6bYB9h0+Djhvet1Z4b/8oxwZW+prz+R6hZ6jEcIi85eSApMHlX34rc3ct6cn5/bA8AhjDmPB7CCppPHFD63FIbKBaaUe9DaT6xV6jgboicxvsZ/7qLd/kPW3Pcqp4elnP53OgqRhZgyOnL5GXSrB4zddNut/KeeKbzbXnsn1Cj2n1LGKSOkUOvdR7NsUClkjIR8zI5mwcftKNTCt1IPeZnK9Qs/RAD2R+S/21UeFrJGQj7szOuESpRqYVupBbzO5XqHnaICeyPwX+yeFXIPKtqxbMeV2TQKyHwpSSeMLH1r
L7ZujGZhW6kFvM7leoedogJ7I/Bf7NoUM9T4q3TnqfSQy9xTapqCkICISA2poFhGRoikpiIhISElBRERCSgoiIhKKNCmY2QYze8bMuszs5imO+bCZ7TezfWb2T1HGIyIi04ts8JqZJYE7gcuBHmC3me109/1Zx6wGbgHWu3ufmb0mqngKMZ+6lIqIRCHKEc0XAV3ufgDAzO4FrgT2Zx3zH4E73b0PwN1fijCeaUU5kZsmiROR+SLK6qPlQHfWdk+wL9u5wLlm9riZ7TKzDRHGM6Xe/kFu2rGXU8NjnBgc4dTwGFt37KW3f3BOX1tEpNSmfVIwsxPAlKPb3P3M6U7PdUqO+68GLgWagR+b2QXufmxCHNcD1wOsWLFiupBnJDOR2ylOz9uTmchttlU9UV5bRKTUpk0K7t4IYGa3Ar8Dvkn6y/4aoDHPtXuAlqztZuBwjmN2ufswcNDMniGdJHZPiGM7sB3SI5rz3LdoUU7kpkniRGQ+KbT66I/c/WvufsLdj7v714FNec7ZDaw2s1VmtgC4Ctg54Zj7gXcBmNkS0tVJBwoPvzSinMhNk8SJyHxSaEPzqJldA9xLugroamB0uhPcfcTMbgQeBpLA3e6+L3jq6HD3ncFr7zWz/cH1PuXuvTMsy6xsbFvO+tYlkfQQivLaIiKlVNCEeGa2EvgqsJ50Ungc+DN3fy7C2HLShHgiIsUrdEK8gp4Ugi//K2cblIiIzG0FtSmY2blm9oiZPR1srzGzz0QbmoiIlFuhDc1/R3rk8TCAu+8l3XAsIiJVpNCkcIa7/3zCvpFSByMiIpVVaFJ42czeSDD4zMw2Ay9GFpWIiFREoV1SbyA9eOzfmNkh4CDpAWyxoQntRCQOCk0Kz7v7e8xsIZBw9xNRBjXXaEI7EYmLQquPDprZduASoD/CeOYcTWgnInFSaFL4A+BfSFcjHTSzO8zsbdGFNXdkJrTLlpnQTkSk2hSUFNx9wN3vc/d/B1wInAn8a6SRzRGa0E5E4qTg9RTM7J1m9jXgF0Ad8OHIoppDNKGdiMRJQQ3NZnYQ6ATuIz1p3clIoyqD3v5Bvr/vd+x5/hWWnVnHH1/YTOuyRnr7B9l3+DjHB4YAOLM+xfnnnMkXNq/l+d5+zl5Yy6IzUvT2D9LUUEvXkRN0dh9jZdMZpGqSk3onZa4HzptedxZAzl5Mmd5NCxckOTk0Ou717Hv8fngsvFYxiWliHFEltTj00iq0jHF4L6T6FNr7aK27H480kjJ6oPMQH7+3c9y+O394gLe1NrHrQC8jY1OcmCWVNC5edTY/6eodty+ZsLB30gOdh/jkfZ3h9RIGyYRRV5Mc14sp07vJx5zBUaculX6A27ZpDR3PvcI9u17Ief8vfmhtQb2gJsZRzLnFiEMvrULLGIf3QqrTtLOkmtlWd99mZn9LjhXY3P1Powwul9nOktrbP8gl/+NfGC7gi3+m6lIJHrzxbbz/b3/M4MjU72/muA/c8RNO5QhoQRKGppmgvLYmwU9vvizvv1b/8H8+MimOQs4tRm//IOtve3RcOepSCR6/qXT3qLRCyxiH90Lmn1LNkvqr4HfVzFWd7jVkTLPK6KylEgk6u4+RtATTLTuROW7icp0ZRgJy7M9IJizvsp49fQM54yjk3GLEYdnRQssYh/dCqle+5Tj/Ofhzr7s/WYZ4IpfuNRRdQoB076S2lkWM+vSPI5njJvZuyvBpEgLA6Jjn7QXVvLg+ZxyFnFuMOPTSKrSMcXgvpHoV2vvoS2b2azP7SzN7U6QRRaypoZYvfrgt52tvb22ipsB3JJU03t7aNGlfpndS67JGbt+8dtz1EpY+JrsXU+uyxrB3U23SgHRVQ10qwRc+1MaWdSumvP/tm/P3gmpqqJ0UR6HnFiMOvbQKLWMc3gupXgWtvAZgZq8l3Q31I6THKXzH3f97hLHlVKqV1wrtffRy/yC3P/wbTmZV7tfVJNi+pZ13nLtUvY9y3Kfae9yo95HMR4W2KRS
cFLIu/GZgK/ARd18ww/hmrNzLcarRUESqQaFJodCV184zs88FK6/dAfwUaJ5ljPOCqgJEJE4KHafwD8C3gfe6++EI45mTNrYtZ33rElUFiEjVy5sUzCwJ/Nbdv1qGeOaspoZaJQMRqXp5q4/cfRRoMrOytx9Erbd/kKe6j2kabBGRQMGL7ACPm9lOIJz3yN2/FElUZaBpCEREJit0nMJh4MHg+Masn3lJC+eIiORW0JOCu38+6kDKqdzTEKi/uojMF4VOnf0YuSfEu6zkEZVBOachUDWViMwnhbYp/EXW33XAJmCk9OGUR2bswdYJX9al/ld8djVV5qlk6469rG9doicGEZmTCq0+2jNh1+NmNq+X4yzH2APNliki802h1UdnZ20mgHbgtZFEVEZRjz3QbJkiMt8U2vtoD+k1FTpIT3Hx58B1+U4ysw1m9oyZdZnZzdMct9nM3Mzyzssxn2iKDBGZb6Z9UjCzfwt0u/uqYPtjpNsTngP25zk3CdwJXA70ALvNbKe7759wXCPwp8ATMyzDnKYpMkRkPsn3pHAXMARgZu8A/hr4R+BVYHuecy8Cutz9gLsPAfcCV+Y47i+BbcCpIuKeV5oaalnbskgJQUTmvHxJIenurwR/fwTY7u473P2/Aq15zl0OdGdt9wT7QmZ2IdDi7g9OdyEzu97MOsys4+jRo3luKyIiM5U3KZhZporp3cCjWa/la6S2HPvCsQ5mlgC+DHwyX5Duvt3d2929fenSpfkOnxHNgyQikv+L/dvAv5rZy8AA8GMAM2slXYU0nR6gJWu7mfR0GRmNwAXAD80M0r2ZdprZRncv3yo6wLd2Pc/nH9zPgqQxPDrGje9azUcvXqHqHhGJnbwrr5nZJcA5wPfd/WSw71ygwd1/Mc15NcBvSD9hHAJ2Ax91931THP9D4C/yJYRSr7z2rV3P8+n7n560v7Ymwe2bNfpYRKpDoSuv5R2n4O67cuz7TQHnjZjZjcDDQBK42933mdmtQIe778x3jaj19g/y+X/OmaMYHBnT6GMRiZ1Cp7mYEXd/CHhowr7PTnHspVHGkktP3wCpZIKh0dGcr2v0sYjETaGD16pS8+J6RqepPtPoYxGJm1gnBYAbLm2ltibBwgVJkgappGn0sYjEVqTVR3NZ9pTW4Pznd7by0YtXAGj0sYjEViyfFCauvDY44tz5wy5Ao49FJN5imRQyU1pnyzQqZ8s1oE2D3ESkmsWy+qh5cT0nh8avEXRyaGRco3KuFdMctIqaiFS1WCaFvpNDjE3odDTm6f1NDbU5V0z71PeeAozBEa2iJiLVK5bVR53dx6bdn6t6KWkJkonx0znlqnISEZnPYvmk0NayaNr9uVZMG/Ux8PFJQeMYRKTaxPJJoXVZI1vWrRi3b8u6FbQuawRyr5h2++a13L5Zq6iJSHXLOyHeXFPKCfG6jpygs/sYbS2LwoSQrbd/cNKYhVz7RETmupJNiFfNWpc15kwGGU0NtZO++HPtExGpFrGsPhIRkdyUFEREJKSkkEWjlUUk7mLdppAt1whmjVYWkbjRkwKTJ8g7NZxedU1PDCISN7F+Ush0L311YIhUIhFOXwFadU1E4im2SSG7umhodHTSXEgarSwicRTLpJBrwruaBNTWJFiQPN2moKcEEYmbWCaFzIR32dVF9aka7rzmLZxVn9JoZRGJrVg2NOea8G5odBRwJQQRibVYJoWmhlpWnD2+vWBoxLnhW0+y/rZH2dl5qEKRiYhUViyTQsfBXn5z5OS4fQ7qjioisRfLpPCjZ1+e9nUtniMicRXLpPCO1UumfV3dUUUkrmKZFFYtbWDCypoALKxNUltj3HBpa/mDEhGZA2KZFHr6Bli4YHJv3LXLFwHG9h8dUIOziMRSLJNC8+L6oAvqeD890MvgiOY/EpH4imVSaGqo5cZ3rc57nBqcRSRuIk0KZrbBzJ4xsy4zuznH639uZvvNbK+ZPWJmr48ynmwfvXgFtTXTF39ig7P
WWxCRahdZUjCzJHAn8D7gfOBqMzt/wmFPAu3uvgb4HrAtqngmamqo5fbNa6hLJWisraEulWDLuhXjtrPnP3qg8xDrb3uUa//+CbU3iEjVinLuo4uALnc/AGBm9wJXAvszB7j7Y1nH7wKujTCeSTa2LWd96xJ6+gbC6S0+/u5zx21D7gn0tu7Yy/rWJZoSQ0SqSpTVR8uB7qztnmDfVK4D/l+uF8zsejPrMLOOo0ePlizAzHoK2QmgqaGWtS2Lxn3ZZybQy6b2BhGpRlE+KeQYCYDn2IeZXQu0A+/M9bq7bwe2A7S3t+e8RrGKWX4z1wR6GuAmItUoyieFHqAla7sZODzxIDN7D/BpYKO7l6UFt9jlN5saatm2ac2U7Q0iItUiyieF3cBqM1sFHAKuAj6afYCZXQjcBWxw95cijGWcXOsp5Ft+M1f7g4hItYksKbj7iJndCDwMJIG73X2fmd0KdLj7TuB2oAH4rpkBvODuG6OKKWOm1UFNDbVKBiJS1SJdec3dHwIemrDvs1l/vyfK+08lUx20dUKbgr7wRSTuYrkcJ6g6SEQkl9gmBVB1kIjIRLGc+2gqmsZCROIu1k8K2YoZtyAiUq30pEDx4xZERKqVkgKaxkJEJENJgfzjFtTWICJxEeukkPmyB6acxkJTZotInMS2oTlXw/LjN102btyCpswWkbiJ5ZPCVA3LwLhps9XWICJxE8ukUOiXvabMFpG4iWVSKPTLXlNmi0jcxLJNoZgJ8TRHkojESSyTAhT3Za85kkQkLmKbFEBf9iIiE8WyTSGj68gJvtfRTdeRE5UORURkTojtk8Jn7/8l9+x6Idzesm4Ft1755gpGJCJSebF5UsieqqLryIlxCQHgnp+9oCcGEYm9WDwpTBy9vOnC5pzHdXYfo3VZY5mjExGZO6o+KeSaquK7e17IeWxby6JyhiYiMudUffVRrtHLtTU1vP+C147bt2XdCj0liEjsVf2TwlSjl2/94AV84vJz6ew+RlvLIiUEERFi8KQw3VQVrcsa2dzeooQgIhKo+icF0FQVIiKFikVSAI1eFhEpRNVXH01Hy2yKiIwXmyeFiXKtvLaxbXmlwxIRqahYPilMtfKanhhEJO5imRS0zKaISG6RJgUz22Bmz5hZl5ndnOP1WjP7TvD6E2a2Msp4MrTMpohIbpG1KZhZErgTuBzoAXab2U5335912HVAn7u3mtlVwG3AR6KKKaPv5BDLF9Xy26OnnwwaUkmu2f5TXvn9MG9ZsYg3vqaBgaHTiaN+QZJUMsHL/YMsXFDDyaEREhjHB4e59Nyl1CSTPN/bz+ubGlj3xib6Tg7R2X2MxWek6Pv9cM4Bch0He/nRsy/zjtVLaF/VBKSrtnr6Bli4IMnJodHwt7rSzkzm/Zz4/k21XyTuzN2jubDZOuBz7v5HwfYtAO7+11nHPBwc8zMzqwF+Byz1aYJqb2/3jo6OGcc1ccrscsqenvvav9/FT7p6w9fe3trE5vYWbtqxFx9zBkedmgSMjEFdKv1Ap8bw4kzVmUCdDCSOzGyPu7fnOy7K6qPlQHfWdk+wL+cx7j4CvAo0RRVQrimzyykzPXfHwd5xCQHgx129fOq76cbvwdF0ThwJHlRODY+pMbxIU3Um6DpyQp0MRKYRZVKwHPsmPgEUcgxmdr2ZdZhZx9GjR2ccUGf3sRmfWyqd3cf40bMvT/Hq9E9tagwv3FSdCTq7j6mTgcg0okwKPUBL1nYzcHiqY4Lqo7OAVyZeyN23u3u7u7cvXbp0xgHNhamx21oW8Y7VS6Z4NVeOPE2N4YWbqjNBW8sidTIQmUaUSWE3sNrMVpnZAuAqYOeEY3YCHwv+3gw8Ol17wmy1Lmtky7oVUV0+r8z03O2rmnh76/hasre3NvGFD6Un7qtNppNDTfBfpy6VGDeRn+Q31USIrcsap5wgUUQibGgGMLMrgK8ASeBud/8rM7sV6HD3nWZWB3wTuJD0E8JV7n5gumvOtqEZ0m0
Ln75/L08cPF2dtKqpntqkqfdRlVHvI5G0QhuaI00KUShFUhARiZu50PtIRETmGSUFEREJKSmIiEhISUFEREJKCiIiElJSEBGRkJKCiIiE5t04BTM7Cjw/w9OXAFNNPFSN4lTeOJUV4lVelbU0Xu/ueecJmndJYTbMrKOQwRvVIk7ljVNZIV7lVVnLS9VHIiISUlIQEZFQ3JLC9koHUGZxKm+cygrxKq/KWkaxalMQEZHpxe1JQUREphGbpGBmG8zsGTPrMrObKx3PbJnZ3Wb2kpk9nbXvbDP7gZk9G/xeHOw3M/uboOx7zewtlYu8eGbWYmaPmdmvzGyfmX082F+t5a0zs5+b2VNBeT8f7F9lZk8E5f1OsHgVZlYbbHcFr6+sZPwzYWZJM3vSzB4Mtqu5rM+Z2S/NrNPMOoJ9c+azHIukYGZJ4E7gfcD5wNVmdn5lo5q1bwAbJuy7GXjE3VcDjwTbkC736uDneuDrZYqxVEaAT7r7ecAlwA3Bf79qLe8gcJm7rwXagA1mdglwG/DloLx9wHXB8dcBfe7eCnw5OG6++Tjwq6ztai4rwLvcvS2r++nc+Sy7e9X/AOuAh7O2bwFuqXRcJSjXSuDprO1ngHOCv88Bngn+vgu4Otdx8/EHeAC4PA7lBc4AfgFcTHpQU02wP/xMAw8D64K/a4LjrNKxF1HGZtJfhJcBD5JerLwqyxrE/RywZMK+OfNZjsWTArAc6M7a7gn2VZtl7v4iQPD7NcH+qil/UF1wIfAEVVzeoDqlE3gJ+AHwW+CYu48Eh2SXKSxv8PqrwPhFwOe2rwBbgcz6t01Ub1kBHPi+me0xs+uDfXPms1wT5cXnEMuxL07drqqi/GbWAOwA/szdj5vlKlb60Bz75lV53X0UaDOzRcD/Bc7LdVjwe96W18w+ALzk7nvM7NLM7hyHzvuyZlnv7ofN7DXAD8zs19McW/byxuVJoQdoydpuBg5XKJYoHTGzcwCC3y8F++d9+c0sRTohfMvd/0+wu2rLm+Hux4Afkm5LWWRmmX/IZZcpLG/w+lnAK+WNdMbWAxvN7DngXtJVSF+hOssKgLsfDn6/RDrhX8Qc+izHJSnsBlYHPRoWAFcBOyscUxR2Ah8L/v4Y6br3zP4tQU+GS4BXM4+q84GlHwn+N/Ard/9S1kvVWt6lwRMCZlYPvId0I+xjwObgsInlzbwPm4FHPaiAnuvc/RZ3b3b3laT/v3zU3a+hCssKYGYLzawx8zfwXuBp5tJnudKNLmVs3LkC+A3putlPVzqeEpTn28CLwDDpf01cR7pu9RHg2eD32cGxRrr31W+BXwLtlY6/yLK+jfQj816gM/i5oorLuwZ4Mijv08Bng/1vAH4OdAHfBWqD/XXBdlfw+hsqXYYZlvtS4MFqLmtQrqeCn32Z76K59FnWiGYREQnFpfpIREQKoKQgIiIhJQUREQkpKYiISEhJQUREQnEZ0SwyY2Y2Sro7YMYH3f25CoUjEil1SRXJw8z63b1hBuclPT1dhci8oeojkRkws5Vm9mMz+0Xw84fB/kuDtR/+ieDpwsyuDdZH6DSzu4Kp3EXmJFUfieRXH8xYCnDQ3f+Y9Nw0l7v7KTNbTXqEeWZu/IuAC9z9oJmdB3yE9CRow2b2NeAa4J4yl0GkIEoKIvkNuHvbhH0p4A4zawNGgXOzXvu5ux8M/n438FZgdzCraz2nJzsTmXOUFERm5hPAEWAt6WrYU1mvncz624B/dPdbyhibyIypTUGMcA/pAAAAeUlEQVRkZs4CXnT3MeDfA1O1EzwCbA7mzs+sxfv6MsUoUjQlBZGZ+RrwMTPbRbrq6GSug9x9P/AZ0itt7SW9ito5ZYtSpEjqkioiIiE9KYiISEhJQUREQkoKIiISUlIQEZGQkoKIiISUFEREJKSkICIiISUFEREJ/X9rMRaTTBnl3QAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# scatter plot of the mean survival rate observed at each distinct Fare value\n", + "df.groupby('Fare', as_index=False)['Survived'].mean().plot.scatter(x='Fare', y='Survived')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "_cell_guid": "dfa08997-c349-4a56-b69d-1b32b9945730", + "_execution_state": "idle", + "_uuid": "5cd244995e5442dc614ce8dbcf4907d66b285345" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare-binSurvived
010.217877
120.201087
230.426901
340.443243
450.645349
\n", + "
" + ], + "text/plain": [ + " Fare-bin Survived\n", + "0 1 0.217877\n", + "1 2 0.201087\n", + "2 3 0.426901\n", + "3 4 0.443243\n", + "4 5 0.645349" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# split Fare into five quantile bins (roughly equal head-count), labelled 1 (cheapest) to 5\n", + "df['Fare-bin'] = pd.qcut(df['Fare'], q=5, labels=[1, 2, 3, 4, 5]).astype(int)\n", + "\n", + "# mean survival rate inside each Fare bin\n", + "df.groupby('Fare-bin', as_index=False)['Survived'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "_cell_guid": "cf7344e8-76ba-4ce7-807e-272f49272423", + "_execution_state": "idle", + "_uuid": "dc0da43c475aa71894141aa6578e22b01a99750e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1014" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# count the missing Cabin values\n", + "df['Cabin'].isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "_cell_guid": "eed848fb-b155-41b2-be78-d04334111893", + "_execution_state": "idle", + "_uuid": "742f1b8d7a6e09087135754b14da0e9137b9a981" + }, + "outputs": [], + "source": [ + "df = df.drop(columns=['Cabin'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "a61221c4-6b5b-489b-a124-35f2b3fae207", + "_execution_state": "idle", + "_uuid": "3506dbb251461b09ede6f1ece58c7f15340c6628" + }, + "source": [ + "### Embarked" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "_cell_guid": "8a8baacb-bcc3-439f-a540-0f5baac20b91", + "_execution_state": "idle", + "_uuid": "e1d3efbfad7ee51433326e86de59e2b9e8d5fb21" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# count the missing Embarked values\n", + "df['Embarked'].isna().sum()" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "_cell_guid": "f7df232f-1f39-4aa6-8285-dcaa0c320e16", + "_execution_state": "idle", + "_uuid": "b21097135f1b6199b0b341ef12a88398b0c8f7ad" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmbarkedTicketTitle
count130713091309
unique3165
topS3Mr
freq914429757
\n", + "
" + ], + "text/plain": [ + " Embarked Ticket Title\n", + "count 1307 1309 1309\n", + "unique 3 16 5\n", + "top S 3 Mr\n", + "freq 914 429 757" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe(include=['O']) # S is the most common" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "_cell_guid": "d65d5808-4d12-4a6c-8aa5-d67bd800a6fe", + "_execution_state": "idle", + "_uuid": "0833afac0ae691222f5436be2645a8e9120ca736" + }, + "outputs": [], + "source": [ + "# fill the missing Embarked values with 'S', the most frequent port (see describe() above)\n", + "# assign the result back instead of calling fillna(inplace=True) on df.Embarked:\n", + "# the in-place call acts on an intermediate Series and may leave df unchanged in newer pandas\n", + "df['Embarked'] = df['Embarked'].fillna('S')" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "_cell_guid": "c5a5acd2-47f4-4eda-8cee-7f5759a80ec1", + "_execution_state": "idle", + "_uuid": "49fe53c86777c9b62b94a5b559ce15006368458d" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmbarkedSurvivedPclassFareAgeSex
0C0.5535711.85185262.33626732.3321700.418519
1Q0.3896102.89430912.40901228.6300000.487805
2S0.3390092.34497827.51248529.2981510.319869
\n", + "
" + ], + "text/plain": [ + " Embarked Survived Pclass Fare Age Sex\n", + "0 C 0.553571 1.851852 62.336267 32.332170 0.418519\n", + "1 Q 0.389610 2.894309 12.409012 28.630000 0.487805\n", + "2 S 0.339009 2.344978 27.512485 29.298151 0.319869" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# per-port means of Survived and a few other features, to see how Embarked relates to them\n", + "df.groupby('Embarked', as_index=False)[['Survived', 'Pclass', 'Fare', 'Age', 'Sex']].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "_cell_guid": "2a43f956-9690-4869-bd67-442c338bc57a", + "_execution_state": "idle", + "_uuid": "1426a18aa9b7a7dd0f18b260ec13f2d245f3a5a4" + }, + "outputs": [], + "source": [ + "df = df.drop(columns='Embarked')" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "_cell_guid": "c8fd62bb-30ea-4675-a593-771c6b8c8333", + "_execution_state": "idle", + "_uuid": "0972d4354355d154e63aa5ec97565260898749f5" + }, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA1gAAADQCAYAAAAalMCAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X20XXV95/H3R8BKUQtooCmQBl2AIlNA70KQqUUQi8gyOEULOppWpmlX1SraKWi7lp3p2MHVFqS2aqNQ4pSCD2BhrIOkFKS2igaMPEUBkdFAJLFCxYepgt/54+zoJbknufeeh30e3q+1zjp7//Y+53x37pcf53t+e/92qgpJkiRJUu8e13YAkiRJkjQpLLAkSZIkqU8ssCRJkiSpTyywJEmSJKlPLLAkSZIkqU8ssCRJkiSpTyywFijJU5Ksbx7fSHLfrPV/afZZnuSVs15zXJKPDzCmi5N8L8mTZrVdkKSSPHVQn6vRYm5qVI1rbm6NTZNlXPNxge/3a0l+rr9RalhGNEeT5A+S3JXkziTXJXnWrO1vm7W8PMltg4plHFhgLVBV/WtVHVFVRwDvA87ful5Vz2t2Ww68suubDMbdwAqAJI8DXgDct+1OzX8g/t0nkLmpUTWuuTkrNk2Qcc3HBfo1YEEFVpJdF/lZ6rMRzdHXAc8DDq+qg4H/CVyV5AnN9rd1feUCTUIu+mWmj5J8p1k8F/jF5peGs7bZZ48kFyX5fJIvJFnRp4+/FPjVZvk44J+BR5rPXJ5kQ5L3ADcDB/TpMzUmxik3m19yb0ty67YxavKMam7Oji3J0iQ3NLHdluQXk+xirk6eEc/Hv0tyU5Lbk6xq2rbLwySnATPAJU38uyd5TpJPNa//ZJKlzeuvT/LHST4FvLFPx6EBajFHzwbeUFXfA6iqa4B/AV6V5Fxg9yaWS5r9d0ny/iZfr0myexPb05Nc3eTiPyV5RtN+cZLzklwHvDPJL+Uno3ZfyKzR3XEw9hXiiDoH+N2qOgU6w7aztv0+8I9V9dokewKfS/IPVfXdrTs0SfRPXd77lVV1xxztdwErkuwFnAH8DfDiWdsPAX69qn57sQeliTDSuZnkOcB+VXVY83l7LuooNY5GMTd//Hrgk1X1jiS7AD8NHIG5OslGMR9fW1Xfar6ofj7J5XRGMR6Th1X1UJLXN/GvS7Ib8G5gRVVtSfKrwDuA1zbvu2dV/dK8/2U0KoaWo0meDOxRVV/ZZr91wLOq6s1JXt+MuJFkOXAQcEZV/UaSDwO/QienVwO/VVV3JXku8B7g+Ob9DgZeWFWPJvnfwOuq6p+TPBH4fwv752mXBdbwvQh4aZLfbdafACwDNmzdoaoepvM/74W6AjgdeC7wm9ts+79V9dlFvKemxyjk5j3A05K8G/h74JpFfJYmT1u5udXngYuaL6p/V1Xrk5ir06utfPydJC9rlg+g8wX2y+w8Dw8BDgPWJgHYBdg0a/uHFhGnRtsgc3S2ANVl21eran2zfBOwvCmUngd8pMlFgJ+a9ZqPVNWjzfI/A+c1I2JXVNXGHmMdKgus4QvwK1X15a47LO6XL4DL6JxmtaaqfjQreQG+O/dLpB9rPTer6sEkhwO/TOd871fwk19ZNb3ayk0AquqGJM8HXgL8ryR/UlUfNFen1tDzsRmdeCFwTFV9L8n1wBPm2WcGuL2qjunymX4/mDx9zdGq+naS7yZ5WlXdM2u/ZwOf6vIe/z5r+VFgdzqXJj20daRrDrO/D5yb5O+Bk4HPJnlhVX2p2/GMGguswXgY6Hau6CeBNyR5Q1VVkiOr6guzd1jsrwpV9bUkvw/8w4Ij1rQY6dxMZ7asH1TV5Um+Aly80M/S2BrZ3Ezy88B9VfX+JHsAz07yCczVSTZq+fgzwINNcfUM4GjYYZ85O/4vA0uSHFNVn2lGYg+uqtsXGp9GyrBz9E+AP0/y8qr6fpIXAv+Rn4y0/jDJblX1w25v0BR
qX23e4yPp/ILwC1X1xW33TfL0qroVuDXJMcAzAAusKXcL8EiSL9Lp7GYn9R8B7wJuaRLrXuCUfn1wVf1Vv95LE2nUc3M/4K/zk9kE39qvz9fIG+XcPA74r0l+CHwHeA3m6qQbtXy8GvitJLfQKZi2nlbdLQ8vBt6X5PvAMcBpdL4c/wyd737vAiywxtuwc/TdwF50Cp5HgW/Qua7v+8321c3n3UznGrBuXgW8N8kfALvRGbXdrsAC3pTkBXRGv+4A/k+P8Q9VqrqdOilJkiRJWginaZckSZKkPtlpgZXOPPqbM+uOzEn+JMmXktyS5GOzp6dN8tYkdyf5cpJfHlTgkiRJkjRq5jOCdTFw0jZta4HDquoXgDtpzvlNciid6UWf1bzmPencM0SSJEmSJt5OC6yqugH41jZt11TV1ruLfxbYv1leAVxWVf9eVV8F7gaO6mO8kiRJkjSy+nEN1mv5ycwe+wFfn7VtY9O2nSSrkqxLsu5Zz3pW0blRmQ8f2z5GxkknndT2v4WP0X2MBHPUxw4eI8Ec9bGTx0gwT33s4DEvPRVYzb0aHgEu2do0x25zBlNVq6tqpqpmdt99917CkIbim9/8ZtshaEIluTfJrUnWJ1nXtO2dZG2Su5rnvXb2PuaoRp05qnFgnqpXiy6wkqykM6f+q+onc71vBA6Ytdv+wP2LD0+SpsYLquqIqppp1s8Brq2qg4Brm3VJkjTiFlVgJTkJOBt4aVV9b9amq4DTk/xUkgOBg4DP9R6mJE2dFcCaZnkNcGqLsUiSpHmazzTtlwKfAQ5JsjHJmcBfAE8C1jantLwPoKpuBz5M547LVwOvq6pHBxa9JE2GAq5JclOSVU3bvlW1CaB53qe16CRJ0rzturMdquqMOZov3MH+7wDe0UtQkjRljq2q+5PsQ+eHqy/N94VNQbYKYNmyZYOKT5IkzVM/ZhGUJPWgqu5vnjcDH6Nze4sHkiwFaJ43d3ntjycMWrJkybBCliRJXVhgSVKLkuyR5Elbl4EXAbfRuaZ1ZbPbSuDKdiKUJEkLsdNTBCfd+WvvnLP9rBMPHnIk0sJ0y10wf8fMvsDHkkCnT/7bqro6yeeBDzfXvX4NeHmLMe6Ufal6ZQ5Jo8fvGosz9QWWJLWpqu4BDp+j/V+BE4YfkSRJ6oUFVhf+kiZJkiRpobwGS5IkSZL6xAJLkiRJkvrEAkuSJEmS+sQCS5IkSZL6xAJLEy3JAUmuS7Ihye1J3ti0/2GS+5Ksbx4ntx2rJI0i+1FJWhhnEdSkewR4S1Xd3NzM9aYka5tt51fVn7YYmySNA/tRSVoACyxNtKraBGxqlh9OsgHYr92oJGl82I9K0sJ4iqCmRpLlwJHAjU3T65PckuSiJHt1ec2qJOuSrNuyZcuQIpWk0WQ/Kkk7Z4GlqZDkicDlwJuq6tvAe4GnA0fQ+WX2z+Z6XVWtrqqZqppZsmTJ0OKVpFFjPypJ82OBpYmXZDc6XwouqaorAKrqgap6tKp+BLwfOKrNGCVplNmPStL8WWBpoiUJcCGwoarOm9W+dNZuLwNuG3ZskjQO7EclaWF2OslFkouAU4DNVXVY07Y38CFgOXAv8IqqerDphC8ATga+B/xaVd08mNCleTkWeDVwa5L1TdvbgDOSHAEUnRz+zXbCk6SRZz8qSQswn1kELwb+AvjgrLZzgGur6twk5zTrZwMvBg5qHs+lc372c/sZsLQQVfVpIHNs+sSwYxlH56+9c872s048eMiRSGqL/agkLcxOTxGsqhuAb23TvAJY0yyvAU6d1f7B6vgssOc2pxBIkiRJ0sRa7DVY+zb3xdh6f4x9mvb9gK/P2m8jXe6V4bStkiRJGrQkeyb5aJIvJdmQ5Ji2Y9Jk6/ckF3OdQlBz7ei0rZIkSRqCC4Crq+oZwOHAhpbj0YRbbIH1wNZT/5rnzU37RuCAWfvtD9y/+PAkSZKkxUnyZOD5dGbCpKp+UFUPtRuVJt1iC6yrgJXN8krgyln
tr0nH0cC/bT2VUJIkSRqypwFbgL9O8oUkH0iyx7Y7eemK+mmnBVaSS4HPAIck2ZjkTOBc4MQkdwEnNuvQmVHoHuBuOjcd/O2BRC1JkiTt3K7As4H3VtWRwHfpzH79GF66on7a6TTtVXVGl00nzLFvAa/rNShJmjZJdgHWAfdV1SlJDgQuA/YGbgZeXVU/aDNGSRpDG4GNVXVjs/5R5iiwpH7q9yQXkqTFeSOPvfD6ncD5VXUQ8CBwZitRSdIYq6pvAF9PckjTdAJwR4shaQpYYElSy5LsD7wE+ECzHuB4Or+0wmPvNyhJWpg3AJckuQU4AvjjluPRhNvpKYKSpIF7F/B7wJOa9acAD1XVI81613sKSpJ2rKrWAzNtx6HpYYElTZHz1945Z/tZJx485Ei0VZJTgM1VdVOS47Y2z7HrnPcUTLIKWAWwbNmygcQoSZLmz1MEJaldxwIvTXIvnUktjqczorVnkq0/gnW9p6AzX0mSNFossCSpRVX11qrav6qWA6cD/1hVrwKuA05rdpt9v0FJkjTCLLAkaTSdDbw5yd10rsm6sOV4JEnSPHgNliSNiKq6Hri+Wb4HOKrNeCRJ0sJZYEmS5q3bRCmSJKnDUwQlSZIkqU8ssDTRkhyQ5LokG5LcnuSNTfveSdYmuat53qvtWCVpFNmPStLCWGBp0j0CvKWqngkcDbwuyaHAOcC1VXUQcG2zLknanv2oJC2ABZYmWlVtqqqbm+WHgQ3AfsAKYE2z2xrg1HYilKTRZj8qSQvjJBcL1O0C77NOPHjIkWihkiwHjgRuBPatqk3Q+fKQZJ8ur1kFrAJYtmzZcALdhpMKSBoV49qPStIwOYKlqZDkicDlwJuq6tvzfV1Vra6qmaqaWbJkyeAClKQRZz8qSfNjgaWJl2Q3Ol8KLqmqK5rmB5IsbbYvBTa3FZ8kjTr7UUmav54KrCRnNTMK3Zbk0iRPSHJgkhubWYU+lOTx/QpWWqgkAS4ENlTVebM2XQWsbJZXAlcOOzZJGgf2o5K0MIsusJLsB/wOMFNVhwG7AKcD7wTOb2YVehA4sx+BSot0LPBq4Pgk65vHycC5wIlJ7gJObNYlSduzH5WkBeh1kotdgd2T/BD4aWATcDzwymb7GuAPgff2+DnSolTVp4F02XzCMGORpHFkPypJC7PoAquq7kvyp8DXgO8D1wA3AQ9V1SPNbhvpTOW6HWcVkgbHmQclSZLa0cspgnvRuQfGgcDPAXsAL55j15rr9c4qJEmSJGnS9DLJxQuBr1bVlqr6IXAF8DxgzyRbR8b2B+7vMUZJkiRJGgu9FFhfA45O8tPNDEMnAHcA1wGnNfs4q5AkSZKkqbHoAquqbgQ+CtwM3Nq812rgbODNSe4GnkJnaldJkiRJmng9zSJYVW8H3r5N8z3AUb28ryRJkiSNo55uNCxJkiRJ+gkLLEmSJEnqEwssSZIkSeoTCyxJalGSJyT5XJIvJrk9yX9r2g9McmOSu5J8KMnj245VkiTtnAWWJLXr34Hjq+pw4AjgpCRHA+8Ezq+qg4AHgTNbjFGSJM2TBZYktag6vtOs7tY8Cjiezq0wANYAp7YQniRJWiALLElqWZJdkqwHNgNrga8AD1XVI80uG4H92opPkiTNnwWWJLWsqh6tqiOA/encR/CZc+0212uTrEqyLsm6LVu2DDJMSZI0DxZYkjQiquoh4HrgaGDPJFtvBr8/cH+X16yuqpmqmlmyZMlwApUkSV3tuvNdJEmDkmQJ8MOqeijJ7sAL6UxwcR1wGnAZsBK4sr0opfacv/bOOdvPOvHgIUeicZZkF2AdcF9VndJ2PJpsFliS1K6lwJrmf/6PAz5cVR9PcgdwWZL/AXwBuLDNICVpzL0R2AA8ue1ANPkssCSpRVV1C3DkHO330LkeS5LUgyT7Ay8B3gG8ueVwNAUssDTRklwEnAJsrqrDmrY/BH4D2DojwNuq6hPtRChJo8++VGPuXcDvAU/qtkOSVcAqgGX
Llg0prOHrdsqt+stJLjTpLgZOmqP9/Ko6onn4hUCSduxi7Es1hpJs/WHgph3t54RB6qepGcGyYp9OVXVDkuVtxyFJ48y+VGPsWOClSU4GngA8OcnfVNV/bjkuTbCpKbCkbbw+yWvozCj0lqp6cK6dPGVAknZop33ptPSjGk1V9VbgrQBJjgN+1+JKg9bTKYJJ9kzy0SRfSrIhyTFJ9k6yNsldzfNe/QpW6pP3Ak8HjgA2AX/WbUdPGZCkrubVl9qPSpo2vV6DdQFwdVU9AziczvSX5wDXVtVBwLXNujQyquqBqnq0qn4EvB9napOkBbMv1bipquu9B5aGYdEFVpInA8+nuTdLVf2gqh4CVgBrmt3WAKf2GqTUT0mWzlp9GXBbW7FI0riyL5WkufVyDdbT6EzN+tdJDgduonMTt32rahNAVW1Kss9cL/acbA1DkkuB44CnJtkIvB04LskRQAH3Ar/ZWoCSNAbsSyVp/nopsHYFng28oapuTHIBCzgdsKpWA6sBZmZmqoc4pK6q6ow5mi8ceiDz4EQTkkbVOPWlktS2XgqsjcDGqrqxWf8onQLrgSRLm9GrpcDmXoMcBzv6cnzWiQcPMRJJGn3d+kz7S0nSuFv0NVhV9Q3g60kOaZpOAO4ArgJWNm0rgSt7ilCSJEmSxkSv98F6A3BJkscD9wC/Tqdo+3CSM4GvAS/v8TMkSZIkaSz0VGBV1XpgZo5NJ/TyvpKk/vF0PEmShqfX+2BJkiRJkhoWWJIkSZLUJxZYkiRJktQnFliSJEmS1CcWWJIkSZLUJxZYkiRJktQnvd4HS9IUctpvSZKkuTmCJUmSJEl94giWJLUoyQHAB4GfBX4ErK6qC5LsDXwIWA7cC7yiqh5sK05J0ujxjJLR5AiWJLXrEeAtVfVM4GjgdUkOBc4Brq2qg4Brm3VJkjTiLLAkqUVVtamqbm6WHwY2APsBK4A1zW5rgFPbiVCSJC2EpwgOgcO3kuYjyXLgSOBGYN+q2gSdIizJPl1eswpYBbBs2bLhBCpJkrpyBEuSRkCSJwKXA2+qqm/P93VVtbqqZqpqZsmSJYMLUJIkzYsFliZakouSbE5y26y2vZOsTXJX87xXmzFKSXajU1xdUlVXNM0PJFnabF8KbG4rPsm+VJLmz1MENekuBv6CzixtW22dPODcJOc062e3EJtEkgAXAhuq6rxZm64CVgLnNs9XthBez7qdIq2xczH2pZI0L45gaaJV1Q3At7ZpdvIAjZJjgVcDxydZ3zxOplNYnZjkLuDEZl1qhX2pJM1fzyNYSXYB1gH3VdUpSQ4ELgP2Bm4GXl1VP+j1c6Q+mtfkAeAEAhq8qvo0kC6bTxhmLNICtToRixNISRpV/ThF8I10phV+crP+TuD8qrosyfuAM4H39uFzpKGrqtXAaoCZmZlqORxp4vmlefLYj0qaNj2dIphkf+AlwAea9QDHAx9tdvGUAY0iJw+QpN7Zl0rSHHq9ButdwO8BP2rWnwI8VFWPNOsb6dwwcztJViVZl2Tdli1begxDWpCtkwfAGE8eIEktsy+VpDksusBKcgqwuapumt08x65zng7gvVs0DEkuBT4DHJJkY5IzcfIASVoQ+1JJmr9ersE6FnhpM9vVE+hcg/UuYM8kuzajWPsD9/ceprQ4VXVGl01OHjAAi5mS22trpNFnXypJ87foEayqemtV7V9Vy4HTgX+sqlcB1wGnNbt5yoAkSZKkqTGI+2CdDbw5yd10rsm6cACfIUmSJEkjpx/TtFNV1wPXN8v3AEf1430nndMRS5IkSZOlLwWWJGn8+COPpEmX5ADgg8DP0pn1enVVXdBuVJp0FliSJEmaVI8Ab6mqm5M8CbgpydqquqPtwDS5BnENliRJktS6qtpUVTc3yw8DG+hyj1apXxzBkiRJ0sRLshw4Erhxjm2rgFUAy5YtG2pc87HQ26As5rYpC+Vp5t05giVJkqSJluSJwOXAm6rq29tur6rVVTVTVTN
LliwZfoCaKBZYkiRJmlhJdqNTXF1SVVe0HY8mnwWWJEmSJlKS0Lkn64aqOq/teDQdLLAkSZI0qY4FXg0cn2R98zi57aA02ZzkQpIkSROpqj4NpO04NF0cwZIkSZKkPrHAkiRJkqQ+scCSJEmSpD7xGixJalGSi4BTgM1VdVjTtjfwIWA5cC/wiqp6cFgxDeMGlZIkTSpHsCSpXRcDJ23Tdg5wbVUdBFzbrEuSpDHgCJamVpJ7gYeBR4FHqmqm3Yg0jarqhiTLt2leARzXLK8BrgfOHlpQ0gLYl0rSY1lgadq9oKq+2XYQ0jb2rapNAFW1Kck+3XZMsgpYBbBs2bIhhSdtx75UkhqLPkUwyQFJrkuyIcntSd7YtO+dZG2Su5rnvfoXriRptqpaXVUzVTWzZMmStsORJGnq9XIN1iPAW6rqmcDRwOuSHIrXDmh8FHBNkpuaUYDtJFmVZF2SdVu2bBlyeJpiDyRZCtA8b245HmlHdtiX2o9KmjaLLrCqalNV3dwsPwxsAPajc+3Amma3NcCpvQYpDcixVfVs4MV0fiB4/rY7ODqgllwFrGyWVwJXthiLtDM77EvtRyVNm75cg9VcoH0kcCPzvHbA6wa66zZF8lknHjzkSCZbVd3fPG9O8jHgKOCGdqPStElyKZ0JLZ6aZCPwduBc4MNJzgS+Bry8vQilHbMvlaTH6rnASvJE4HLgTVX17STzel1VrQZWA8zMzFSvcUgLkWQP4HFV9XCz/CLgv7cclqZQVZ3RZdMJQw1EWgT7UknaXk8FVpLd6BRXl1TVFU3zA0mWNqNXXjugUbUv8LHmB4Fdgb+tqqvbDUmSxo59qSRtY9EFVjq96YXAhqo6b9amrdcOnIvXDmhEVdU9wOFtxyFJ48y+VJK218sI1rHAq4Fbk6xv2t6G1w5IkiRJC9btOvxxstC5BCZx7oFFF1hV9Wmg2wVXXjswRJOYmJIkSdI46uU+WJIkSZKkWfoyTbuGYxKGjSVJkqRJ5giWJEmSJPWJBZYkSZIk9YkFliRJkiT1iQWWJEmSJPWJBZYkSZIk9YmzCEqSRt5i7vfnPQIlSW1wBEuSJEmS+sQCS5IkSZL6xAJLkiRJkvrEAkuSJEmS+sQCS5IkSZL6ZOJmEew2a9Q0Wsy/hbNrSRoni+nnnF1QkjRIE1dgSaPOHwF645djSZI0yjxFUJIkSZL6ZGAjWElOAi4AdgE+UFXnDuqzpMUwRzXqzNHhcnR0ccxTjTpzVMM2kBGsJLsAfwm8GDgUOCPJoYP4LGkxzFGNOnNU48A81agzR9WGQY1gHQXcXVX3ACS5DFgB3DGgz9OATeAvu+aoRp05qnFgnmrUmaMaukEVWPsBX5+1vhF47uwdkqwCVjWr30ny5Tne56nANwcS4Whr7bjfPOD9d6DbMV9dVSf172N+bKc5CubpDvTtmPuVQ33MxR0ZZp6ao73ry3EPKbf6ZeT60nnmKPj3mhY7Omb70tHT2v/vW/5vuae+dFAFVuZoq8esVK0GVu/wTZJ1VTXTz8DGwTQedwvHvNMcBfO0m2k8Zhj6cZujPZrG4x7FvnQ+OQr+vabFKOYo2Jd2M43HDL0f96BmEdwIHDBrfX/g/gF9lrQY5qhGnTmqcWCeatSZoxq6QRVYnwcOSnJgkscDpwNXDeizpMUwRzXqzFGNA/NUo84c1dAN5BTBqnokyeuBT9KZEvOiqrp9EW+101MKJtQ0HvdQj7mPOQr+vabJ0I7bHO2LaTxu+9Lx4jEPmDnas2k8ZujxuFO13WmokiRJkqRFGNQpgpIkSZI0dSywJEmSJKlPRrbASnJSki8nuTvJOW3HMyhJ7k1ya5L1SdY1bXsnWZvkruZ5r7bj7FWSi5JsTnLbrLY5jzMdf9787W9J8uz2Iu9uWnIUpiNPJzFHYXrydBpyFCYzT6clR2E68tQcHW/TkKMw+DwdyQIryS7AXwIvBg4FzkhyaLtRDdQLquqIWfPtnwNcW1UHAdc26+P
uYmDbG7N1O84XAwc1j1XAe4cU47xNYY7C5OfpxUxQjsJU5umk5yhMWJ5OYY7C5OfpxZij427ScxQGnKcjWWABRwF3V9U9VfUD4DJgRcsxDdMKYE2zvAY4tcVY+qKqbgC+tU1zt+NcAXywOj4L7Jlk6XAinbdpz1GYsDydwBwF83SichQmMk+nPUdhwvLUHJ1IE5WjMPg8HdUCaz/g67PWNzZtk6iAa5LclGRV07ZvVW0CaJ73aS26wep2nOPw9x+HGPtpWvN0nHMUxifOfpjWHIXxztNxiLGfpjVPzdHxMa05Cn3M04HcB6sPMkfbpM4nf2xV3Z9kH2Btki+1HdAIGIe//zjE2E/m6WONy99/XOLsB3N0e+Pw9x+HGPvJPH2scfj7j0OM/WSObm/BOTCqI1gbgQNmre8P3N9SLANVVfc3z5uBj9EZin5g69Bj87y5vQgHqttxjsPffxxi7JspztNxzlEYnzh7NsU5CuOdp+MQY99McZ6ao2NiinMU+pino1pgfR44KMmBSR4PnA5c1XJMfZdkjyRP2roMvAi4jc6xrmx2Wwlc2U6EA9ftOK8CXtPM2nI08G9bh2xHyFTkKEx9no5zjsKU5OmU5yiMd55ORY7C1OepOToGpjxHoZ95WlUj+QBOBu4EvgL8ftvxDOgYnwZ8sXncvvU4gafQmb3kruZ577Zj7cOxXgpsAn5I55eAM7sdJ52h2L9s/va3AjNtx9/lmCY+R5vjnIo8ncQcbWKd+Dydlhxtjmni8nQacrQ5zqnIU3N0fB/TkqPNMQ00T9O8UJIkSZLUo1E9RVCSJEmSxo4FliRJkiT1iQWWJEmSJPWJBZYkSZIk9YkFliRJkiT1iQVWC5K8LEkleUbbsUjdmKcadeaoRp05qnFgnvafBVY7zgA+TedmddKoMk816sxRjTpzVOPAPO0zC6whS/JE4Fg6NzQ7vWl7XJL3JLk9yceTfCLJac225yT5VJKbknwyydIWw9eUME816sxRjTpzVOPAPB0MC6zhOxW4uqruBL6V5NnAfwKWA/8B+C/AMQBJdgPeDZxWVc8BLgLe0UbQmjrmqUadOapRZ45qHJinA7Br2wFMoTOAdzXLlzXruwEfqar2rX05AAABPklEQVQfAd9Icl2z/RDgMGBtEoBdgE3DDVdTyjzVqDNHNerMUY0D83QALLCGKMlTgOOBw5IUncQs4GPdXgLcXlXHDClEyTzVyDNHNerMUY0D83RwPEVwuE4DPlhVP19Vy6vqAOCrwDeBX2nOed0XOK7Z/8vAkiQ/HppN8qw2AtdUMU816sxRjTpzVOPAPB0QC6zhOoPtfxW4HPg5YCNwG/BXwI3Av1XVD+gk/zuTfBFYDzxveOFqSpmnGnXmqEadOapxYJ4OSKqq7RhEZxaXqvpOM1z7OeDYqvpG23FJs5mnGnXmqEadOapxYJ72xmuwRsfHk+wJPB74I5NYI8o81agzRzXqzFGNA/O0B45gSZIkSVKfeA2WJEmSJPWJBZYkSZIk9YkFliRJkiT1iQWWJEmSJPWJBZYkSZIk9cn/B2h/ie+85Tu9AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visualize the Age distribution for each Title (one histogram per Title, 5-year bins)\n", + "# note: FacetGrid's `size` argument was renamed to `height` in seaborn 0.9 and later removed\n", + "grid = sns.FacetGrid(df, col='Title', height=3, aspect=0.8, sharey=False)\n", + "grid.map(plt.hist, 'Age', alpha=.5, bins=range(0,105,5))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "_cell_guid": "770f5c78-948f-4261-ae6f-dda0a05da8e6", + "_execution_state": "idle", + "_uuid": "9b312ecc51104501196e3f6806abc92d29e9eb46" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
Title
Master5.482642
Miss21.795236
Mr32.252151
Mrs36.930636
Others45.074074
\n", + "
" + ], + "text/plain": [ + " Age\n", + "Title \n", + "Master 5.482642\n", + "Miss 21.795236\n", + "Mr 32.252151\n", + "Mrs 36.930636\n", + "Others 45.074074" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# average Age within each Title group\n", + "df.groupby('Title')[['Age']].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "_cell_guid": "b81954da-2e36-410f-9505-c6b7ae26803f", + "_execution_state": "idle", + "_uuid": "ba25e608acce46fba9b0c623f8b62b91f29151a7" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
Title
Master4.161554
Miss12.192794
Mr12.422089
Mrs12.872625
Others11.303253
\n", + "
" + ], + "text/plain": [ + " Age\n", + "Title \n", + "Master 4.161554\n", + "Miss 12.192794\n", + "Mr 12.422089\n", + "Mrs 12.872625\n", + "Others 11.303253" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the standard deviation of Age for each Title\n", + "df[['Title', 'Age']].groupby(['Title']).std()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "_cell_guid": "38208976-cd0d-4756-aebc-5d980d7357a5", + "_execution_state": "idle", + "_uuid": "3d9ea40ed50748bfac7990788916d49eb3886174" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1gAAADQCAYAAAAalMCAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X+0ZXV53/H3R8BIRIuYgUyEyZAWDWor6l0UJXEhBIvGFbBFK7UGG9JJV0yraBLRZi1tNWthfoimJtaJUsYuFRClUGKNsyYQwqqiAyI/JIoSlo4zYUaFCEmqAZ7+sffVy8y9c+/ce87ZZ+/zfq211zl7n73vefa9z+w5z/l+9/ebqkKSJEmStHaP6ToASZIkSRoKCyxJkiRJGhELLEmSJEkaEQssSZIkSRoRCyxJkiRJGhELLEmSJEkaEQssIMnDSW5ZsGwcw3u8Jsl7l3jt/476/fb6+Z9Kcn+Sa8b5Phq/IedqkhOSfCbJHUluTfKvx/VeGq+B5+lPJrmpPa87kvyHcb2Xxm/IubrgPZ6Y5JtLxaB+GHqu7nV+V4/zvSbh4K4DmBJ/X1UnHOhBSQ6qqofX+uZV9fy1/oxl/C7wo8CvjPl9NH5DztW/A36xqu5K8hPATUn+tKruH+N7ajyGnKe7gOdX1feSHAbcnuTqqto5xvfU+Aw5V+e9HfjzCbyPxmvoubqq85tWtmAtIcnGJH+R5OZ2eX67/ZQk1yb5CHBbu+3fJvlcW3W/P8lBS/zYY9rWpC8neeuC93pwwc++LskVSf4yyYeTZK3nUlXbgAfW+nM0nYaSq1X1laq6q32+E9gNrFvLz9T0GFCefr+qvteu/gj+Pzo4Q8nV9uc+FzgK+PRaf5amz5BydXCqauYX4GHglna5st32o8Dj2ufHAdvb56cAfwsc264fD/xv4JB2/Y9ovoXf+z1eQ/PN55OBQ4Hbgbn2tQcX/Oy/AY6m+U/7M8DPLPKzfmNBvAuXP9jPOZ4CXNP179rFXF0uV9vjTgTuBB7T9e/cxTxdZP9jgFtpWl1f2/Xv28VcXSxX259zXZuvrwHe2/Xv28Vc3c919SFgO/BZ4Kyuf99rXewi2FisWfIQ4L1JTqBJ6qcueO1zVfVX7fPTgOcCn28L+ENpvnlfzNaq+jZAkk8AP0OTTAt9rqp2tPvcAmwEbli4Q1X9Lk23P82ewedqkvXA/wTOrapHDuRYTY1B52lVfQP4Z2m6sv6vJFdU1b0rPV5TZci5+qvAJ6vqGzYwDMKQcxVgQ1XtTPJTwJ8lua2qvnYAx08VC6ylnQ/cCzyLpkL/fwte+9sFzwNsqao3Lzw4ycuA
+abVX24fa6/32Hsd4HsLnj/MIn+jJL8BvGqRY6+vqv+0yHYN22ByNckTgT8BfquqPrvIceqvweTpD96s+TBwB/CzwBVL7afeGUquPg/42SS/ChwGPDbJg1V1wSLHq5+GkqtUex9rVd2d5Drg2YAF1gD9I2BHVT2S5Fxgqb6q24CrklxUVbuTHAE8oaquBK6c3ynJM4HT29f/HjgL+KXVBGYLlvYyiFxN8tg2jg9V1cdW836aakPJ06OBb1fV3yd5EnAy8K7VvK+m1iBytap+8OE2yWtounpZXA3LIHK1vZb+XTWDB/0YzXX1d1bzvtPCm3OX9kfAuUk+S9Pk+reL7VRVXwJ+C/h0kluBrcD6JX7mDTRdn24BPl5Veze5jkWSvwA+BpyWZEeSfzGJ99XEDCVXXwG8AHhNfjhU62BGFNJg8vR44MYkX6QZme33quq2CbyvJmcouarhG0quHg9sb6+r1wIXtjH3VqoWa/mTJEmSJB0oW7AkSZIkaURWVGAlOXzBePd3JnlekiOSbE1yV/v4pHEHK0lDkeSgJF9Ick27fmySG9tr6mXtPWmSJKlnVtqC9R7gU1X10zQjldwJXABsq6rjaG6e88ZJSVq519FcS+e9E7iovabeB5zXSVSSJGlNli2w2mGTXwB8EH4wi/39wJnAlna3LTQjjUiSltGORPfzwAfa9QCn8sOhvr2mSpLUUytpwfopYA/wP9ruLB9I8njgqKraBdA+HrnYwUk2JdmeZPsznvGMohlP38VlsWUqnHHGGV3/Hlymf1mrdwO/CcxPpPxk4P6qeqhd3wE8ZbEDvaa6HOAyFbyuuiyzTAXz1GUFy4qspMA6GHgO8L6qejbNEJAr7g5YVZuraq6q5g499NCVHiZ15lvf+lbXIWjAkrwU2F1VNy3cvMiui17Ivaaqj7yuqg/MU43KSgqsHTSTmN3Yrl9BU3Ddm2Q9QPu4ezwhStKgnAz8QpJ7gEtpuga+Gzg8yfzk70cDO7sJT5IkrcWyBVZV/TXwjSRPazedBnwJuBo4t912LnDVWCKUpAGpqjdX1dFVtRF4JfBnVfUqmskVz25385oqSVJPHbz8LgD8R+DD7bDBdwP/jqY4uzzJecDXgZePJ0RJmglvAi5N8g7gC7QDC0mSpH5ZUYFVVbcAc4u8dNpow5Gk2VFV1wHXtc/vBk7sMh5JkrR2K50HS5IkSZK0jJV2EZR6oR044AHgYeChqppLcgRwGbARuAd4RVXd11WMkiRJGq6ZKbAu2vqVRbeff/pTJxyJJuCFVbVwrNULgG1VdWGSC9r1N3UT2vLMVQ2Vua1pYj5qqMzt7tlFULPgTGBL+3wLcFaHsUhSLyR5XJLPJflikjuS/Jd2+7FJbkxyV5LL2gGwJEktCywNTQGfTnJTkk3ttqOqahdA+3jk3gcl2ZRke5Lte/bsmWC4kjS1vgecWlXPAk4AzkhyEvBO4KKqOg64DzivwxglaepYYGloTq6q5wAvBl6b5AUrOaiqNlfVXFXNrVu3brwRSlIPVOPBdvWQdimaybGvaLfbK0CS9mKBpUGpqp3t427gSpphr+9Nsh6gfdzdXYSS1B9JDkpyC811cyvwNeD+qnqo3WUH8JQljrVngKSZZIGlwUjy+CRPmH8OvAi4HbgaOLfd7Vzgqm4ilKR+qaqHq+oE4GiaL6yOX2y3JY61Z4CkmTQzowhqJhwFXJkEmtz+SFV9KsnngcuTnAd8HXh5hzFKUu9U1f1JrgNOAg5PcnDbinU0sLPT4CRpylhgaTCq6m7gWYts/zZw2uQjkqT+SrIO+Ie2uDoU+DmaAS6uBc4GLsVeAZK0DwssSZK0mPXAliQH0dxScHlVXZPkS8ClSd4BfAH4YJdBSkPh/FXDYYElSZL2UVW3As9eZPvdNPdjSVMhyeOA64Efoflse0VVvTXJsTQtrUcANwOvrqrvdxepZoWDXEiSJKnPnLNNU8UCS5IkSb3lnG2aNhZYkiRJ6rXVztnm
fG0aBwssSZIk9dpq52xzvjaNw4oGuUhyD/AA8DDwUFXNJTkCuAzYCNwDvKKq7htPmJIkSdL+OWebpsGBtGC9sKpOqKq5dv0CYFt74+C2dl2SJEmamCTrkhzePp+fs+1OfjhnGzhnmyZoLV0Ez6S5YRC8cVCSJEndWA9cm+RW4PPA1qq6BngT8IYkXwWejHO2aUJWOg9WAZ9OUsD7q2ozcFRV7QKoql1JjlzswCSbgE0AGzZsGEHIkiRJUsM52zRtVlpgnVxVO9siamuSv1zpG7TF2GaAubm5fW4ulCQNx0Vbv9J1CJIkdWpFBVZV7Wwfdye5kubbgHuTrG9br9bTDIspaUyW+uB6/ulPnXAkkiRJWsqy92AleXySJ8w/B14E3A5cTXPDIHjjoCRJkiStqAXrKODKJPP7f6SqPpXk88DlSc4Dvg68fHxhSpIkSRoHe8mM1rIFVnuD4LMW2f5t4LRxBCVJkiRJfbSWYdolSZIkSQtYYEmSpH0kOSbJtUnuTHJHkte129+W5JtJbmmXl3QdqyRNk5UO0y5JkmbLQ8Abq+rmdrCrm5JsbV+7qKp+r8PYJGlq2YIlSROU5HFJPpfki22rwH9ptx+b5MYkdyW5LMlju45Vs62qdlXVze3zB4A7gad0G5UkTT8LLA1KkoOSfCHJNe26H1o1bb4HnFpVzwJOAM5IchLwTppWgeOA+4DzOoxRepQkG4FnAze2m34tya1JLk7ypCWO2ZRke5Lte/bsmVCkktQ9CywNzetovmWd54dWTZVqPNiuHtIuBZwKXNFu3wKc1UF40j6SHAZ8HHh9VX0XeB/wj2m+INgF/P5ix1XV5qqaq6q5devWTSxeSeqaBZYGI8nRwM8DH2jXgx9aNYXaltZbgN3AVuBrwP1V9VC7yw6W6Iplq4AmKckhNMXVh6vqEwBVdW9VPVxVjwB/DJzYZYySNG0ssDQk7wZ+E3ikXX8yfmjVFGo/nJ4AHE3z4fT4xXZb4lhbBTQR7ZdUHwTurKp3Ldi+fsFuLwNun3RskjTNLLA0CEleCuyuqpsWbl5kVz+0ampU1f3AdcBJwOFJ5kd2PRrY2VVcUutk4NXAqXsNyf47SW5LcivwQuD8TqPUTHM6AU0jh2nXUJwM/EJ7AX0c8ESaFq3DkxzctmL5oVWdS7IO+Iequj/JocDP0dwreC1wNnApcC5wVXdRSlBVN7D4F1WfnHQs0n44nYCmjgWWBqGq3gy8GSDJKcCvV9WrknwMP7RquqwHtiQ5iKYXweVVdU2SLwGXJnkH8AWarllT66KtX+k6BEmiqnbRDLZCVT2QxOkE1DkLLA3dm+jRh1YNX1XdSjPc9d7b78bBAiRp1faaTuBkmukEfhHYTtPKdd8ix2wCNgFs2LBhYrFq2LwHS4NTVddV1Uvb53dX1YlV9U+q6uVV9b2u45MkSaPldAKaJrZgSZIktZbq/nr+6U+dcCRaqaWmE1jw+h8D13QUnmaQLViSJEnqJacT0DSyBUuSJEl9NT+dwG3tBO4AbwHOSXICzfQs9wC/0k14mkUrLrDaEa+2A9+sqpcmOZZmZLYjgJuBV1fV98cTpiRJkvRoTiegaXQgXQRfB9y5YP2dNPMLHAfcB5w3ysAkSZIkqW9W1IKV5Gjg54HfBt7Q9nc9Ffg37S5bgLfRjNgiSZK0LAeUkDREK23Bejfwm8Aj7fqTgfur6qF2fQdLTOqWZFOS7Um279mzZ03BSpIkSdI0W7YFK8lLgd1VdVOSU+Y3L7JrLXZ8VW0GNgPMzc0tuo8kSZKklVuqBVjdW0kXwZOBX0jyEuBxwBNpWrQOT3Jw24p1NLBzfGFKkiRJ0vRbtsCqqjcDbwZoW7B+vapeleRjwNk0IwmeC1w1xjglSZI6s7/WAu8Zk7TQWiYafhPNgBdfpbkn64OjCUmSJEmS+umAJhququuA69rndwMnjj4kafjsNy1p2iU5BvgQ8OM0g1xtrqr3JDkCuAzYSDOB6yuq6r6u
4pSkabOWFixJkjRcDwFvrKrjgZOA1yZ5OnABsK2dB3Nbuy5JallgSZKkfVTVrqq6uX3+AHAnzZQsZ9LMf0n7eFY3EUrSdDqgLoKSJGn2JNkIPBu4ETiqqnZBU4QlOXKJYzYBmwA2bNgwmUCnjBMpS7PJFixJkrSkJIcBHwdeX1XfXelxVbW5quaqam7dunXjC1CSpowFliRJWlSSQ2iKqw9X1SfazfcmWd++vh7Y3VV8EjQDsiS5NsmdSe5I8rp2+xFJtia5q318UtexajZYYEmSpH0kCc0ULHdW1bsWvHQ1zfyX4DyYmg4OyKKpYoElSZIWczLwauDUJLe0y0uAC4HTk9wFnN6uS51xQBZNGwe50GAkeRxwPfAjNLl9RVW9NcmxwKXAEcDNwKur6vvdRSpJ06+qbgCyxMunTTKWaefchtNjNQOySKNmgaUh+R5walU92N43cEOS/wO8Abioqi5N8t+B84D3dRmoJEkarb0HZGl6uS57zERHu1xNMW4B3z92EdRgVOPBdvWQdingVOCKdrtdBCRJGpjVDsjiaJcaBwssDUqSg5LcQnMR3Qp8Dbi/qh5qd9lB0y977+M2JdmeZPuePXsmF7AkSVoTB2TRtLGLoAalqh4GTkhyOHAlcPxiuy1y3GZgM8Dc3Nw+r0t9sr/uJAc6waldUyT1wPyALLe1X7ICvIVmAJbLk5wHfB14eUfxacZYYGmQqur+JNfRDNd6eJKD21aso4GdnQYnSZJGxgFZxmepL9kO9Mu6WWMXQQ1GknVtyxVJDgV+jmao1muBs9vd7CIgSZKksZn5Fiwr80FZD2xJchDNlweXV9U1Sb4EXJrkHcAXaPppS5IkSSM38wWWhqOqbqWZ+2Lv7XcDJ04+IkmSJM2aZQssJ2+VpputsP2S5BjgQ8CPA48Am6vqPUmOAC4DNgL3AK+oqvu6ilPqktc1SX22knuw5idvfRZwAnBGkpOAd9JM3noccB/N5K2SpP17CHhjVR1PMwjLa5M8HbgA2NZeU7e165IkqWeWLbCcvFWSRqeqdlXVze3zB2gGYnkKcCbNtRS8pkqS1FsrGkVwtZO3tsc6gaskLSLJRpr7Bm8EjqqqXdAUYcCRSxzjNVWSpCm2okEuVjt5a3usE7hK0l6SHAZ8HHh9VX03WWoKl0fzmiodGCfLljRpBzQPVlXdD1zHgslb25ecvFWSVijJITTF1Yer6hPt5nuTrG9fX0/TY0CSJPXMsgWWk7dK0uikaar6IHBnVb1rwUtX01xLwWuqpkCSi5PsTnL7gm1vS/LNJLe0y0u6jFGSptFKugg6easkjc7JwKuB29p7WwHeAlwIXJ7kPODrwMs7im9qOFR35y4B3kszrcBCF1XV700+HEnqh2ULrL5N3mpfa0nTrKpuAJa64eq0ScYi7U9VXd8OxCJJOgAHdA+WJEmaeb+W5Na2C+GTltrJES8lzaoVjSIoSdK42POgV94HvJ1m5OC3A78P/NJiOzripSYlycXAS4HdVfXMdtvbgH8PzFf3b6mqT3YToWaNLViSJGlFqureqnq4qh4B/pgpvFVAM+kS4IxFtl9UVSe0i8WVJsYCS5Ikrcj8VAKtlwG3L7WvNClVdT3wna7jkOZZYEmSpH0k+SjwGeBpSXa0I1z+TpLbktwKvBA4v9Mgpf1b9n5B7xXUOHgPliRJ2kdVnbPIZqdkUV+s6H5B7xXUONiCJUmSpEHxfkF1yQJLkiRJg+L9guqSXQQlSZLUW+39gqcAP5ZkB/BW4JQkJ9B0EbwH+JXOApwSTokxORZYkiRJ6i3vF9S0sYugBiPJMUmuTXJnkjuSvK7dfkSSrUnuah8XHUlIkiRJWisLLA3JQ8Abq+p44CTgtUmeDlwAbKuq44Bt7bokSZI0chZYGoyq2lVVN7fPHwDuBJ4CnAlsaXfbApzVTYSSJEkaOgssDVKSjcCzgRuBo6pqFzRFGHDkIvs70aAkSZLWzEEuNDhJDgM+Dry+qr6b
ZNljnGhQkvqtTyOkLRXr+ac/dcKRSBoHW7A0KEkOoSmuPlxVn2g33zs/H0b7uLur+CRJkjRsyxZYjsymvkjTVPVB4M6qeteCl64Gzm2fnwtcNenYJEmSNBtW0kVwfmS2m5M8AbgpyVbgNTQjs12Y5AKakdneNL5QpWWdDLwauC3JLe22twAXApcnOQ/4OvDyjuKTJEnqPbu57t+yBVY7KMD8AAEPJFk4Mtsp7W5bgOuwwFKHquoGYKkbrk6bZCySJEmaTQc0yMX+RmZLss/IbO0xm4BNABs2bFhLrBNlZS5JkiTpQK14kIu9R2Zb6XFVtbmq5qpqbt26dauJUZIkTViSi5PsTnL7gm3efy1Jy1hRC9b+RmZrW68cmU2SpGG5BHgv8KEF2y7A+68lLcEeYI2VjCLoyGySJM2Yqroe+M5em8+kue+a9vGsiQYlLcLWVk2blbRgOTKbJEmCFd5/Df29B3ta2TKwX5dga6umyEpGEXRkNkmSdECqajOwGWBubq46DkcDVlXXtwOxLeRo1+rMige5kCRJM+/e9r5rvP9aU+5Rra3Akq2t0qhZYEmSpJXy/msNSpJNSbYn2b5nz56uw9FAWGBJkqR9JPko8BngaUl2tPdcXwicnuQu4PR2XZpGK2ptdTohjcMBTTQsSVq7JBcDLwV2V9Uz221HAJcBG4F7gFdU1X2jfm9vlNdKVdU5S7zk/dfqg/nW1guxtVUTZguWJE3eJcAZe22bH/HqOGBbuy5JWoatrZo2tmBJA2VLxfRyxCtJGh1bWzVtbMGSpOmwohGvvCFbkqTpZguWJPWIcwtpli3VMj8UQz8/aVbYgiVJ08H5hSRJGgBbsA7Q/r5d8t4WSWvgiFeSNBC2Rs42W7AkacIc8UqSpOGyBUuSJswRryRJGi5bsCRJkiRpRCywNBhJLk6yO8ntC7YdkWRrkrvaxyd1GaMkSZKGzS6CGpJLgPcCH1qw7QJgW1VdmOSCdt3JW6W9eEO2JEmjYQuWBqOqrge+s9fmM4Et7fMtwFkTDUqSJEkzZdkCy25X6rmjqmoXQPt45GI7JdmUZHuS7Xv27JlogJIkSRqOlbRgXQKcsde2+W5XxwHb2nWpt6pqc1XNVdXcunXrug5HkiRJPbVsgWW3K/XcvUnWA7SPuzuOR5J6L8k9SW5LckuS7V3HI0nTZLWDXDyq21WSRbtdQdP1CtgEsGHDhlW+nbRqVwPn0kzaei5wVbfhdG9/gxmcf/pTJxiJpJ57YVV9q+sgpP1Jcg/wAPAw8FBVzXUbkWbB2Ae5sOuVJiXJR4HPAE9LsiPJeTSF1elJ7gJOb9clSdLseGFVnWBxpUlZbQvWvUnWt61XdrtaxlItBrYWjFZVnbPES6dNNBBJY7WaIeW93o5cAZ9OUsD7q2rz3jvYg0XSrFptC9Z8tyuw25UkSbPm5Kp6DvBi4LVJXrD3DvZg0ZSY/zLgprbofxRHEdY4rGSYdrtdSZKkH6iqne3jbuBK4MRuI5KWtN8vA/wiQOOwbBdBu12t3Gq6rUiS1CdJHg88pqoeaJ+/CPivHYclLWrhlwFJ5r8MuL7bqDR0Yx/kQpIkDcpRwA1Jvgh8DviTqvpUxzFJ+0jy+CRPmH9O82XA7d1GpVmw2kEuJEnSDKqqu4FndR2HtAJHAVcmgeYz70f8MkCTYIElSZKkwRnllwGOCK0DYRdBSZIkSRoRW7AkSYPnt8+SpEmxBUuSJEmSRsQCS5IkSZJGxAJLkiRJkkbEe7AkLetA71/xfhdJkjSrbMGSJEmSpBGxBUs6AENvmVnq/CRJ0r78f3NlVvN76vNnK1uwJEmSJGlELLAkSZIkaUTsIihp1ewaIUmS9Gi9LLCG8qFuEvfzDP2eIUmSZtmBfiY60NFf93eMNE59/gzbywJLkqRR6PN/4JKk6bSmAivJGcB7gIOAD1TVhSOJqjWUlqoDNavnPU7jztWl+Ld8tFkbRWg1uspV6UCYp+oLc1VdWPUgF0kO
Av4QeDHwdOCcJE8fVWDSqJir6gtzVX1gnqovzFV1ZS2jCJ4IfLWq7q6q7wOXAmeOJixppMxV9YW5qj4wT9UX5qo6sZYugk8BvrFgfQfwz/feKckmYFO7+mCSLy/ys34M+NYaYumrzs77DV286Q8tdd6fqqozxvB+y+bqCvMUloi949/nJJirjzbtueo1dY169m96f+c9jlz1//+169U1dYT/Hvp6TQVzdWpM6Pq8plxdS4GVRbbVPhuqNgOb9/uDku1VNbeGWHrJ857cWy6y7VG5upI8Bf9mXccxaX3NVf9es2Ua8xTM1f3xvCf3lots8///A+B5r85augjuAI5ZsH40sHMNP08aF3NVfWGuqg/MU/WFuapOrKXA+jxwXJJjkzwWeCVw9WjCkkbKXFVfmKvqA/NUfWGuqhOr7iJYVQ8l+TXgT2mGvry4qu5Y5Y9btml2oDzvCTBXR8LznoAR5qp/r9nS1zwF/2azxlztH897FVK1T7dpSZIkSdIqrKWLoCRJkiRpAQssSZIkSRqRzgusJGck+XKSrya5oOt4xinJPUluS3JLku3ttiOSbE1yV/v4pK7jXKskFyfZneT2BdsWPc80/qD9+9+a5DndRb4083R4eQrmat/NSq4OMU/BXDVX+5Gr5unw8hTGn6udFlhJDgL+EHgx8HTgnCRP7zKmCXhhVZ2wYGz9C4BtVXUcsK1d77tLgL0nYVvqPF8MHNcum4D3TSjGFTNPgWHmKZirQzALuXoJA8pTMFfbdXN1ynPVPAWGmacw5lztugXrROCrVXV3VX0fuBQ4s+OYJu1MYEv7fAtwVoexjERVXQ98Z6/NS53nmcCHqvFZ4PAk6ycT6YqZpwPMUzBXB2pwuTrAPAVzFczVPuSqeTrAPIXx52rXBdZTgG8sWN/RbhuqAj6d5KYkm9ptR1XVLoD28cjOohuvpc6zDznQhxhHaZbzFMzVPpnlXO1znkJ/4hwVc5Ve5mofYhylWc5TGGGurnoerBHJItuGPG78yVW1M8mRwNYkf9l1QFOgDznQhxhHyTxdXB/yoA8xjpK5uq++5EBf4hwVc3VffciBPsQ4Subp4g44D7puwdoBHLNg/WhgZ0exjF1V7WwfdwNX0jQ93zvfzNg+7u4uwrFa6jz7kAN9iHFkZjxPwVztjRnP1T7nKfQnzpEwV3ubq32IcWRmPE9hhLnadYH1eeC4JMcmeSzwSuDqjmMaiySPT/KE+efAi4Dbac733Ha3c4Gruolw7JY6z6uBX2xHaDkJ+Jv55tkpYp7OTp6CudoL5mqv8xTMVXO1H7lqns5OnsIoc7WqOl2AlwBfAb4G/Oeu4xnjef4U8MV2uWP+XIEn04xUclf7eETXsY7gXD8K7AL+gabqP2+p86Rpdv3D9u9/GzDXdfxLnJN5OrA8bc/LXO3pMku5OsQ8bWM1V83Vqc9V83R4edqe11hzNe2BkiRJkqQ16rqLoCRJkiQNhgWWJEmSJI2IBZYkSZIkjYgFliRJkiSNiAWWJEmSJI2IBdYYJXlZkkry013HIu2Puaq+MFfVB+ap+sJcHQ8LrPE6B7iBZmI6aZqZq+oLc1V9YJ6qL8zVMbDAGpMkhwEn00xc9sp222OS/FGSO5Jck+STSc5uX3tukj9PclOSP02yvsPwNUPMVfWFuao+ME/VF+bq+Fhgjc9ZwKeq6ivAd5I8B/iXwEbgnwK/DDwPIMkhwH8Dzq6q5wIXA7/dRdCaSeaq+sJcVR+Yp+oLc3VMDu46gAE7B3itcKNSAAABSElEQVR3+/zSdv0Q4GNV9Qjw10mubV9/GvBMYGsSgIOAXZMNVzPMXFVfmKvqA/NUfWGujokF1hgkeTJwKvDMJEWThAVcudQhwB1V9bwJhSgB5qr6w1xVH5in6gtzdbzsIjgeZwMfqqqfrKqNVXUM8FfAt4B/1fZvPQo4pd3/y8C6JD9ohk3yjC4C18wxV9UX5qr6wDxVX5irY2SBNR7nsO83AB8HfgLY
AdwOvB+4Efibqvo+TaK/M8kXgVuA508uXM0wc1V9Ya6qD8xT9YW5Okapqq5jmClJDquqB9um2c8BJ1fVX3cdl7Q3c1V9Ya6qD8xT9YW5unbegzV51yQ5HHgs8HYTVlPMXFVfmKvqA/NUfWGurpEtWJIkSZI0It6DJUmSJEkjYoElSZIkSSNigSVJkiRJI2KBJUmSJEkjYoElSZIkSSPy/wGWTk6BpNHf4gAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# plot the Age distribution (5-year bins) in a separate panel for each Fare-bin\n", + "# note: `height` is the seaborn >= 0.9 name for the old `size` keyword, which newer releases no longer accept\n", + "grid = sns.FacetGrid(df, col='Fare-bin', height=3, aspect=0.8, sharey=False)\n", + "grid.map(plt.hist, 'Age', alpha=.5, bins=range(0,105,5))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "_cell_guid": "d63fb4e1-0c8d-423d-bf93-5821b08fde51", + "_execution_state": "idle", + "_uuid": "d8472c4371963ccdc90498b657d3f7f26e9f1e99" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
Fare-bin
127.848315
227.898241
326.144009
430.728604
535.877913
\n", + "
" + ], + "text/plain": [ + " Age\n", + "Fare-bin \n", + "1 27.848315\n", + "2 27.898241\n", + "3 26.144009\n", + "4 30.728604\n", + "5 35.877913" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# average Age within each Fare-bin group\n", + "df.groupby('Fare-bin')[['Age']].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "_cell_guid": "40a7c6d6-c3ae-46f0-baef-352ff8509783", + "_execution_state": "idle", + "_uuid": "3fb0839aa1bb049ea5998f458ad939272a736345" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
Fare-bin
110.812580
210.995650
313.523208
417.466022
515.150724
\n", + "
" + ], + "text/plain": [ + " Age\n", + "Fare-bin \n", + "1 10.812580\n", + "2 10.995650\n", + "3 13.523208\n", + "4 17.466022\n", + "5 15.150724" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the standard deviation of Age for each Fare-bin\n", + "df[['Fare-bin', 'Age']].groupby(['Fare-bin']).std()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "_cell_guid": "37c1feaf-efa8-467a-8fe3-90b8773bde01", + "_execution_state": "idle", + "_uuid": "cab042b668d7f35dd2b778162835bcb5a0bc1aed" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAqsAAAGoCAYAAABhZ6zCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X20ZGV55/3vT140CgaVxiDdnWYmjQkyRvAMYphJCMg8DfrQZoIZSFQ0JD1JNBo0T8Q4S4yZrIVJRhJfQtIKA6gRCGrsYYjaQYg6KyANIgIt0EEDHdBuRFGikbRezx+12zkc6vR5rdq7qr6ftWqd2rvu2vva61x9+qq77n3fqSokSZKkLnpc2wFIkiRJs7FYlSRJUmdZrEqSJKmzLFYlSZLUWRarkiRJ6iyLVUmSJHWWxeoCJXlTktuS3JLk5iTPa/a/N8nhzfOHZ3nvMUmub963NclbBhjn45NclmRbc841gzqXumWEcvSnk9yUZFeSUwd1HnXPCOXo65Lc3sR5dZIfHdS51D0jlKe/luQLzbk+szu2cRLnWZ2/JM8H3g4cV1XfTXIgsG9V3Tej3cNVtV+f998B/EJVfT7JXsAzq+r2AcX6G8Czq+rXkpwG/FxV/ZdBnEvdMWI5ugZ4MvDbwKaqumIQ51G3jFiO/ixwfVV9O8mvNzH7d3QCjFiePrmqvtk8PwX4japaN4hztcWe1YU5GHigqr4LUFUP7E7cJNcmmdrdMMn/aHqNrk6yotl9EHB/897v7U7cJG9J8r4kn0xyV5JfXYZY1wMXN8+vAE5IkmU4rrptZHK0qr5cVbcA31/qsTRSRilHr6mqbzeb1wErl3pMjYxRytNvTtt8EjB2vZAWqwvzCWBVkjuT/FmSn5ml3ZOAm6rqKODvgHOa/ecBdyT5SJL/muQJ097zbOCFwPOBNyd5xsyDJvl0080/8/GCPjEcAtwLUFW7gIeApy3imjVaRilHNZlGNUfPBP5mAdep0TZSeZrkVUn+AfhD4DWLuuIOs1hdgKp6GHgusAHYCVyW5BV9mn4fuKx5/n7gPzTvfyswRe8fwS8CH5v2no9W1Xeq6gHgGuDoPuf/j1X1nD6Pv+0TQ79e1LH7tKVHG7Ec1QQaxRxN8tLmnH+0sKvVqBq1PK2qd1fVvwXeAPy3hV9xt+3ddgCjpqq+B1wLXJvkC8AZwEVzvW3a+/8BOD/Je4CdSZ42s80s2yT5NLB/n+P/dp8E3g6sArYn2Rv4YeDBOeLUGBihHNWEGqUcbXqy3gT8zO6vhDUZRilPp7kUOH+OGEeOPasLkOSZSdZO2/Uc4B/7NH0csPvu5l8EPtO
8/4XTxo2uBb4HfKPZXp/kCU0yHwfcMPOgC/yktYnePyyaWD5Z3k039kYsRzWBRilHkxwJ/AVwSlXtWPDFamSNWJ5Oj/OFwF3zvtARYc/qwuwHvDPJAcAuYBu9rwhm+mfgWUlupDdWdPfdoy8Dzkvy7eb9v1RV32vy+bPA/wZWA79fM+44XIQLgPcl2UavR/W0JR5Po2FkcjTJvwc+AjwF+H+T/F5VPWspx9RIGJkcpfe1/37AXzXHv6eqTlniMTUaRilPX918A/CvwNf5vx1VY8OpqzogvfnXHq6qP247Fqkfc1RdZ45qFJini+MwAEmSJHWWPauSJEnqLHtWJUmS1FkWq5IkSeqsThSr69atK3rzjPnwUXSUeepjxqNzzFEfMx6dY476mPGYl04Uqw888EDbIUhzMk/Vdeaous4c1WJ0oliVJEmS+rFYlSRJUmdZrEqSJKmzLFYlSdKiJdkryeeSXNl2LBpPFquSJGkpXgtsbTsIja85i9UkFybZkeTWPq/9dpJKcmCznSTvSLItyS1JjhpE0JIkqX1JVgIvBN7bdiwaX3vPo81FwLuAS6bvTLIKOBG4Z9ruk4C1zeN5wPnNz046b/OdffefdeJhQ45EWjjzV11hLk60PwF+B9h/tgZJNgAbAFavXt23jTmkPZmzZ7WqPgU82Oel8+gl6PRJXdcDl1TPdcABSQ5elkglSVJnJHkRsKOqbtxTu6raWFVTVTW1YsWKIUWncbKoMatJTgH+qao+P+OlQ4B7p21vb/b1O8aGJFuSbNm5c+diwpAkSe05FjglyZeBS4Hjk7y/3ZA0jhZcrCZ5IvAm4M39Xu6zr+9yWn7SkiRpdFXVG6tqZVWtAU4DPllVL205LI2h+YxZnenfAocCn08CsBK4KcnR9HpSV01ruxK4b6lBSpIkaTItuGe1qr5QVQdV1Zrm09R24Kiq+gqwCXh5MyvAMcBDVXX/8oYsSZK6pKquraoXtR2HxtN8pq76IPD3wDOTbE9y5h6aXwXcDWwD3gP8xrJEKUmSpIk05zCAqjp9jtfXTHtewKuWHpYkSZLkClaaADOXAkxyaJLrk9yV5LIk+7YdoyRJ6s9iVZNg5lKAbwPOq6q1wNeBPQ1tkQaq3yqBSf4oyReblQA/kuSANmOUpDZZrGqszVwKML0pLI4HrmiaXAy8uJ3oJKC3SuC6Gfs2A0dU1bOBO4E3DjsoSeoKi1WNu91LAX6/2X4a8I2q2tVsz7pwBbh4hQav3yqBVfWJaTl6Hb1pACVpIlmsamzNshTgvBeuABevUCf8MvA3s73oBypJ485iVePsMUsB0utpPSDJ7pkwXLhCnZXkTcAu4AOztfEDlaRxZ7GqsTXLUoC/BFwDnNo0OwP4aEshSrNKcgbwIuCXmmkBJWkiWaxqEr0BeF2SbfTGsF7QcjzSoyRZRy9PT6mqb7cdjyS1ac5FAaRxUFXXAtc2z+8Gjm4zHmm3ZpXA44ADk2wHzqF39//jgc29CSy4rqp+rbUgJalFFquS1KJZVgm0t1+SGg4DkCRJUmdZrEqSJKmzLFYlSZLUWXMWqwtdtzrJG5NsS3JHkv9nUIFLkiRp/M2nZ/Ui5rludZLD6c1n+azmPX+WZK9li1aSJEkTZc5idYHrVq8HLq2q71bVl4BtOEWQJEmSFmk5xqxOX7f6EODeaa9tb/Y9hutZS5IkaS5LKlb7rFudPs36LhPoetaSJEmay6IXBZi2bvUJ09at3g6smtZsJXDf4sOTJEnSJFtUz+oe1q3eBJyW5PFJDgXWAp9depiSJEmaRHP2rC5k3eqqui3J5cDt9IYHvKqqvjeo4CVJkjTe5ixWF7pudVX9AfAHSwlKkiRJAlewkiRJUodZrEqSJKmzFj0bwDg7b/OdffefdeJhQ45E0rhLciG9mVV2VNURzb6nApcBa4AvA79QVV9vK0ZJapM9q5LUrot47JLWZwNXV9Va4OpmW5ImksWqJLWo35LW9Jauvrh5fjH
w4qEGJUkdYrEqSd3z9Kq6H6D5eVDL8Uh9JXlCks8m+XyS25L8XtsxafxYrErSCEuyIcmWJFt27tzZdjiaPN8Fjq+qnwSeA6xLckzLMWnMWKxKUvd8NcnBAM3PHbM1rKqNVTVVVVMrVqwYWoASQPU83Gzu0zxqD2+RFsxiVZK6ZxNwRvP8DOCjLcYi7VGSvZLcTO9D1eaqur7tmDRenLpKklo0y5LW5wKXJzkTuAd4SXsRSnvWLKv+nCQHAB9JckRV3br79SQbgA0Aq1evXtCx25pK0iksu8ViVRoz/pEdLbMsaQ1wwlADkZaoqr6R5Fp6U7HdOm3/RmAjwNTUlEMEtGAOA5AkSYuSZEXTo0qSHwJeAHyx3ag0buYsVpNcmGRHkuld+k9NsjnJXc3PpzT7k+QdSbYluSXJUYMMXpIktepg4JoktwA30BuzemXLMWnMzGcYwEXAu4BLpu3bvbrKuUnObrbfAJwErG0ezwPOb35KrUjyBOBTwOPp5fsVVXVOkkOBS4GnAjcBL6uqR9qLdPAcHiBpuVXVLcCRbceh8TZnz+oCV1dZD1zSTGVxHXDA7ulXpJbMNgfg24DzmuUsvw6c2WKMkiRpFosdszrb6iqHAPdOa7e92Se1Yg9zAB4PXNHsdzlLSZI6arlvsEqffX3v/HPVFQ3LzDkAgX8AvlFVu5omfqiSJKmjFluszra6ynZg1bR2K4H7+h3AVVc0LFX1vap6Dr18PBr4iX7N+r3XD1WSJLVrscXqbKurbAJe3swKcAzw0O7hAlLbquobwLXAMfTGU+++wdAPVZIkddR8pq76IPD3wDOTbG9WVDkXODHJXcCJzTbAVcDdwDbgPcBvDCRqaZ5mmQNwK3ANcGrTzOUsJUnqqDmnrlrI6ipVVcCrlhpUVzn1z0g6GLg4yV70PpxdXlVXJrkduDTJfwc+B1zQZpCSJKk/l1vVWJttDsCqupve+FVJktRhLrcqSZKkzrJYlSRJUmdZrEqSJKmzLFYlSZLUWRarktRRSc5KcluSW5N8MMkT2o5JkobN2QCkjpttyjSNtySHAK8BDq+q7yS5HDgNuKjVwCRpyOxZlaTu2hv4oWa1tScyy0prkjTOLFYlqYOq6p+APwbuAe6nt3z1J2a2S7IhyZYkW3bu3DnsMCVp4CxWJamDkjwFWA8cCjwDeFKSl85sV1Ubq2qqqqZWrFgx7DAlaeAcsyp1RNfGprq8cOteAHypqnYCJPkw8FPA+1uNSpKGzJ5VSeqme4BjkjwxSYATgK0txyRJQ2exKkkdVFXXA1cANwFfoPf3emOrQUlSC5ZUrPabAzDJoUmuT3JXksuS7LtcwUrSJKmqc6rqx6vqiKp6WVV9t+2YJGnYFl2sTpsDcKqqjgD2ojcH4NuA86pqLfB14MzlCFSSJEmTZ6k3WO2eA/Bf6c0BeD9wPPCLzesXA28Bzl/ieZakazeuSJIkaX4W3bPabw5A4EbgG1W1q2m2HTik3/udG1CSJElzWXTP6ow5AL8B/BVwUp+m1e/9VbWR5maBqampvm2kUeAUT5IkDc5SbrD6wRyAVfWvwO45AA9olgYEWInLA0qSJGmRllKs9psD8HbgGuDUps0ZwEeXFqIkSZIm1VLGrM42B+AbgNcl2QY8DbhgGeKUJEnSBFrSbABVdQ5wzozddwNHL+W4kiRJEriClSRJkjrMYlWSJC1KklVJrkmytVnR8rVtx6Txs9RFASRJ0uTaBby+qm5Ksj9wY5LNVXV724FpfNizKkmSFqWq7q+qm5rn3wK2MstiQNJi2bMqSZKWLMka4Ejg+hn7NwAbAFavXj30uMBl10edPauSJGlJkuwHfAj4rar65vTXqmpjVU1V1dSKFSvaCVAjzWJVY222wf9Jnppkc5K7mp9PaTtWSRpFSfahV6h+oKo+3HY8Gj8Wqxp3uwf//wRwDPCqJIcDZwNXV9Va4OpmW+qUJAckuSLJF5sPXM9vOyZpumYFywuArVX19rb
j0XhyzOoymG0szFknHjbkSDRTVd0P3N88/1aS3YP/1wPHNc0uBq6lt/qa1CV/Cnysqk5Nsi/wxLYDkmY4FngZ8IUkNzf7freqrmoxJo0Zi1VNjBmD/5/eFLJU1f1JDmoxNOkxkjwZ+GngFQBV9QjwSJsxSTNV1WeAtB2HxpvFqibCzMH/vW+u5vW+1u9iHTTvku2sfwPsBP5nkp8EbgReW1X/PL3RJOSopMnmmFWNvVkG/381ycHN6wcDO/q917tY1aK9gaOA86vqSOCf6TO22hyVNO4sVjXW9jD4fxNwRvP8DOCjw45NmsN2YHtV7Z6z8gp6xaskTZQlFav97lR1SiB1zO7B/8cnubl5nAycC5yY5C7gxGZb6oyq+gpwb5JnNrtOAFzCUtLEWeqY1X53qv4uvSmBzk1yNr2vrbzLWq2YY/D/CcOMRVqE3wQ+0Px9vRt4ZcvxSNLQLbpYne1O1SROCSRJy6Cqbgam2o5Dktq0lGEA0+9U/VyS9yZ5EjOmBAL6TgmUZEOSLUm27Ny5cwlhSJIkaVwtpVid152qs/EOVkmSJM1lKcXqbHeqzmtKIEmSJGkuiy5W93CnqlMCSZIkaVksdTaAfneqPg64PMmZwD3AS5Z4DkkdMtuKV2edeNiQI5EkTYIlFat7uFPVKYEkSZK0ZEvtWZUkTYjZetUlaZBcblWSJEmdZbEqSZKkzrJYlSRJUmdZrEqSJKmzLFYlSZLUWRarkiRJ6iyLVUmSJHWW86wOkCv9SJIkLY09q5LUYUn2SvK5JFe2HYsktcFiVZK67bXA1raDkKS2WKxKUkclWQm8EHhv27FIUlscsypJ3fUnwO8A+8/WIMkGYAPA6tWr+7Zx/LykUbbkntWZ46mSHJrk+iR3Jbksyb5LD1OSJkuSFwE7qurGPbWrqo1VNVVVUytWrBhSdJI0PMsxDGDmeKq3AedV1Vrg68CZy3AOSZo0xwKnJPkycClwfJL3txuSJA3fkorVmeOpkgQ4HriiaXIx8OKlnEOSJlFVvbGqVlbVGuA04JNV9dKWw5KkoVvqmNWZ46meBnyjqnY129uBQ/q9cT7jrMaV48c0Scx3SdJSLLpndZbxVOnTtPq933FWkjQ/VXVtVb2o7TikmZJcmGRHklvbjkXjaynDAB4znopeT+sBSXb32K4E7ltShJIkqasuAta1HYTG26KL1VnGU/0ScA1watPsDOCjS45SkiR1TlV9Cniw7Tg03gaxKMAbgNcl2UZvDOsFAziHNC/9vqJK8tQkm5vp1TYneUqbMUqSpNkty6IAVXUtcG3z/G7g6OU4rrQMLgLeBVwybd/ZwNVVdW6Ss5vtN7QQmySNvUHcUD3bjZsaTy63qrE2y1dU6+lNqwZOryZJA+UN1Voqi1VNoqdX1f0Azc+DZmuYZEOSLUm27Ny5c2gBSpKkHotVaQ/sEZCk2SX5IPD3wDOTbE/iqpVadssyZlUaMV9NcnBV3Z/kYGBH2wFJ0iiqqtPbjkHjz55VTaJN9KZVA6dXkySp0yxWNdZm+YrqXODEJHcBJzbbkiSpgxwGoLG2h6+oThhqIJIkaVHsWZUkSVJnWaxKkiSpsxwGIA3IbCusnHXiYUOORJKk0WXPqiR1UJJVSa5JsjXJbUle23ZMktQGe1Y7xJ44SdPsAl5fVTcl2R+4Mcnmqrq97cAkaZgWXawmWQVcAvwI8H1gY1X9aZKnApcBa4AvA79QVV9feqjSeJjtQ4k0XbMU8O5lgb+VZCtwCGCxKmmiLKVnte+nfuAVwNVVdW6Ss4GzgTcsPVRJmkxJ1gBHAtf3eW0DsAFg9erVCzruoD84Lde3RQs9zkKvy2+vpG5bdLG6h0/964HjmmYXA9disbokDg+QJleS/YAPAb9VVd+c+XpVbQQ2AkxNTdWQw5OkgVuWG6xmfOp/elPI7i5oD5rlPRuSbEmyZefOncsRhiSNlST70CtUP1BVH247Hklqw5KL1bk+9c+mqjZW1VRVTa1
YsWKpYUjSWEkS4AJga1W9ve14JKktSypWZ/nU/9UkBzevHwzsWFqIkjSRjgVeBhyf5ObmcXLbQUnSsC1lNoDZPvVvAs4Azm1+fnRJEUrSBKqqzwBpOw5JattSZgPY/an/C0lubvb9Lr0i9fIkZwL3AC9ZWoiSJEmaVEuZDWBPn/pPWOxxJUmSpN1cwWqEOaWVJEkad8sydZUkSZI0CPasSloWLiMrSRoEe1YlSZLUWZ3uWXVMpiSNH3vhJS2EPauSJEnqrE73rM7GT+WSJEmTwZ5VSZIkdZbFqiRJkjrLYlWSJEmdZbEqSZKkzhrJG6y0OE4FJkmSRo3F6hhytgRJkjQuBlasJlkH/CmwF/Deqjp3UOeSFsMcbddyfaga528GzFGNAvNUgzaQMatJ9gLeDZwEHA6cnuTwQZxLWgxzVF1njmoUmKcahkH1rB4NbKuquwGSXAqsB24f0Pk0AGM+xtUcVdeZoxoF5qkGblDF6iHAvdO2twPPm94gyQZgQ7P5cJI7+hznQOCBgUTYTa1c7+sG3H4PZrvej1XVuuU7TV9z5iiYp7Po1PUuYz7Opq08NUfnsIff/YKudwg5NGid/ls6Tjk6hP//xtWScnRQxWr67KtHbVRtBDbu8SDJlqqaWs7AuszrHe7p++yrx+wwTx/D6x3eqfvsM0fnwesd7un77PP/+zl4vQszqHlWtwOrpm2vBO4b0LmkxTBH1XXmqEaBeaqBG1SxegOwNsmhSfYFTgM2Dehc0mKYo+o6c1SjwDzVwA1kGEBV7UryauDj9KayuLCqblvEofb4tcEY8nqHZBlzFPy9jbtWrtccXRKvd0j8/37RvN4FSNVjhkBJkiRJnTCoYQCSJEnSklmsSpIkqbMsViVJUmuSrEtyR5JtSc5uO55BSPLlJF9IcnOSLc2+pybZnOSu5udT2o5zsZJcmGRHklun7et7fel5R/P7viXJUXMd32JVkiS1YsKWa/3ZqnrOtPlGzwaurqq1wNXN9qi6CJg5uf9s13cSsLZ5bADOn+vgFquSJKktP1iutaoeAXYv1zoJ1gMXN88vBl7cYixLUlWfAh6csXu261sPXFI91wEHJDl4T8e3WJUkSW3pt1zrIS3FMkgFfCLJjc3yswBPr6r7AZqfB7UW3WDMdn0L/p0ParlVSZKkucxrWeExcGxV3ZfkIGBzki+2HVCLFvw7t2dVkiS1ZSKWa62q+5qfO4CP0Bv+8NXdX383P3e0F+FAzHZ9C/6dW6xKkqS2jP1yrUmelGT/3c+B/wTcSu86z2ianQF8tJ0IB2a269sEvLyZFeAY4KHdwwVm4zAASZLUimVeVrirng58JAn06q6/rKqPJbkBuDzJmcA9wEtajHFJknwQOA44MMl24BzgXPpf31XAycA24NvAK+c8vsutSpIkqascBiBJkqTOsliVJElSZ1msSpIkqbMsViVJktRZFquSJEnqLItVSZKkZZbk55JUkh9vO5ZRZ7EqSZK0/E4HPkNvoQMtgcWqJEnSMkqyH3AscCZNsZrkcUn+LMltSa5MclWSU5vXnpvk75LcmOTju5cpVY/FqiRJ0vJ6MfCxqroTeDDJUcB/BtYA/w74FeD5AEn2Ad4JnFpVzwUuBP6gjaC7yuVWJUmSltfpwJ80zy9ttvcB/qqqvg98Jck1zevPBI4ANjdLsu4F3D/ccLvNYlWSJGmZJHkacDxwRJKiV3wW8JHZ3gLcVlXPH1KII8dhAJIkScvnVOCSqvrRqlpTVauALwEPAD/fjF19OnBc0/4OYEWSHwwLSPKsNgLvKovVBUrypmZw9C1Jbk7yvGb/e5Mc3jx/eJb3HpPk+uZ9W5O8ZQjxntpMnTE16HOpG0YlR5O8IsnO5lw3J/mVQZ1L3TNCebo6yTVJPtfEevKgzqWxcTqP7UX9EPAMYDtwK/AXwPXAQ1X1CL0C921JPg/cDPzU8MLtvlRV2zGMjOZTz9uB46rqu0kOBPatqvtmtHu4qvbr8/4
7gF+oqs8n2Qt4ZlXdPsB49wf+N7Av8Oqq2jKoc6kbRilHk7wCmKqqVw/i+OquEcvTjcDnqur8poi+qqrWDOJcGn9J9quqh5uhAp8Fjq2qr7QdV9fZs7owBwMPVNV3Aarqgd1/XJNcO733Msn/SHJTkquTrGh2H0QzaLqqvrf7j2uStyR5X5JPJrkrya8uU7y/D/wh8C/LdDx136jlqCbTKOVpAU9unv8wcN8e2kpzuTLJzcCngd+3UJ0fi9WF+QSwKsmd6c2V9jOztHsScFNVHQX8HXBOs/884I4kH0nyX5M8Ydp7ng28kN5UFm9O8oyZB03y6WlfmU5/vKBP2yOBVVV15eIvVyNoZHK08fPNV6tXJFm1mAvWSBqlPH0L8NIk24GrgN9czAVLAFV1XFU9p6oOr6qL2o5nVFisLkBVPQw8F9gA7AQua77KnOn7wGXN8/cD/6F5/1uBKXp/qH8R+Ni093y0qr5TVQ8A1wBH9zn/f2ySfObjb6e3S/I4en/MX7/4q9UoGpUcbfwvYE1VPRv4W+DihV+xRtGI5enpwEVVtRI4GXhf8zdW0pA4ddUCVdX3gGuBa5N8ATgDuGiut017/z8A5yd5D7CzGbfyqDazbJPk08D+fY7/2zP+yO5Pb862a9Obs+1HgE1JTnHc6vgbkRylqr42bfM9wNvmiFFjZFTylN4KROuac/5904t7ILBjjlglLRM/HS5AkmcmWTtt13OAf+zT9HH07uyD3qf+zzTvf2Ga6hFYC3wP+EazvT7JE5o/uMcBN8w86Hx7A6rqoao6sJkyYw1wHWChOgFGJUebc01fTvAUYOu8L1QjbZTyFLgHOKE5708AT6DXGyxpSOxZXZj9gHcmOQDYBWyj9zXWTP8MPCvJjcBDwH9p9r8MOC/Jt5v3/1JVfa/5m/tZenfur6Y36NpB/FqMUcrR1yQ5pTnPg8Arlng8jY5RytPXA+9Jcha9XtpXlNPoSEPl1FUdkN4cgQ9X1R+3HYvUjzmqUWCeSuPJYQCSJEnqLHtWJUmS1Fn2rEqSJKmzLFYlSZLUWZ0oVtetW1f07rL04aPoKPPUx4xH55ijPmY8pLHQiWL1gQceaDsEaU7mqbrOHJU0jjpRrEqSJEn9WKxKkiSpsyxWJUmS1FkDK1aTnJXktiS3JvlgkicM6lzSdM264J9N8vkmB3+vT5vHJ7ksybYk1ydZM/xINamSXJhkR5JbZ3k9Sd7R5OctSY4adoyS1BUDKVaTHAK8BpiqqiOAvYDTBnEuqY/vAsdX1U8CzwHWJTlmRpszga9X1Y8B5wFvG3KMmmwXAev28PpJwNrmsQE4fwgxSVInDXIYwN7ADyXZG3gicN8AzyX9QPU83Gzu0zxmTuOyHri4eX4FcEKSDClETbiq+hTw4B6arAcuaXL5OuCAJAcPJzpJ6pa9B3HQqvqnJH8M3AN8B/hEVX1iepskG+j1GLB69eoFHf+8zXf23X/WiYctJlyNoSR7ATcCPwa8u6qun9HkEOBegKraleQh4GnAAzOOs+g8nY35q3n4QX42tjf77p/Z0ByVNO4GNQzgKfR6Bg4FngE8KclLp7epqo1VNVVVUytWrBhEGJpgVfW9qnoOsBI4OskRM5r060V9zCTa5qlaMq/8BHNU0vgb1DCAFwBfqqqdVfWvwIeBnxrQuaRZVdU3gGt57PjA7cAqgGaoyg+z569lpWH6QX42VuJQKkkTalDF6j3AMUme2IwDPAHYOqBzSY+SZEWSA5rnP0Tvw9N2/aIpAAATiUlEQVQXZzTbBJzRPD8V+GRVuTyhumIT8PJmVoBjgIeq6jFDACRpEgxqzOr1Sa4AbgJ2AZ8DNg7iXFIfBwMXN+NWHwdcXlVXJnkrsKWqNgEXAO9Lso1ej6qzVWhoknwQOA44MMl24Bx6NwJSVX8OXAWcDGwDvg28sp1IJal9AylWAarqHHp/gKWhqqpbgCP77H/ztOf/ArxkmHFJu1XV6XO8XsCrhhSOJHW
aK1hJkiSpsyxWJUmS1FkWq5IkSeosi1VJkiR1lsWqJEmSOstiVZIkSZ1lsSpJkqTOsliVJElSZ1msSpIkqbMsViVJktRZFquSJEnqLItVSZIkdZbFqiRJkjpr77YD2JPzNt/ZdgiSJElqkT2rkiRJ6iyLVUmSJHWWxarGSpJVSa5JsjXJbUle26fNcUkeSnJz83hzG7FKkqS5dXrMqrQIu4DXV9VNSfYHbkyyuapun9Hu01X1ohbikyRJC2DPqsZKVd1fVTc1z78FbAUOaTcqSZK0WBarGltJ1gBHAtf3efn5ST6f5G+SPGsPx9iQZEuSLTt37hxQpJIkaTYWqxpLSfYDPgT8VlV9c8bLNwE/WlU/CbwT+OvZjlNVG6tqqqqmVqxYMbiAJUlSXxarGjtJ9qFXqH6gqj488/Wq+mZVPdw8vwrYJ8mBQw5TEy7JuiR3JNmW5Ow+r69ubhb8XJJbkpzcRpyS1DaLVY2VJAEuALZW1dtnafMjTTuSHE3v38HXhhelJl2SvYB3AycBhwOnJzl8RrP/BlxeVUcCpwF/NtwoJakbnA1A4+ZY4GXAF5Lc3Oz7XWA1QFX9OXAq8OtJdgHfAU6rqmojWE2so4FtVXU3QJJLgfXA9FkrCnhy8/yHgfuGGqEkdYTFqsZKVX0GyBxt3gW8azgRSX0dAtw7bXs78LwZbd4CfCLJbwJPAl4wnNAkqVscBiBJw9fvA9XM3v3TgYuqaiVwMvC+JI/5m+2MFZLGncWqJA3fdmDVtO2VPPZr/jOBywGq6u+BJwCPuRHQGSskjTuLVUkavhuAtUkOTbIvvRuoNs1ocw9wAkCSn6BXrNp1KmniWKxK0pBV1S7g1cDH6a2ydnlV3ZbkrUlOaZq9HvjVJJ8HPgi8whsBJU2igd1gleQA4L3AEfTGYv1y81WWJE28Zo7fq2bse/O057fTm91CkibaIGcD+FPgY1V1avM11xMHeC5JkiSNoYEUq0meDPw08AqAqnoEeGQQ55IkSdL4GtSY1X9D70aA/9ksFfjeJE+a3sDpViRJkjSXQRWrewNHAec3SwX+M/Cota+dbkWSJElzGVSxuh3YXlXXN9tX0CteJUmSpHkbSLFaVV8B7k3yzGbXCTx6zWtJkiRpToOcDeA3gQ80MwHcDbxygOeSJEnSGBpYsVpVNwNTgzq+JEmSxp8rWEmSJKmzLFYlSZLUWRarkiRJ6iyLVUmSJHWWxarGSpJVSa5JsjXJbUle26dNkrwjybYktyRxDmBJkjpqkFNXSW3YBby+qm5Ksj9wY5LNVTV9nt+TgLXN43nA+c1PSZLUMfasaqxU1f1VdVPz/FvAVuCQGc3WA5dUz3XAAUkOHnKokiRpHixWNbaSrAGOBK6f8dIhwL3Ttrfz2IJ29zE2JNmSZMvOnTsHEaYkSdoDi1WNpST7AR8Cfquqvjnz5T5vqX7HqaqNVTVVVVMrVqxY7jAlSdIcLFY1dpLsQ69Q/UBVfbhPk+3AqmnbK4H7hhGbJElaGItVjZUkAS4AtlbV22dptgl4eTMrwDHAQ1V1/9CClCRJ8+ZsABo3xwIvA76Q5OZm3+8CqwGq6s+Bq4CTgW3At4FXthCnJEmaB4tVjZWq+gz9x6ROb1PAq4YTkSRJWgqHAUiSJKmzLFYlqQVJ1iW5o1lJ7exZ2vxCktub1dj+ctgxSlIXOAxAkoYsyV7Au4ET6c1OcUOSTdNXWkuyFngjcGxVfT3JQe1EK0ntsmdVkobvaGBbVd1dVY8Al9JbWW26XwXeXVVfB6iqHUOOUZI6wWJVkoZvPquoHQYcluT/JLkuybp+B3KVNUnjzmJVkoZvPquo7Q2sBY4DTgfem+SAx7zJVdYkjTmLVUkavvmsorYd+GhV/WtVfQm4g17xKkkTxWJVkobvBmBtkkOT7AucRm9lten+GvhZgCQH0hsWcPdQo5SkDrBYlaQhq6pdwKuBjwNbgcur6rYkb01yStPs48DXktwOXAP8f1X1tXY
ilqT2OHWVJLWgqq6it/Tv9H1vnva8gNc1D0maWPasSpIkqbMsViVJktRZFquSJEnqLItVSZIkdZbFqiRJkjrLYlWSJEmdZbEqSZKkzhpYsZpkrySfS3LloM4h9ZPkwiQ7ktw6y+vHJXkoyc3N48392kmSpPYNclGA19JbmeXJAzyH1M9FwLuAS/bQ5tNV9aLhhCNJkhZrID2rSVYCLwTeO4jjS3tSVZ8CHmw7DkmStHSD6ln9E+B3gP1na5BkA7ABYPXq1cty0vM239l3/1knHrYsx9dYeX6SzwP3Ab9dVbf1azSIPJUkSfO37D2rSV4E7KiqG/fUrqo2VtVUVU2tWLFiucOQ9uQm4Eer6ieBdwJ/PVtD81SSpHYNYhjAscApSb4MXAocn+T9AziPtChV9c2qerh5fhWwT5IDWw5LkiT1sezFalW9sapWVtUa4DTgk1X10uU+j7RYSX4kSZrnR9P7d/C1dqOSJEn9DHI2AKkVST4IHAccmGQ7cA6wD0BV/TlwKvDrSXYB3wFOq6pqKVxJkrQHAy1Wq+pa4NpBnkOaqapOn+P1d9Gb2kqSJHWcK1hJkiSpsyxWJUmS1FkWq5IkSeosi1VJkiR1lsWqJEmSOstiVZIkSZ1lsSpJLUiyLskdSbYlOXsP7U5NUkmmhhmfJHWFxaokDVmSvYB3AycBhwOnJzm8T7v9gdcA1w83QknqDotVSRq+o4FtVXV3VT0CXAqs79Pu94E/BP5lmMFJUpdYrErS8B0C3Dtte3uz7weSHAmsqqor93SgJBuSbEmyZefOncsfqSS1zGJVkoYvffbVD15MHgecB7x+rgNV1caqmqqqqRUrVixjiJLUDRarkjR824FV07ZXAvdN294fOAK4NsmXgWOATd5kJWkSWaxK0vDdAKxNcmiSfYHTgE27X6yqh6rqwKpaU1VrgOuAU6pqSzvhSlJ7LFYlaciqahfwauDjwFbg8qq6Lclbk5zSbnSS1C17tx2AJE2iqroKuGrGvjfP0va4YcQkSV1kz6okSZI6y2JVYyfJhUl2JLl1lteT5B3NykG3JDlq2DFKkqT5sVjVOLoIWLeH108C1jaPDcD5Q4hJkiQtgsWqxk5VfQp4cA9N1gOXVM91wAFJDh5OdJIkaSG8wUqTaLbVg+6f2TDJBnq9r6xevbrvwc7bfOeyBDXbcc468bBlOb4kSaPInlVNoj2uHvSona4OJElSqyxWNYnmWj1IkiR1hMWqJtEm4OXNrADHAA9V1WOGAEiSpPY5ZlVjJ8kHgeOAA5NsB84B9gGoqj+nNxH7ycA24NvAK9uJVJIkzcViVWOnqk6f4/UCXjWkcCRJ0hI4DECSJEmdZbEqSZKkzrJYlSRJUmdZrEqSJKmzBlKsJlmV5JokW5PcluS1gziPJEmSxtugZgPYBby+qm5Ksj9wY5LNVXX7gM4nSZKkMTSQntWqur+qbmqefwvYSm/tdUmSJGneBj7PapI1wJHA9TP2bwA2AKxevXqgMZy3+c6++8868bCBnleSJElLM9AbrJLsB3wI+K2q+ub016pqY1VNVdXUihUrBhmGJEmSRtTAitUk+9ArVD9QVR8e1HkkSZI0vgY1G0CAC4CtVfX2QZxDkiRJ429QPavHAi8Djk9yc/M4eUDnkiRJ0pgayA1WVfUZIIM4tiSNgyTrgD8F9gLeW1Xnznj9dcCv0JsKcCfwy1X1j0MPVJJa5gpWkjRkSfYC3g2cBBwOnJ7k8BnNPgdMVdWzgSuAPxxulJLUDRarkjR8RwPbquruqnoEuBRYP71BVV1TVd9uNq8DVg45RknqBItVSRq+Q4B7p21vZ88Lp5wJ/E2/F5JsSLIlyZadO3cuY4iS1A0Wq5I0fP3G9FffhslLgSngj/q97pzVksadxarGTpJ1Se5Isi3J2X1ef0WSndNmqviVNuLURNsOrJq2vRK4b2ajJC8A3gScUlXfHVJsktQpA19uVRqmaTeunEivILghyaaqun1G08u
q6tVDD1DquQFYm+RQ4J+A04BfnN4gyZHAXwDrqmrH8EOUpG6wZ1XjZs4bV6S2VdUu4NXAx4GtwOVVdVuStyY5pWn2R8B+wF813wBsailcSWqVPasaN/1uXHlen3Y/n+SngTuBs6rq3j5tSLIB2ACwevXqZQ5Vk6yqrgKumrHvzdOev2DoQUlSB9mzqnEznxtX/hewppm/8m+Bi2c7mDevSJLULotVjZs5b1ypqq9Nu1nlPcBzhxSbJElaIItVjZsf3LiSZF96N648aqxfkoOnbZ5Cb8ygJEnqIMesaqxU1a4ku29c2Qu4cPeNK8CWqtoEvKa5iWUX8CDwitYCliRJe2SxqrEzjxtX3gi8cdhxSZKkhXMYgCRJkjrLntVlcN7mO/vuP+vEw4YciSRJ0nixZ1WSJEmdZbEqSZKkzrJYlSRJUmdZrEqSJKmzLFYlSZLUWRarkiRJ6iyLVUmSJHWWxaokSZI6y2JVkiRJnWWxKkmSpM6yWJUkSVJnWaxKkiSpsyxWJUmS1FkWq5IkSeosi1VJkiR11sCK1STrktyRZFuSswd1HmmmuXIvyeOTXNa8fn2SNcOPUpPOPJWk+RlIsZpkL+DdwEnA4cDpSQ4fxLmk6eaZe2cCX6+qHwPOA9423Cg16cxTSZq/QfWsHg1sq6q7q+oR4FJg/YDOJU03n9xbD1zcPL8COCFJhhijZJ5K0jztPaDjHgLcO217O/C86Q2SbAA2NJsPJ7mjz3EOBB4YSITA6wZ14MUff6DX20GzXe/HqmrdIo85Z+5Nb1NVu5I8BDytXyyTkKeLYJ72dCJPh5mjHczF2ZijPUvJUakzBlWs9vv0X4/aqNoIbNzjQZItVTW1nIF1mde7PIfts68W0aa30zx9DK93eQ7bZ9+i8tQcfSyvVxovgxoGsB1YNW17JXDfgM4lTTef3PtBmyR7Az8MPDiU6KQe81SS5mlQxeoNwNokhybZFzgN2DSgc0nTzSf3NgFnNM9PBT5ZVX17VqUBMU8laZ4GMgygGV/1auDjwF7AhVV12yIOtcevtsaQ17tEs+VekrcCW6pqE3AB8L4k2+j1VJ22xNP6extv45Cn/s7G26RdryZM/KAuSZKkrnIFK0mSJHWWxaokSZI6q7PF6iQs15rky0m+kOTmJFuafU9NsjnJXc3Pp7Qd52IluTDJjiS3TtvX9/rS847m931LkqPai3x+zFFztL3I58ccHf0chfHPU2kunSxW57kU4bj42ap6zrQ58s4Grq6qtcDVzfaougiYOSH1bNd3ErC2eWwAzh9SjItijpqjmKNdMs45CmOcp9J8dLJYZbKXa52+xOLFwItbjGVJqupTPHZeyNmubz1wSfVcBxyQ5ODhRLoo5miPOdpd5mjPSOcojH2eSnPqarHabynCQ1qKZZAK+ESSG5slEwGeXlX3AzQ/D2otusGY7fpG7Xc+avEuljlqjnbdJOYojE+eSnMa1HKrSzXv5TBH3LFVdV+Sg4DNSb7YdkAtGrXf+ajFu1jm6P81ar/zUYt3sczRR5uU37smSFd7Vidiudaquq/5uQP4CL2v7b66+yub5ueO9iIciNmub9R+56MW76KYo+Zo101ojsL45Kk0p64Wq2O/XGuSJyXZf/dz4D8Bt/LoJRbPAD7aToQDM9v1bQJe3tzJegzw0O6vuDrKHO0xR7vLHO0ZxxyF8clTaU6dHAawjMu1dtnTgY8kgd7v4S+r6mNJbgAuT3ImcA/wkhZjXJIkHwSOAw5Msh04BziX/td3FXAysA34NvDKoQe8AOaoOYo52gVjn6Mw3nkqzYfLrUqSJKmzujoMQJIkSbJYlSRJUndZrEqSJKmzLFYlSZLUWRarkiRJ6iyL1SFL8nNJKsmPtx2L1I85qq4zR6XJYrE6fKcDn6E3QbfUReaous4clSaIxeoQJdkPOBY4k+aPbJLHJfmzJLcluTLJVUlObV57bpK/S3Jjko/vXlpPGhRzVF1njkqTx2J1uF4MfKy
q7gQeTHIU8J+BNcC/A34FeD5Akn2AdwKnVtVzgQuBP2gjaE0Uc1RdZ45KE6aTy62OsdOBP2meX9ps7wP8VVV9H/hKkmua158JHAFsbpYS3AtwfWcNmjmqrjNHpQljsTokSZ4GHA8ckaTo/dEs4COzvQW4raqeP6QQNeHMUXWdOSpNJocBDM+pwCVV9aNVtaaqVgFfAh4Afr4Zc/V04Lim/R3AiiQ/+DorybPaCFwTwxxV15mj0gSyWB2e03nsp/8PAc8AtgO3An8BXA88VFWP0PvD/LYknwduBn5qeOFqApmj6jpzVJpAqaq2Y5h4Sfarqoebr7g+CxxbVV9pOy5pN3NUXWeOSuPLMavdcGWSA4B9gd/3D6w6yBxV15mj0piyZ1WSJEmd5ZhVSZIkdZbFqiRJkjrLYlWSJEmdZbEqSZKkzrJYlSRJUmf9/8SJjUAUDBqFAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# plot the Age distribution (5-year bins) in a separate panel for each SibSp value, four panels per row\n", + "# note: `height` is the seaborn >= 0.9 name for the old `size` keyword, which newer releases no longer accept\n", + "grid = sns.FacetGrid(df, col='SibSp', col_wrap=4, height=3.0, aspect=0.8, sharey=False)\n", + "grid.map(plt.hist, 'Age', alpha=.5, bins=range(0,105,5))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "_cell_guid": "ceab6b91-e6bd-4c0f-b7c2-31847b1ee61b", + "_execution_state": "idle", + "_uuid": "612aec4215b2e4c16806aa768ddd1f565fdfe78b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
SibSp
030.921766
131.058071
223.569444
316.312500
48.772727
510.166667
814.500000
\n", + "
" + ], + "text/plain": [ + " Age\n", + "SibSp \n", + "0 30.921766\n", + "1 31.058071\n", + "2 23.569444\n", + "3 16.312500\n", + "4 8.772727\n", + "5 10.166667\n", + "8 14.500000" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# average Age within each SibSp group\n", + "df.groupby('SibSp')[['Age']].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "_cell_guid": "eb7a277e-0e3c-4507-ba41-2c9d223f293a", + "_execution_state": "idle", + "_uuid": "d40475272e30ea20b5eeff47d49a5664a2a8303f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
SibSp
013.059751
115.974482
214.910640
311.824938
48.005545
55.192944
8NaN
\n", + "
" + ], + "text/plain": [ + " Age\n", + "SibSp \n", + "0 13.059751\n", + "1 15.974482\n", + "2 14.910640\n", + "3 11.824938\n", + "4 8.005545\n", + "5 5.192944\n", + "8 NaN" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the standard deviation of Age for each SibSp\n", + "df[['SibSp', 'Age']].groupby(['SibSp']).std()" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "_cell_guid": "ab9d9000-de9f-4ca0-b817-c53184d74c9a", + "_execution_state": "idle", + "_uuid": "b2201e9af0c379e0b1bcc0e7a23e08043c241645" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAqkAAAGoCAYAAABlknz/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3X+8XXV95/vXu4DWqiOgURGI0DHaglNRT1Ev0178AQauY7TFMXSuxlYntQOPqrWPEe2tWmwfD+1UaSsWGiUX6FjQqmimjWJKddSpIgERCYhESoeYXAiEogyoE/zcP/Y6sDnZOzknZ/9Ye5/X8/HYj73Xd33X2p/F+bDy2Wt911qpKiRJkqQ2+alxByBJkiTNZZEqSZKk1rFIlSRJUutYpEqSJKl1LFIlSZLUOhapkiRJah2L1D6SPJDk2iTXJ/mbJD8zgHW+Lsm5g4hvznrfnmRrkpuSvHTQ61f7TUq+Jnl8ki8kuXcY/y9oMkxQvp6U5Ook32reXzTI9WsyTFC+Ht/EeW2SbyZ55SDXPw4Wqf3dX1XHVdUzgR8Db5zvgkkOGF5Ye3zXMcBq4FhgJfAXo/x+tcZE5CvwQ+D3gd8d4XeqfSYlX+8E/l1V/RtgDfBXI/xutcek5Ov1wExVHUenHvjLJAeO8PsHziJ1fr4MPA0gyaebX9Rbkqyd7dAcGTo7yZXAC5L8YpJ/bH7NfD3JY5uuT0nyuSQ3J/njAcS2Cri0qn5UVf8EbAWOH8B6Nblam69V9b+q6it0ilUJ2p2v36iq7c3kFuCnkzxysevVRGtzvt5XVbubyZ8GJv5pTRNdYY9C8yvkFOBzTdNvVNWuJI8Crkryyaq6C3g0cH1VvTPJI4BvA6+uqquS/Cvg/mb544BnAz8Cbkrywaq6bc53ngO8sEc4l1bVe+e0HQ58rWt6W9OmJWgC8lV60ITl668C36iqH+3n5mrCTUK+JnkesB54KvCarqJ1Ilmk9veoJNc2n78MXNB8/u2ucR5HAiuAu4AHgE827c8AdlTVVQBV9X2AJABXVNU9zfQNdBLpYUlZVW9ZQJzp0Tbxv560YJOSrxJMWL4mORZ4H3DyQpfVVJiYfK2qK4Fjk/w8cFGSz1bVxJ65skjt7/5mXMeDkpwIvAR4QVXdl+SLdA6pA/ywqh6Y7Ur/QrH7V/gD9PgbLPCX0zY6/3PMOgLYjpaaSclXCSYoX5McAVwGvLaqvtt3izTNJiZfZ1XVjUn+F/BMYHO/fm1nkbowjwPubhLy54Dn9+n3bTpjTX6xObz/WB46vL9PC/zltAH46yQfAJ5C55fc1xewv
KZXG/NV6qd1+ZrkYODvgLdX1f+Y73JaEtqYr0cDt1XV7iRPpXMU99b5Lt9GFqkL8zngjUmuA27i4WNBH1RVP07yauCDzViV++n84hq4qtqS5OPADcBu4IyuX3Ba2lqXrwBJbgX+FfCIJK8ATq6qG4b1fZoYbczXM+lcJPP7SX6/aTu5qu4Y0vdpcrQxX/8tcFaS/w38BPhPVXXnkL5rJFLl8EVJkiS1i7egkiRJUutYpErSCCU5Mp2nbt3Y3F/xTU37oUk2NfdM3JTkkD7Lr2n63JxkzWijl6TR8XS/JI1QksOAw6rqmuYiiquBVwCvA3ZV1XuTnAUcUlVvm7PsoXSu1J2hc8Xw1cBzq+ruUW6DJI2CR1IlaYSqakdVXdN8/gFwI50HcKwCLmq6XUSncJ3rpcCmqtrVFKab6Dz+UJKmTiuK1JUrVxadowK+pvc1NczXJfEaiSRH0XnizJXAk6pqB0Dz/sQeixzOw2/23fcJc0nWJtmcZPOxxx477v+evqYgX0fFfezUv+atFUXqnXdO9B0StMSYrxqEJI+h81SaN88+hWY+i/Vo67nTr6p1VTVTVTOPetSj9jdMaeTcx2pWK4pUSVpKkhxEp0D9aFV9qmm+vRmvOjtutde9OH3CnKQlwyJVkkYonYd2XwDcWFUf6Jq1AVjTfF4DfKbH4pcDJyc5pLn6/+SmTZKmjkWqJI3WCcBrgBclubZ5nQq8Fzgpyc3ASc00SWaSfASgqnYB7wGual5nN22SNHV8LKokjVBVfYXeY0sBXtyj/2bgDV3T64H1w4lOmr8kRwIXA0+m8xjOdVX1Z3P6BPgz4FTgPuB1s3e3kPbFIlWSJO2P3cBbu+/5m2RTVd3Q1ecUYEXzeh5wXvMu7ZOn+yVJ0oLt5Z6/3VYBF1fH14CDZy8QlPZlSR5JPWfTd/Zoe8tJTx9DJJom5pUmSa98BXNW+2fOPX+79bu37445y68F1gIsX758WGHuYZT/H/j/3MJ5JFWSJO23fdzzd1739u2+r++yZcuGEaYmkEWqJEnaL33u+dvNe/tqv+2zSE2yPskdSa7vant3ku/NuX3K7Ly3J9ma5KYkLx1W4JIkaXz2cs/fbhuA16bj+cA9s4//lfZlPmNSLwTOpXObiW7nVNWfdDckOQZYDRwLPAX4+yRPr6oHBhCrJElqj9l7/n4rybVN2zuA5QBVdT6wkc7tp7bSuQXVr48hTk2ofRapVfWlZkD0fKwCLq2qHwH/lGQrcDzw1f2OUJIktc4+7vk726eAM0YTkabNYsaknpnkumY4wCFNW7+r+PaQZG2SzUk279y5cxFhSJIkadrsb5F6HvCvgePo3Ebi/U37vK7iA6/k0+gl+ekkX0/yzSRbkvxB0350kiuT3JzkY0keMe5YJUla6varSK2q26vqgar6CfBhOqf0wav41G4/Al5UVc+i8wNrZTOQ/310xlivAO4GXj/GGCVJEvtZpM55WsQrgdkr/zcAq5M8MsnRdB6D9vXFhSgNRvPEk3ubyYOaVwEvAj7RtF8EvGIM4UmSpC77vHAqySXAicATkmwD3gWcmOQ4Ov/A3wr8JkBVbUnyceAGOs/0PcMr+9UmSQ4ArgaeBnwI+C7wL1W1u+nScxz1uJ6GoumTZD3wMuCOqnpm0/Yx4BlNl4Pp5ORxPZa9FfgB8ACwu6pmRhK0JI3BfK7uP71H8wV76f9HwB8tJihpWJofTcclORi4DPj5Xt16LLcOWAcwMzPTc5y1NE8XMue2flX16tnPSd4P3LOX5V9YVXcOLTpJagmfOKUlqar+Bfgi8Hzg4CSzP9gcR62hqqovAbt6zWtujv7vgUtGGpQktZBFqpaMJMuaI6gkeRTwEuBG4AvAaU23NcBnxhOhxC8Bt1fVzX3mF/D5JFc3Q1D68jZ/kibdfJ44JU2Lw4CLmnGpPwV8vKr+NskNwKVJ/hD4BnsZziIN2ens/SjqCVW1PckTgU1Jvt0cmd2DQ1QkTTqLVC0ZV
XUd8Owe7bfw0G3UpLFohpz8CvDcfn2qanvzfkeSy+jkbc8iVZImnaf7JakdXgJ8u6q29ZqZ5NFJHjv7GTiZh27/J0lTxyJVkkaoua3fV4FnJNmWZPbhEauZc6o/yVOSbGwmnwR8Jck36dx/+u+q6nOjiluSRs3T/ZI0Qn1u60dVva5H23bg1ObzLcCzhhqcJLWIRWrjnE3f6dn+lpOePuJIJEmS5Ol+SZIktY5FqiRJklrHIlWSJEmtY5EqSZKk1rFIlSRJUutYpEqSJKl1LFIlSZLUOhapkiRJah2LVEmSJLXOPovUJOuT3JHk+q62/5Lk20muS3JZkoOb9qOS3J/k2uZ1/jCDlyRJ0nSaz5HUC4GVc9o2Ac+sql8AvgO8vWved6vquOb1xsGEKUmSpKVkn0VqVX0J2DWn7fNVtbuZ/BpwxBBik6Sp0+fs1LuTfK/rLNSpfZZdmeSmJFuTnDW6qCVp9AYxJvU3gM92TR+d5BtJ/nuSX+q3UJK1STYn2bxz584BhCH1l+TIJF9IcmOSLUne1LTPqziQBuhC9jw7BXBO11mojXNnJjkA+BBwCnAMcHqSY4YaqSSN0YGLWTjJ7wG7gY82TTuA5VV1V5LnAp9OcmxVfX/uslW1DlgHMDMzU4uJQ5qH3cBbq+qaJI8Frk6yqZl3TlX9yRhj0xJSVV9KctR+LHo8sLWqbgFIcimwCrhhcNFJUnvs95HUJGuAlwH/oaoKoKp+VFV3NZ+vBr4LPH0QgUqLUVU7quqa5vMPgBuBw8cblfQwZzYXo65PckiP+YcDt3VNb2MvOezZKkmTbr+K1CQrgbcBL6+q+7ralzWnpEjys8AK4JZBBCoNSnMU69nAlU3TvooD/8HXsJ0H/GvgODpnpN7fo096tPU9C1VV66pqpqpmli1bNpgoJWmE5nMLqkuArwLPSLItyeuBc4HHApvm3Grql4HrknwT+ATwxqra1XPF0hgkeQzwSeDNzTCU+RQH/oOvoaqq26vqgar6CfBhOqf259oGHNk1fQSwfRTxSdI47HNMalWd3qP5gj59P0mnAJBaJ8lBdPLzo1X1KegUB13zPwz87ZjC0xKW5LCq2tFMvhK4vke3q4AVSY4GvgesBn5tRCFKe0iyns6wvzuq6pk95p8IfAb4p6bpU1V19ugi1KRb1IVT0qRIEjo/rm6sqg90tc+nOJAGpjk7dSLwhCTbgHcBJyY5js7p+1uB32z6PgX4SFWdWlW7k5wJXA4cAKyvqi1j2ARp1oV0zqxevJc+X66ql40mHE0bi1QtFScArwG+leTapu0ddG7js0dxIA3LAs9ObQdO7ZreCOxxeyppHBZxpwppXixStSRU1VfofeGJ/+BL0vC8oLlOZTvwu/2O/idZC6wFWL58+QjDU5tZpO7DOZu+s0fbW07yrlqSJO3DNcBTq+re5kEpn6Zz1589eO909WKRKkl6kD/MNSjdD/Kpqo1J/iLJE6rqznHGpckxiMeiSpIkPUySJzcXrZLkeDo1x13jjUqTxCOpkiRpwfrcqeIggKo6HzgN+K0ku4H7gdWzT6iU5sMiVZIkLVifO1V0zz+Xzi2qpP3i6X5JkiS1jkWqJEmSWsciVZIkSa1jkSpJkqTWsUiVJElS61ikSpIkqXUsUiVphJKsT3JHkuu72v5Lkm8nuS7JZUkO7rPsrUm+leTaJJtHF7UkjZ5FqiSN1oXAyjltm4BnVtUvAN8B3r6X5V9YVcdV1cyQ4pOkVphXkdrnl/+hSTYlubl5P6RpT5I/T7K1OSrwnGEFL0mTpqq+BOya0/b5qtrdTH4NOGLkgUlSy8z3SOqF7PnL/yzgiqpaAVzRTAOcAqxoXmuB8xYfpiQtGb8BfLbPvAI+n+TqJGv3tpIka5NsTrJ5586dAw9SkoZtXkVqr1/+wCrgoubzRcArutovro6vAQcnOWwQwUrSNEvye8Bu4KN9upxQVc+hczDgjCS/3G9dVbWuqmaqambZsmVDiFaSh
msxY1KfVFU7AJr3JzbthwO3dfXb1rRJY5XkyCRfSHJjki1J3tS09xy6Io1SkjXAy4D/UFXVq09VbW/e7wAuA44fXYSSNFrDuHAqPdr22OF6KkpjsBt4a1X9PPB8OkeijqH/0BVpJJKsBN4GvLyq7uvT59FJHjv7GTgZuL5XX0maBgcuYtnbkxxWVTua0/l3NO3bgCO7+h0BbJ+7cFWtA9YBzMzM9DxqIA1Sc8R/9uj/D5LcSOco/yrgxKbbRcAX6RQM0sAluYROvj0hyTbgXXSu5n8ksCkJwNeq6o1JngJ8pKpOBZ4EXNbMPxD466r63ChiPmfTd/Zoe8tJTx/FV0tawhZTpG4A1gDvbd4/09V+ZpJLgecB98wOC5DaIslRwLOBK5kzdCXJE3v0X0vnQkCWL18+ukA1darq9B7NF/Tpux04tfl8C/CsIYYmSa0y31tQXQJ8FXhGkm1JXk+nOD0pyc3ASc00wEbgFmAr8GHgPw08amkRkjwG+CTw5qr6/nyW8SIUSZJGa15HUvv88gd4cY++BZyxmKCkYUlyEJ0C9aNV9ammud/QFUmSNCY+cUpLRjqD+S4AbqyqD3TNmh26Ag8fuiJJksZkMWNSpUlzAvAa4FtJrm3a3kFnqMrHm2Es/xN41ZjikyRJDYtULRlV9RV63yINegxdkSRJ4+PpfkmSJLWORaokSZJaxyJVkiRJrWORKkmSpNaZ6gunej3KT5IkSe3nkVRJkiS1jkWqJEmSWsciVZJGLMn6JHckub6r7dAkm5Lc3Lwf0mfZNU2fm5Os6dVHkqaBRaokjd6FwMo5bWcBV1TVCuCKZvphkhwKvAt4HnA88K5+xawkTbqpvnBKmiT9LvR7y0lPH3EkGraq+lKSo+Y0rwJObD5fBHwReNucPi8FNlXVLoAkm+gUu5cMKVRJGhuLVElqhydV1Q6AqtqR5Ik9+hwO3NY1va1p20OStcBagOXLlw841MHcPcUfYJL2xtP9kjQ50qOtenWsqnVVNVNVM8uWLRtyWFqKeo2tnjM/Sf48ydYk1yV5zqhj1GSzSJWkdrg9yWEAzfsdPfpsA47smj4C2D6C2KReLmTPsdXdTgFWNK+1wHkjiElTxCJVktphAzB7tf4a4DM9+lwOnJzkkOaCqZObNmnkqupLwK69dFkFXFwdXwMOnv0hJs3Hfo9JTfIM4GNdTT8LvBM4GPiPwM6m/R1VtXG/I5Qm2CAuhuq1DsfyTbYkl9C5SOoJSbbRuWL/vcDHk7we+J/Aq5q+M8Abq+oNVbUryXuAq5pVnT17EZXUQv3GUO+Y23HYY6hhOE+hHPU+fqn9e7DfRWpV3QQcB5DkAOB7wGXArwPnVNWfDCRCSZoyVXV6n1kv7tF3M/CGrun1wPohhSYN0oLGUAPrAGZmZnr20dIzqNP9Lwa+W1X/PKD1SQPX5wbq707yvSTXNq9TxxmjJE0Rx1BrUQZ1C6rVPPw+fWcmeS2wGXhrVd09d4FRHNofFu9nObEuBM4FLp7T7pF/SRq8DXTqgUvpPIDintnbrEnzsegiNckjgJcDb2+azgPeQ+eQ/nuA9wO/MXc5D+1r1PrcQF1Si3gQYHL0GVt9EEBVnQ9sBE4FtgL30RkOKM3bII6kngJcU1W3A8y+AyT5MPC3A/gOaZim+si/JA3DXsZWz84v4IwRhaMpNIgxqafTdap/zu0lXgn0vMmv1BLnAf+azkWAO+gc+d+DN0aXJGm0FnUkNcnPACcBv9nV/MdJjqNzuv/WOfOkVvHIvyRJ7bSoIrWq7gMeP6ftNYuKSBqhJId1DeT3yL8kSS0xqKv7pdbrM8j/RI/8S5LUPhapWjL6DPK/YOSBSJKkfRrUzfwlSZKkgbFIlSRJUutYpEqSJKl1LFIlqQWSPCPJtV2v7yd585w+Jya5p6vPO8cVryQNmxdOSVILVNVNdB4qQZIDgO8Bl/Xo+uWqetkoY5OkcfBIqiS1z4uB71bVP487EEkaF4tUSWqf1
XQ9bnqOFyT5ZpLPJjm23wqSrE2yOcnmnTt3DidKSRoii1RJapEkjwBeDvxNj9nXAE+tqmcBHwQ+3W89VbWuqmaqambZsmXDCVaShsgiVZLa5RTgmqq6fe6Mqvp+Vd3bfN4IHJTkCaMOUJJGwSJVktrldPqc6k/y5CRpPh9PZx9+1whjk6SR8ep+SWqJJD8DnAT8ZlfbGwGq6nzgNOC3kuwG7gdWV1WNI1ZJGjaLVElqiaq6D3j8nLbzuz6fC5w76rgkaRw83S9JkqTWsUiVJElS6yz6dH+SW4EfAA8Au6tqJsmhwMeAo4BbgX9fVXcv9rskdZyz6TtDWe9bTnr6vL+vX19JkgZhUEdSX1hVx1XVTDN9FnBFVa0ArmimJUmSpHkZ1un+VcBFzeeLgFcM6XukeUuyPskdSa7vajs0yaYkNzfvh4wzRkmS1DGIq/sL+HySAv6yqtYBT6qqHQBVtSPJEwfwPa3nKdHWu5DOldEXd7XNHvV/b5Kzmum3jSE2SZLUZRBF6glVtb0pRDcl+fZ8FkqyFlgLsHz58gGE0U4Wru1RVV9KctSc5lXAic3ni4AvYpEqSdLYLfp0f1Vtb97vAC4DjgduT3IYQPN+R4/lfK602uBhR/2Bnkf9k6xNsjnJ5p07d440QEmSlqJFFalJHp3ksbOfgZOB64ENwJqm2xrgM4v5Hmnc/FElSdJoLfZ0/5OAy5pHSR8I/HVVfS7JVcDHk7we+J/Aqxb5PdKw3J7ksGbsdM+j/pIkafQWVaRW1S3As3q03wW8eDHrlkZk9qj/e/GovyRJreETp7RkJLkE+CrwjCTbmiP97wVOSnIzcFIzLUmSxmwQV/dLE6GqTu8zy6P+jWE9yUrz1+spfnPmB/gz4FTgPuB1VXXNqOOUpGGzSJWk9nlhVd3ZZ94pwIrm9TzgvOZdkqaKp/slabKsAi6ujq8BB8/e8k8apSQrk9yUZGvzMJS581+XZGeSa5vXG8YRpyaXR1IlqV16PcWv2+HAbV3T25q2Hd2dJuGBKQ4vmVxJDgA+RGcs/zbgqiQbquqGOV0/VlVnjjxATQWPpEpSu5xQVc+hc1r/jCS/PGd+eixTezR4b18N1/HA1qq6pap+DFxK5yi/NDAWqZLUIn2e4tdtG3Bk1/QRwPbRRCc9qN8R/bl+Ncl1ST6R5Mge8wGf6qfeLFIlqSX28hS/bhuA16bj+cA9s4/2lUZoPkf0/xtwVFX9AvD3wEX9VuaRf/XimFRJao9+T/F7I0BVnQ9spHP7qa10bkH162OKVUvbPo/oNw/2mfVh4H0jiEtTxCJVklpiL0/xO7/rcwFnjDIuqYergBVJjga+B6wGfq27w+wjp5vJlwM3jjZETTqLVEmStCBVtTvJmcDlwAHA+qrakuRsYHNVbQB+O8nLgd3ALuB1YwtYE8kiVZIkLVhVbaQz/KS77Z1dn98OvH3UcWl6WKRKkiZSv/usvuWkp484EknD4NX9kiRJah2PpI6Bv/4lSZL2ziOpkiRJah2LVEmSJLXOfhepSY5M8oUkNybZkuRNTfu7k3wvybXN69TBhStJkqSlYDFjUncDb62qa5rH+F2dZFMz75yq+pPFhyeNRpJbgR8ADwC7q2pmvBFJkrS07XeR2jxFYkfz+QdJbgQOH1Rg0hi8sKruHHcQkiRpQGNSkxwFPBu4smk6M8l1SdYnOaTPMmuTbE6yeefOnYMIQ5IkSVNi0begSvIY4JPAm6vq+0nOA94DVPP+fuA35i5XVeuAdQAzMzO12DikRSrg80kK+MsmPx+UZC2wFmD58uWL/rJ+tyGbJN5KTZI0TIs6kprkIDoF6ker6lMAVXV7VT1QVT8BPgwcv/gwpaE7oaqeA5wCnJHkl7tnVtW6qpqpqplly5aNJ0JNtX4Xo87pc2KSe7ouTH1nr3VJ0jTY7yOpSQJcANxYVR/oaj+sGa8K8Erg+sWFKA1fVW1v3u9IchmdH1dfGm9UWmJ6XoxaVTfM6fflq
nrZGOKTpJFazOn+E4DXAN9Kcm3T9g7g9CTH0Tl9eivwm4uKcAnpdfrUU6fDl+TRwE81FwA+GjgZOHvMYWmJ2cvFqHOLVElaEhZzdf9XgPSYtXH/w9FcjvsbiScBl3VODnAg8NdV9bnxhqSlrMfFqN1ekOSbwHbgd6tqS591DHQctSSN2qIvnJImXVXdAjxr3HFIsOfFqHNmXwM8tarubR6U8mlgRa/1eHGqpEnnY1ElqSV6XYzaraq+X1X3Np83AgclecKIw5SkkbBIlaQW6Hcx6pw+T276keR4Ovvwu0YXpSSNjqf7Jakd+l2Muhygqs4HTgN+K8lu4H5gdVUtiVP503BvYUkLY5EqSS2wl4tRu/ucC5w7mogkabwsUiUNlLdSkyQNgmNSJUmS1DoWqZIkSWodi1RJkiS1ztSMSfXKT0mSpOnhkVRJkiS1ztQcSZUkqZ+FnG3zbhRSO3gkVZIkSa1jkSpJkqTWsUiVJElS6zgmdYr0G3Pl+CqNm+MBJUkLNbQiNclK4M+AA4CPVNV7h/VdS5G33Bos81VtsK88TPJI4GLgucBdwKur6tZRxynNMmc1TEM53Z/kAOBDwCnAMcDpSY4ZxndJi2W+qg3mmYevB+6uqqcB5wDvG22U0kPMWQ3bsMakHg9srapbqurHwKXAqiF9l7RY5qvaYD55uAq4qPn8CeDFSTLCGKVu5qyGalin+w8Hbuua3gY8bxAr9jT3wjkecJ+Glq/SAswnDx/sU1W7k9wDPB64cyQRSg9nzmqohlWk9vqVVA/rkKwF1jaT9ya5qccyT2A6E7m12/U7i19Fv237XFWtXPzqh8J83beRbdsAcnAh2pSv+8zDefbpdNx3zk5tvv7OIrdtxDm4EG3KVxhgzrYtXxebAwtcfo9tW8jy05yvwypStwFHdk0fAWzv7lBV64B1e1tJks1VNTP48MZrWrcLJnbbzNd9mNZta9l27TMPu/psS3Ig8DhgV6+V7StnW7btAzWt29bC7RpYzpqv07dtg9iuYY1JvQpYkeToJI8AVgMbhvRd0mKZr2qD+eThBmBN8/k04B+qqueRVGkEzFkN1VCOpDbjTs4ELqdzW4r1VbVlGN8lLZb5qjbol4dJzgY2V9UG4ALgr5JspXM0avX4ItZSZ85q2IZ2n9Sq2ghsXORq9np6dYJN63bBhG6b+bpP07ptrdquXnlYVe/s+vxD4FUD+rpWbfuATeu2tW67Rpizrdv2AZrWbVv0dsWj7pIkSWqbYY1JlSRJkvabRaokSZJaxyJVkiRJrWORKkmSpNaxSJUkSVLrWKRKkiSpdSxSJUmS1DoWqZIkSWodi1RJkiS1jkWqJEmSWsciVZIkSa1jkdpHkgeSXJvk+iR/k+RnBrDO1yU5dxDx9Vj38iT3JvndYaxf7TYp+ZrkqCT3N7Fem+T8Qa5fk2FS8rVZ7y8k+WqSLUm+leSnB/0dardJydckj0jy/zZ5+s0kJw5y/eNgkdrf/VV1XFU9E/gx8Mb5LpjkgOGF1dc5wGfH8L1qh0nK1+82sR5XVfOOU1NlIvI1yYHAfwXeWFXHAicC/3tU36/WmIh8Bf4jQFX9G+Ak4P1JJrrOm+jgR+jLwNMAknw6ydXNr+q1sx2ao5hnJ7kSeEGSX0zyj82vma8neWzT9SlJPpfk5iR/PIjgkrwCuAXYMoj1aeK1Ol+lOdqcrycD11XVNwGq6q6qemAA69XkanO+HgNcAVBVdwD/AswMYL1jc+C4A2i75pf0KcDnmqbfqKpdSR4FXJXkk1Wf3yi0AAAZBElEQVR1F/Bo4PqqemeSRwDfBl5dVVcl+VfA/c3yxwHPBn4E3JTkg1V125zvPAd4YY9wLq2q987p+2jgbXR+NXmqf4lre742jk7yDeD7wP9TVV9e3FZrUk1Avj4dqCSXA8uaPv5YW6ImIF+/CaxKcilwJPDc5v3ri9z0sbFI7e9RSa5tPn8ZuKD5/NtJXtl8PhJYAdwFPAB8sml/B
rCjqq4CqKrvAyQBuKKq7mmmbwCeCjwsKavqLQuI8w+Ac6rq3mb9WpomJV93AMur6q4kzwU+neTY2e/UkjEp+Xog8G+BXwTuA65IcnVVXbGAdWjyTUq+rgd+HtgM/DPwj8DuBSzfOhap/d1fVcd1NzSDkF8CvKCq7kvyRWB2EP0Pu04DBag+6/1R1+cH6PE3WOAvp+cBpzWnCg4GfpLkh1U1lAu01FoTka9V9aPZdVbV1Um+S+do1eb+m6YpNBH5CmwD/ntV3dksuxF4Ds0pVS0ZE5GvVbUbeLCoTfKPwM19vnsiWKQuzOOAu5uE/Dng+X36fZvOWJNfbA7vP5aHDu/v00J+OVXVL81+TvJu4F4LVDVal69JlgG7quqBJD9L58jDLfNdXlOtdfkKXA7853Su5v4x8H/SuUhVal2+NnmaqvpfSU4CdlfVDfNdvo0sUhfmc8Abk1wH3AR8rVenqvpxklcDH2zGqtxP5xeXNEptzNdfBs5OspvOkYM3VtWuIX2XJkvr8rWq7k7yAeAqOkfDNlbV3w3juzRxWpevwBOBy5P8BPge8Johfc/IpKrfUWhJkiRpPLwFlSRJklrHIlWSJEmtY5EqSZKk1rFIlSRJUutYpEqSJKl1WlGkrly5sujc3sPX9L6mhvm6JF5TxZyd+tdUMV+n/jVvrShS77zzznGHIM2b+apJY85qkpivmtWKIlWSJEnqZpEqSZKk1llQkZrkyCRfSHJjki1J3tSjT5L8eZKtSa5L8pzBhSsNT5IDknwjyd+OOxYtPe5fJenhFnokdTfw1qr6eeD5wBlJjpnT5xRgRfNaC5y36Cil0XgTcOO4g9CS5f5VkrosqEitqh1VdU3z+Qd0/kE/fE63VcDF1fE14OAkhw0kWmlIkhwB/F/AR8Ydi5Ym96+S9HAH7u+CSY4Cng1cOWfW4cBtXdPbmrYdc5ZfS+dIAMuXL9/fMKbeOZu+07P9LSc9fcSRTL0/Bf4z8NheMyc5X82hybPY/WuzjonN2V565bE5LE23/bpwKsljgE8Cb66q78+d3WORPe6LVVXrqmqmqmaWLVu2P2FIA5HkZcAdVXV1vz7mq0ZlEPtXMGclTb4FF6lJDqKzA/1oVX2qR5dtwJFd00cA2/cvPGkkTgBenuRW4FLgRUn+63hD0lLk/lWSHrLQq/sDXADcWFUf6NNtA/Da5irU5wP3VNUep6Kktqiqt1fVEVV1FLAa+Ieq+r/HHJaWGPevkvRwCx2TegLwGuBbSa5t2t4BLAeoqvOBjcCpwFbgPuDXBxOqJE0196+S1GVBRWpVfYXeY6K6+xRwxmKCksalqr4IfHHMYWgJcv8qSQ/nE6ckSZLUOhapkiRJah2LVEmSJLWORaokSZJaxyJVkiRJrWORKkmSpNaxSJUkSX0lWZnkpiRbk5zVY/4jk3ysmX9lkqPmzF+e5N4kvzuqmDUdLFIlSVJPSQ4APgScAhwDnJ7kmDndXg/cXVVPA84B3jdn/jnAZ4cdq6aPRaokSerneGBrVd1SVT8GLgVWzemzCrio+fwJ4MXNY35J8grgFmDLiOLVFLFIlSRJ/RwO3NY1va1p69mnqnYD9wCPT/Jo4G3AH+zrS5KsTbI5yeadO3cOJHBNPotUSZLUT69H9dY8+/wBcE5V3buvL6mqdVU1U1Uzy5Yt248wNY0OHHcAkiSptbYBR3ZNHwFs79NnW5IDgccBu4DnAacl+WPgYOAnSX5YVecOP2xNA4tUSZLUz1XAiiRHA98DVgO/NqfPBmAN8FXgNOAfqqqAX5rtkOTdwL0WqFoIi1RJktRTVe1OciZwOXAAsL6qtiQ5G9hcVRuAC4C/SrKVzhHU1eOLWNPEIlWSJPVVVRuBjXPa3tn1+YfAq/axjncPJThNNS+ckiRJUutYpEqSJKl1LFIlSZLUOhapkiRJah2LVEmSJLWORaokSZJaxyJVkiRJrWORKkmSpNaxSJUkSVLrWKRKkiSpdSxSJUmS1DoWq
ZIkSWodi1RJkiS1jkWqJEmSWsciVZIkSa1jkSpJkqTWWVCRmmR9kjuSXN9n/olJ7klybfN652DClIYnyU8n+XqSbybZkuQPxh2Tlib3sZL0kAMX2P9C4Fzg4r30+XJVvWy/I5JG70fAi6rq3iQHAV9J8tmq+tq4A9OScyHuYyUJWOCR1Kr6ErBrSLFIY1Ed9zaTBzWvGmNIWqLcx0rSQ4YxJvUFzWnTzyY5tl+nJGuTbE6yeefOnUMIQ5q/JAckuRa4A9hUVVfOmW++qi3cx0paEgZdpF4DPLWqngV8EPh0v45Vta6qZqpqZtmyZQMOQ1qYqnqgqo4DjgCOT/LMOfPNV7WB+1hJS8ZAi9Sq+v7sadOq2ggclOQJg/wOaZiq6l+ALwIrxxyKtAf3sZKWkoEWqUmenCTN5+Ob9d81yO+QBi3JsiQHN58fBbwE+PZ4o5L25D5W45BkZZKbkmxNclaP+Y9M8rFm/pVJjmraT0pydZJvNe8vGnXsmmwLuro/ySXAicATkmwD3kXnIhOq6nzgNOC3kuwG7gdWV5UXoKjtDgMuSnIAnX/0P15VfzvmmLQEuY9V2zT7xQ8BJwHbgKuSbKiqG7q6vR64u6qelmQ18D7g1cCdwL+rqu3NEKrLgcNHuwWaZAsqUqvq9H3MP5fO7VOkiVFV1wHPHncckvtYtdDxwNaqugUgyaXAKqC7SF0FvLv5/Ang3CSpqm909dkC/HSSR1bVj4YftqaBT5ySJEn9HA7c1jW9jT2Phj7Yp6p2A/cAj5/T51eBb/QrUL0bhXqxSJUkSf2kR9vcISZ77dPcKu19wG/2+xLvRqFeLFIlSVI/24Aju6aPALb365PkQOBxNA+lSHIEcBnw2qr67tCj1VSxSJUkSf1cBaxIcnSSRwCrgQ1z+mwA1jSfTwP+oaqquWvK3wFvr6r/MbKINTUsUiVJUk/NGNMz6VyZfyOdu59sSXJ2kpc33S4AHp9kK/A7wOxtqs4Engb8fpJrm9cTR7wJmmALurpfkiQtLc2DIzbOaXtn1+cfAq/qsdwfAn849AA1tTySKkmSpNaxSJUkSVLrWKRKkiSpdSxSJUmS1DoWqZIkSWodi1RJkiS1jkWqJEmSWsciVZIkSa1jkSpJkqTWsUiVJElS61ikSpIkqXUsUiVJktQ6FqmSJElqHYtUSZIktY5FqiRJklrHIlWSJEmtY5EqSZKk1rFIlSRJUutYpEqSJKl1LFIlSZLUOhapkiRJah2LVEmS1FeSlUluSrI1yVk95j8yycea+VcmOapr3tub9puSvHSUcWvyWaRKkqSekhwAfAg4BTgGOD3JMXO6vR64u6qeBpwDvK9Z9hhgNXAssBL4i2Z90rxYpEqSpH6OB7ZW1S1V9WPgUmDVnD6rgIuaz58AXpwkTfulVfWjqvonYGuzPmleLFIlSVI/hwO3dU1va9p69qmq3cA9wOPnuSwASdYm2Zxk886dOwcUuibdgovUJOuT3JHk+j7zk+TPmzEo1yV5zuLDlIYnyZFJvpDkxiRbkrxp3DFpaXL/qhZKj7aaZ5/5LNtprFpXVTNVNbNs2bIFhqhptT9HUi+kM7akn1OAFc1rLXDefnyHNEq7gbdW1c8DzwfO6DHmShqFC3H/qnbZBhzZNX0EsL1fnyQHAo8Dds1zWamvBRepVfUlOsnXzyrg4ur4GnBwksP2N0Bp2KpqR1Vd03z+AXAjfU5JScPk/lUtdBWwIsnRSR5B50KoDXP6bADWNJ9PA/6hqqppX91c/X80nR9XXx9R3JoCBw5hnf3GoOzo7pRkLZ0jASxfvnwIYSzMOZu+07P9LSc9fdHrWMw6NVrNrVOeDVw5p71V+ToIvfLV3Gy9ee1fYTpzVqNXVbuTnAlcDhwArK+qLUnOBjZX1QbgAuCvkmyl8yNrdbPsliQfB26gc8bqjKp6YCwbook0jCJ1XmNQqmodsA5gZmam5xgVaZSSPAb4JPDmqvp+9zzzVS2xoDF+mLMag
KraCGyc0/bOrs8/BF7VZ9k/Av5oqAFqag3j6n7HoGjiJDmIToH60ar61Ljjkfpw/yppyRhGkboBeG1zFerzgXuqao9TUVJbNPfzuwC4sao+MO54pL1w/yppyVjw6f4klwAnAk9Isg14F3AQQFWdT+eUwKl0btp7H/DrgwpWGpITgNcA30pybdP2juYUlzQy7l8l6SELLlKr6vR9zC/gjP2OSBqxqvoKvcf6SSPl/lWSHuITpyRJktQ6FqmSJElqHYtUSZIktY5FqiRJklrHIlWSJEmtY5EqSZKk1rFIlSRJUutYpEqSJKl1LFIlSZLUOhapkiRJah2LVEmSJLWORaokSZJaxyJVkiRJrWORKkmSpNaxSJUkSVLrWKRKkiSpdSxSJUmS1DoWqZIkaQ9JDk2yKcnNzfshffqtafrcnGRN0/YzSf4uybeTbEny3tFGr2lgkSpJkno5C7iiqlYAVzTTD5PkUOBdwPOA44F3dRWzf1JVPwc8GzghySmjCVvTwiJVkiT1sgq4qPl8EfCKHn1eCmyqql1VdTewCVhZVfdV1RcAqurHwDXAESOIWVPEIlWSJPXypKraAdC8P7FHn8OB27qmtzVtD0pyMPDv6ByNlebtwHEHIEmSxiPJ3wNP7jHr9+a7ih5t1bX+A4FLgD+vqlv2EsdaYC3A8uXL5/nVmnYWqZIkLVFV9ZJ+85LcnuSwqtqR5DDgjh7dtgEndk0fAXyxa3odcHNV/ek+4ljX9GVmZqb21ldLh6f7JUlSLxuANc3nNcBnevS5HDg5ySHNBVMnN20k+UPgccCbRxCrppBFqiRJ6uW9wElJbgZOaqZJMpPkIwBVtQt4D3BV8zq7qnYlOYLOkIFjgGuSXJvkDePYCE0uT/dLkqQ9VNVdwIt7tG8G3tA1vR5YP6fPNnqPV5XmzSOpkiRJah2LVEmSJLWORaokSZJaxyJVkiRJrWORKkmSpNZZcJGaZGWSm5JsTXJWj/mvS7Kzud2Et5xQ6yVZn+SOJNePOxYtbe5fJekhC7oFVZIDgA/RuV/aNuCqJBuq6oY5XT9WVWcOKEZp2C4EzgUuHnMcWsLcv0rSwy30SOrxwNaquqWqfgxcCqwafFjS6FTVl4Bd445DS577V0nqstAi9XDgtq7pbU3bXL+a5Lokn0hyZK8VJVmbZHOSzTt37lxgGNJoma8agYHtX8GclTT5Flqk9np6RM2Z/m/AUVX1C8DfAxf1WlFVrauqmaqaWbZs2QLDkEbLfNUIDGz/CuaspMm30CJ1G9D9y/0IYHt3h6q6q6p+1Ex+GHju/ocnSUuG+1dJ6rLQIvUqYEWSo5M8AlgNbOjukOSwrsmXAzcuLkRJWhLcv0pSlwVd3V9Vu5OcCVwOHACsr6otSc4GNlfVBuC3k7wc2E3nYpTXDThmaaCSXAKcCDwhyTbgXVV1wXij0lLj/lWSHm5BRSpAVW0ENs5pe2fX57cDb198aNJoVNXp445BAvevktTNJ05JkiSpdSxSJUmS1DoWqZIkSWodi1RJkiS1jkWqJEmSWsciVZIkSa1jkSpJkqTWsUiVJElS61ikSpKkPSQ5NMmmJDc374f06bem6XNzkjU95m9Icv3wI9a0sUiVJEm9nAVcUVUrgCua6YdJcijwLuB5wPHAu7qL2SS/Atw7mnA1bSxSJUlSL6uAi5rPFwGv6NHnpcCmqtpVVXcDm4CVAEkeA/wO8IcjiFVTyCJVkiT18qSq2gHQvD+xR5/Dgdu6prc1bQDvAd4P3LevL0qyNsnmJJt37ty5uKg1NQ4cdwCSJGk8kvw98OQes35vvqvo0VZJjgOeVlVvSXLUvlZSVeuAdQAzMzM1z+/WlLNIlSRpiaqql/Sbl+T2JIdV1Y4khwF39Oi2DTixa/oI4IvAC4DnJrmVTq3xxCRfrKoTkebJ0/2SJKmXDcDs1fprgM/06HM5cHKSQ5oLpk4GLq+q86rqKVV1FPBvge9YoGqhLFIlSVIv7wVOSnIzc
FIzTZKZJB8BqKpddMaeXtW8zm7apEXzdL8kSdpDVd0FvLhH+2bgDV3T64H1e1nPrcAzhxCippxHUiVJktQ6FqmSJElqHYtUSZIktY5FqiRJklrHIlWSJEmtY5EqSZKk1rFIlSRJUutYpEqSJKl1LFIlSZLUOhapkiRJah2LVEmSJLWORaokSZJaxyJVkiRJrWORKkmSpNZZcJGaZGWSm5JsTXJWj/mPTPKxZv6VSY4aRKDSMO0rr6VRcP8qSQ9ZUJGa5ADgQ8ApwDHA6UmOmdPt9cDdVfU04BzgfYMIVBqWeea1NFTuXyXp4RZ6JPV4YGtV3VJVPwYuBVbN6bMKuKj5/AngxUmyuDCloZpPXkvD5v5VkrocuMD+hwO3dU1vA57Xr09V7U5yD/B44M7uTknWAmubyXuT3NTj+54wd7lR+53hrHPR2zWMuAak37Z9rqpWjjqYedpnXk9Kvi7WXvJq4retjzbl68D2rzCvnJ34v6n5+qA2718X7Oqrr74zyT/PaZ7WvylM77YtOl8XWqT2+sVe+9GHqloHrNvrlyWbq2pm/uFNhmndLpjYbdtnzi7lfIXp3baWbdfA9q+w75xt2bYP1LRu27Ru11xVtWxu2zRv+7Ru2yC2a6Gn+7cBR3ZNHwFs79cnyYHA44Bd+xugNALzyWtp2Ny/SlKXhRapVwErkhyd5BHAamDDnD4bgDXN59OAf6iqnr/0pZaYT15Lw+b+VZK6LOh0fzMG6kzgcuAAYH1VbUlyNrC5qjYAFwB/lWQrnV/4qxcR315Pr06wad0umMBt65fX+7Gqidv2BZjWbWvNdrl/Hahp3bZp3a75mOZtn9ZtW/R2xR/hkiRJahufOCVJkqTWsUiVJElS67SySJ22R1QmuTXJt5Jcm2Rz03Zokk1Jbm7eDxl3nPORZH2SO5Jc39XWc1vS8efN3/G6JM8ZX+TDY762l/na2zTlrPlqvk4S83Vh+dq6IjXT+4jKF1bVcV33DDsLuKKqVgBXNNOT4EJg7k14+23LKcCK5rUWOG9EMY6M+dp6F2K+PsyU5qz5ar5OEvN1nvnauiKVpfOIyu7HG14EvGKMscxbVX2JPe/L2G9bVgEXV8fXgIOTHDaaSEfGfG0x87WnpZCz5uv0MF9bahT52sYitdejAQ8fUyyDUsDnk1ydzqMKAZ5UVTsAmvcnji26xeu3LdP4t5xrGrfRfO2Yhr9lL9O2neZrx6T/HfuZtu00Xzvm9Xdc6GNRR2Hej/2bICdU1fYkTwQ2Jfn2uAMakWn8W841jdtovj5k0v+WvUzbdpqvD5nkv2M/07ad5utD9vl3bOOR1Kl7RGVVbW/e7wAuo3P64vbZQ93N+x3ji3DR+m3L1P0te5i6bTRfHzTxf8s+pmo7zdcHTfTfcS+majvN1wfN6+/YxiJ1qh5RmeTRSR47+xk4Gbiehz/ecA3wmfFEOBD9tmUD8Nrmqr7nA/fMngaYIubr5FnK+QpTlLPmq/k6SczX/cjXqmrdCzgV+A7wXeD3xh3PIrflZ4FvNq8ts9sDPJ7OlW83N++HjjvWeW7PJcAO4H/T+WX0+n7bQufw/oeav+O3gJlxxz+k/ybma0tf5mvf/y5TkbPmq/k6SS/zdeH56mNRJUmS1DptPN0vSZKkJc4iVZIkSa1jkSpJkqTWsUiVJElS61ikSpIkqXUsUockySuTVJKfG3cs0r6Yr5ok5qsmifm6/yxSh+d04Ct0bjwstZ35qklivmqSmK/7ySJ1CJI8BjiBzo1tVzdtP5XkL5JsSfK3STYmOa2Z99wk/z3J1Ukun32kmDQK5qsmifmqSWK+Lo5F6nC8AvhcVX0H2JXkOcCvAEcB/wZ4A/ACgCQHAR8ETquq5wLrgT8aR9BassxXTRLzVZPEfF2EA8cdwJQ6HfjT5vOlzfRBwN9U1U+A/y/JF5r5zwCeCWxKAnAAnceMSaNivmqSmK+aJObrIlikDliSxwMvAp6ZpOgkWQGX9VsE2FJVL
xhRiNKDzFdNEvNVk8R8XTxP9w/eacDFVfXUqjqqqo4E/gm4E/jVZizKk4ATm/43AcuSPHi4P8mx4whcS5L5qklivmqSmK+LZJE6eKez56+kTwJPAbYB1wN/CVwJ3FNVP6aTyO9L8k3gWuD/+P/btWMbhIEYgKJ2Q5WCfRiPlZiCSTIAvWkoKSJFSmzlvQlc/MK683HjcnF6ZRK9Moled8qqOnuGy8jMpao+vy+Ad0Q8qmo9ey74R69Molcm0es2blKP9crMe0TcIuIpSJrTK5PolUn0uoGXVAAA2nGTCgBAO5ZUAADasaQCANCOJRUAgHYsqQAAtPMF30U0961/H98AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visualize the correlation between Parch and Age\n", + "grid = sns.FacetGrid(df, col='Parch', col_wrap=4, size=3.0, aspect=0.8, sharey=False)\n", + "grid.map(plt.hist, 'Age', alpha=.5, bins=range(0,105,5))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "_cell_guid": "38a4399d-3e36-44ab-8269-a10d6b6aecc8", + "_execution_state": "idle", + "_uuid": "42db5280471f9bd3acefe047daccfefcd3bf1a00" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
Parch
031.969401
124.965625
218.975979
338.875000
447.600000
539.333333
641.500000
9NaN
\n", + "
" + ], + "text/plain": [ + " Age\n", + "Parch \n", + "0 31.969401\n", + "1 24.965625\n", + "2 18.975979\n", + "3 38.875000\n", + "4 47.600000\n", + "5 39.333333\n", + "6 41.500000\n", + "9 NaN" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the mean Age for each Parch\n", + "df[['Parch', 'Age']].groupby(['Parch']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "_cell_guid": "4e914c54-23d4-46dc-9a7e-fc9e0e038254", + "_execution_state": "idle", + "_uuid": "79cb7ffd9ea2ac77fe8bae5d7264986be56604ec" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
Parch
012.422617
118.410885
214.564066
316.295815
414.432602
51.032796
62.121320
9NaN
\n", + "
" + ], + "text/plain": [ + " Age\n", + "Parch \n", + "0 12.422617\n", + "1 18.410885\n", + "2 14.564066\n", + "3 16.295815\n", + "4 14.432602\n", + "5 1.032796\n", + "6 2.121320\n", + "9 NaN" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the standard deviation of Age for each Parch\n", + "df[['Parch', 'Age']].groupby(['Parch']).std() " + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "_cell_guid": "020c9de0-ff1c-4eed-be20-cbff19cac018", + "_execution_state": "idle", + "_uuid": "9334adf52175fe53abf8138378f4a83efaf77ffb" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\HP-PC\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:190: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " self._setitem_with_indexer(indexer, value)\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# notice that instead of using Title, we should use its corresponding dummy variables \n", + "df_sub = df[['Age','Master','Miss','Mr','Mrs','Others','Fare-bin','SibSp']]\n", + "\n", + "X_train = df_sub.dropna().drop('Age', axis=1)\n", + "y_train = df['Age'].dropna()\n", + "X_test = df_sub.loc[np.isnan(df.Age)].drop('Age', axis=1)\n", + "\n", + "regressor = RandomForestRegressor(n_estimators = 300, random_state = 0) # fixed seed keeps the imputed ages reproducible\n", + "regressor.fit(X_train, y_train)\n", + "y_pred = np.round(regressor.predict(X_test),1)\n", + "df.loc[df.Age.isnull(), 'Age'] = y_pred # single .loc on df; chained df.Age.loc[...] may write to a copy\n", + "\n", + "df.Age.isnull().sum(axis=0) # no more NAN now" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "_cell_guid": "8a4ce8e1-b212-4d6a-8d8b-ad56bf8acf72", + 
"_execution_state": "idle", + "_uuid": "5f9658c201cd82bf031d49803d4045fe847d81e8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age-binSurvived
010.675000
120.452381
230.402597
340.315789
450.427083
560.375000
670.125000
\n", + "
" + ], + "text/plain": [ + " Age-bin Survived\n", + "0 1 0.675000\n", + "1 2 0.452381\n", + "2 3 0.402597\n", + "3 4 0.315789\n", + "4 5 0.427083\n", + "5 6 0.375000\n", + "6 7 0.125000" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bins = [ 0, 4, 12, 18, 30, 50, 65, 100] # This is somewhat arbitrary\n", + "age_index = (1,2,3,4,5,6,7) #('baby','child','teenager','young','mid-age','over-50','senior')\n", + "df['Age-bin'] = pd.cut(df.Age, bins, labels=age_index).astype(int)\n", + "\n", + "df[['Age-bin', 'Survived']].groupby(['Age-bin'],as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "_cell_guid": "4f3f0831-83e7-44e4-a69e-5f5fe024e122", + "_execution_state": "idle", + "_uuid": "a7a0e7795ab628123de85df8cb6c2eac84185165" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TicketSurvived
010.630137
120.464481
230.239203
340.200000
450.000000
560.166667
670.111111
780.000000
891.000000
9A0.068966
10C0.340426
11F0.571429
12L0.250000
13P0.646154
14S0.323077
15W0.153846
\n", + "
" + ], + "text/plain": [ + " Ticket Survived\n", + "0 1 0.630137\n", + "1 2 0.464481\n", + "2 3 0.239203\n", + "3 4 0.200000\n", + "4 5 0.000000\n", + "5 6 0.166667\n", + "6 7 0.111111\n", + "7 8 0.000000\n", + "8 9 1.000000\n", + "9 A 0.068966\n", + "10 C 0.340426\n", + "11 F 0.571429\n", + "12 L 0.250000\n", + "13 P 0.646154\n", + "14 S 0.323077\n", + "15 W 0.153846" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['Ticket', 'Survived']].groupby(['Ticket'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "_cell_guid": "7b0bc205-5f92-4ba1-9a52-8c6b2bb91093", + "_execution_state": "idle", + "_uuid": "1c5a38e61a105b5ad40fc1f159725c30c65e71bc" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "3 429\n", + "2 278\n", + "1 210\n", + "P 98\n", + "S 98\n", + "C 77\n", + "A 42\n", + "W 19\n", + "7 13\n", + "F 13\n", + "4 11\n", + "6 9\n", + "L 5\n", + "5 3\n", + "9 2\n", + "8 2\n", + "Name: Ticket, dtype: int64" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Ticket'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": { + "_cell_guid": "58fe643e-abf9-4761-bba1-674d3304aba2", + "_execution_state": "idle", + "_uuid": "7f12fa7d7d6ee803835e5768b9da2a92e1468969" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TicketSurvived
010.630137
120.464481
230.239203
340.166667
4C0.340426
5P0.646154
6S0.323077
\n", + "
" + ], + "text/plain": [ + " Ticket Survived\n", + "0 1 0.630137\n", + "1 2 0.464481\n", + "2 3 0.239203\n", + "3 4 0.166667\n", + "4 C 0.340426\n", + "5 P 0.646154\n", + "6 S 0.323077" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Ticket'] = df['Ticket'].replace(['A','W','F','L','5','6','7','8','9'], '4')\n", + "\n", + "# check the correlation again\n", + "df[['Ticket', 'Survived']].groupby(['Ticket'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "_cell_guid": "76238516-9a91-4238-8fc7-abdd64af5435", + "_execution_state": "idle", + "_uuid": "ac5457d8c3d71e35495d2aeaca2008cabc1b0bae" + }, + "outputs": [], + "source": [ + "# dummy encoding\n", + "df = pd.get_dummies(df,columns=['Ticket'])" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeFareParchPassengerIdPclassSexSibSpSurvivedTitleMaster...FamilyFare-binAge-binTicket_1Ticket_2Ticket_3Ticket_4Ticket_CTicket_PTicket_S
022.07.2500013010.0Mr0...2140001000
138.071.2833021111.0Mrs0...2550000010
226.07.9250033101.0Miss0...1240000001
335.053.1000041111.0Mrs0...2551000000
435.08.0500053000.0Mr0...1250010000
528.88.4583063000.0Mr0...1240010000
654.051.8625071000.0Mr0...1561000000
72.021.0750183030.0Master1...0310010000
827.011.1333293101.0Mrs0...3340010000
914.030.07080102111.0Mrs0...2430100000
104.016.70001113111.0Miss0...3310000010
1158.026.55000121101.0Miss0...1461000000
1220.08.05000133000.0Mr0...1240001000
1339.031.27505143010.0Mr0...0450010000
1414.07.85420153100.0Miss0...1130010000
1555.016.00000162101.0Mrs0...1360100000
162.029.12501173040.0Master1...0410010000
1732.313.00000182001.0Mr0...1350100000
1831.018.00000193110.0Mrs0...2350010000
1933.07.22500203101.0Mrs0...1150100000
2035.026.00000212000.0Mr0...1450100000
2134.013.00000222001.0Mr0...1350100000
2215.08.02920233101.0Miss0...1230010000
2328.035.50000241001.0Mr0...1441000000
248.021.07501253130.0Miss0...0320010000
2538.031.38755263111.0Mrs0...0450010000
2629.67.22500273000.0Mr0...1140100000
2719.0263.00002281030.0Mr0...0541000000
2824.37.87920293101.0Miss0...1240010000
2928.87.89580303000.0Mr0...1240010000
..................................................................
127921.07.750001280300NaNMr0...1140010000
12806.021.075011281303NaNMaster1...0320010000
128123.093.500001282100NaNMr0...1541000000
128251.039.400011283110NaNMrs0...2460000010
128313.020.250021284300NaNMaster1...3330000100
128447.010.500001285200NaNMr0...1250000100
128529.022.025011286303NaNMr0...0440010000
128618.060.000001287111NaNMrs0...2531000000
128724.07.250001288300NaNMr0...1140010000
128848.079.200011289111NaNMrs0...3551000000
128922.07.775001290300NaNMr0...1140010000
129031.07.733301291300NaNMr0...1150100000
129130.0164.866701292110NaNMiss0...1540010000
129238.021.000001293201NaNMr0...2350100000
129322.059.400011294110NaNMiss0...2541000000
129417.047.100001295100NaNMr0...1531000000
129543.027.720801296101NaNMr0...2451000000
129620.013.862501297200NaNMr0...1340000001
129723.010.500001298201NaNMr0...2240100000
129850.0211.500011299101NaNMr0...3551000000
129923.07.720801300310NaNMiss0...1140010000
13003.013.775011301311NaNMiss0...3310000001
130123.07.750001302310NaNMiss0...1140010000
130237.090.000001303111NaNMrs0...2551000000
130328.07.775001304310NaNMiss0...1140010000
130428.88.050001305300NaNMr0...1240001000
130539.0108.900001306110NaNOthers0...1550000010
130638.57.250001307300NaNMr0...1150000001
130728.88.050001308300NaNMr0...1240010000
13083.622.358311309301NaNMaster1...3410100000
\n", + "

1309 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " Age Fare Parch PassengerId Pclass Sex SibSp Survived \\\n", + "0 22.0 7.2500 0 1 3 0 1 0.0 \n", + "1 38.0 71.2833 0 2 1 1 1 1.0 \n", + "2 26.0 7.9250 0 3 3 1 0 1.0 \n", + "3 35.0 53.1000 0 4 1 1 1 1.0 \n", + "4 35.0 8.0500 0 5 3 0 0 0.0 \n", + "5 28.8 8.4583 0 6 3 0 0 0.0 \n", + "6 54.0 51.8625 0 7 1 0 0 0.0 \n", + "7 2.0 21.0750 1 8 3 0 3 0.0 \n", + "8 27.0 11.1333 2 9 3 1 0 1.0 \n", + "9 14.0 30.0708 0 10 2 1 1 1.0 \n", + "10 4.0 16.7000 1 11 3 1 1 1.0 \n", + "11 58.0 26.5500 0 12 1 1 0 1.0 \n", + "12 20.0 8.0500 0 13 3 0 0 0.0 \n", + "13 39.0 31.2750 5 14 3 0 1 0.0 \n", + "14 14.0 7.8542 0 15 3 1 0 0.0 \n", + "15 55.0 16.0000 0 16 2 1 0 1.0 \n", + "16 2.0 29.1250 1 17 3 0 4 0.0 \n", + "17 32.3 13.0000 0 18 2 0 0 1.0 \n", + "18 31.0 18.0000 0 19 3 1 1 0.0 \n", + "19 33.0 7.2250 0 20 3 1 0 1.0 \n", + "20 35.0 26.0000 0 21 2 0 0 0.0 \n", + "21 34.0 13.0000 0 22 2 0 0 1.0 \n", + "22 15.0 8.0292 0 23 3 1 0 1.0 \n", + "23 28.0 35.5000 0 24 1 0 0 1.0 \n", + "24 8.0 21.0750 1 25 3 1 3 0.0 \n", + "25 38.0 31.3875 5 26 3 1 1 1.0 \n", + "26 29.6 7.2250 0 27 3 0 0 0.0 \n", + "27 19.0 263.0000 2 28 1 0 3 0.0 \n", + "28 24.3 7.8792 0 29 3 1 0 1.0 \n", + "29 28.8 7.8958 0 30 3 0 0 0.0 \n", + "... ... ... ... ... ... ... ... ... 
\n", + "1279 21.0 7.7500 0 1280 3 0 0 NaN \n", + "1280 6.0 21.0750 1 1281 3 0 3 NaN \n", + "1281 23.0 93.5000 0 1282 1 0 0 NaN \n", + "1282 51.0 39.4000 1 1283 1 1 0 NaN \n", + "1283 13.0 20.2500 2 1284 3 0 0 NaN \n", + "1284 47.0 10.5000 0 1285 2 0 0 NaN \n", + "1285 29.0 22.0250 1 1286 3 0 3 NaN \n", + "1286 18.0 60.0000 0 1287 1 1 1 NaN \n", + "1287 24.0 7.2500 0 1288 3 0 0 NaN \n", + "1288 48.0 79.2000 1 1289 1 1 1 NaN \n", + "1289 22.0 7.7750 0 1290 3 0 0 NaN \n", + "1290 31.0 7.7333 0 1291 3 0 0 NaN \n", + "1291 30.0 164.8667 0 1292 1 1 0 NaN \n", + "1292 38.0 21.0000 0 1293 2 0 1 NaN \n", + "1293 22.0 59.4000 1 1294 1 1 0 NaN \n", + "1294 17.0 47.1000 0 1295 1 0 0 NaN \n", + "1295 43.0 27.7208 0 1296 1 0 1 NaN \n", + "1296 20.0 13.8625 0 1297 2 0 0 NaN \n", + "1297 23.0 10.5000 0 1298 2 0 1 NaN \n", + "1298 50.0 211.5000 1 1299 1 0 1 NaN \n", + "1299 23.0 7.7208 0 1300 3 1 0 NaN \n", + "1300 3.0 13.7750 1 1301 3 1 1 NaN \n", + "1301 23.0 7.7500 0 1302 3 1 0 NaN \n", + "1302 37.0 90.0000 0 1303 1 1 1 NaN \n", + "1303 28.0 7.7750 0 1304 3 1 0 NaN \n", + "1304 28.8 8.0500 0 1305 3 0 0 NaN \n", + "1305 39.0 108.9000 0 1306 1 1 0 NaN \n", + "1306 38.5 7.2500 0 1307 3 0 0 NaN \n", + "1307 28.8 8.0500 0 1308 3 0 0 NaN \n", + "1308 3.6 22.3583 1 1309 3 0 1 NaN \n", + "\n", + " Title Master ... Family Fare-bin Age-bin Ticket_1 Ticket_2 \\\n", + "0 Mr 0 ... 2 1 4 0 0 \n", + "1 Mrs 0 ... 2 5 5 0 0 \n", + "2 Miss 0 ... 1 2 4 0 0 \n", + "3 Mrs 0 ... 2 5 5 1 0 \n", + "4 Mr 0 ... 1 2 5 0 0 \n", + "5 Mr 0 ... 1 2 4 0 0 \n", + "6 Mr 0 ... 1 5 6 1 0 \n", + "7 Master 1 ... 0 3 1 0 0 \n", + "8 Mrs 0 ... 3 3 4 0 0 \n", + "9 Mrs 0 ... 2 4 3 0 1 \n", + "10 Miss 0 ... 3 3 1 0 0 \n", + "11 Miss 0 ... 1 4 6 1 0 \n", + "12 Mr 0 ... 1 2 4 0 0 \n", + "13 Mr 0 ... 0 4 5 0 0 \n", + "14 Miss 0 ... 1 1 3 0 0 \n", + "15 Mrs 0 ... 1 3 6 0 1 \n", + "16 Master 1 ... 0 4 1 0 0 \n", + "17 Mr 0 ... 1 3 5 0 1 \n", + "18 Mrs 0 ... 2 3 5 0 0 \n", + "19 Mrs 0 ... 1 1 5 0 1 \n", + "20 Mr 0 ... 
1 4 5 0 1 \n", + "21 Mr 0 ... 1 3 5 0 1 \n", + "22 Miss 0 ... 1 2 3 0 0 \n", + "23 Mr 0 ... 1 4 4 1 0 \n", + "24 Miss 0 ... 0 3 2 0 0 \n", + "25 Mrs 0 ... 0 4 5 0 0 \n", + "26 Mr 0 ... 1 1 4 0 1 \n", + "27 Mr 0 ... 0 5 4 1 0 \n", + "28 Miss 0 ... 1 2 4 0 0 \n", + "29 Mr 0 ... 1 2 4 0 0 \n", + "... ... ... ... ... ... ... ... ... \n", + "1279 Mr 0 ... 1 1 4 0 0 \n", + "1280 Master 1 ... 0 3 2 0 0 \n", + "1281 Mr 0 ... 1 5 4 1 0 \n", + "1282 Mrs 0 ... 2 4 6 0 0 \n", + "1283 Master 1 ... 3 3 3 0 0 \n", + "1284 Mr 0 ... 1 2 5 0 0 \n", + "1285 Mr 0 ... 0 4 4 0 0 \n", + "1286 Mrs 0 ... 2 5 3 1 0 \n", + "1287 Mr 0 ... 1 1 4 0 0 \n", + "1288 Mrs 0 ... 3 5 5 1 0 \n", + "1289 Mr 0 ... 1 1 4 0 0 \n", + "1290 Mr 0 ... 1 1 5 0 1 \n", + "1291 Miss 0 ... 1 5 4 0 0 \n", + "1292 Mr 0 ... 2 3 5 0 1 \n", + "1293 Miss 0 ... 2 5 4 1 0 \n", + "1294 Mr 0 ... 1 5 3 1 0 \n", + "1295 Mr 0 ... 2 4 5 1 0 \n", + "1296 Mr 0 ... 1 3 4 0 0 \n", + "1297 Mr 0 ... 2 2 4 0 1 \n", + "1298 Mr 0 ... 3 5 5 1 0 \n", + "1299 Miss 0 ... 1 1 4 0 0 \n", + "1300 Miss 0 ... 3 3 1 0 0 \n", + "1301 Miss 0 ... 1 1 4 0 0 \n", + "1302 Mrs 0 ... 2 5 5 1 0 \n", + "1303 Miss 0 ... 1 1 4 0 0 \n", + "1304 Mr 0 ... 1 2 4 0 0 \n", + "1305 Others 0 ... 1 5 5 0 0 \n", + "1306 Mr 0 ... 1 1 5 0 0 \n", + "1307 Mr 0 ... 1 2 4 0 0 \n", + "1308 Master 1 ... 
3 4 1 0 1 \n", + "\n", + " Ticket_3 Ticket_4 Ticket_C Ticket_P Ticket_S \n", + "0 0 1 0 0 0 \n", + "1 0 0 0 1 0 \n", + "2 0 0 0 0 1 \n", + "3 0 0 0 0 0 \n", + "4 1 0 0 0 0 \n", + "5 1 0 0 0 0 \n", + "6 0 0 0 0 0 \n", + "7 1 0 0 0 0 \n", + "8 1 0 0 0 0 \n", + "9 0 0 0 0 0 \n", + "10 0 0 0 1 0 \n", + "11 0 0 0 0 0 \n", + "12 0 1 0 0 0 \n", + "13 1 0 0 0 0 \n", + "14 1 0 0 0 0 \n", + "15 0 0 0 0 0 \n", + "16 1 0 0 0 0 \n", + "17 0 0 0 0 0 \n", + "18 1 0 0 0 0 \n", + "19 0 0 0 0 0 \n", + "20 0 0 0 0 0 \n", + "21 0 0 0 0 0 \n", + "22 1 0 0 0 0 \n", + "23 0 0 0 0 0 \n", + "24 1 0 0 0 0 \n", + "25 1 0 0 0 0 \n", + "26 0 0 0 0 0 \n", + "27 0 0 0 0 0 \n", + "28 1 0 0 0 0 \n", + "29 1 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "1279 1 0 0 0 0 \n", + "1280 1 0 0 0 0 \n", + "1281 0 0 0 0 0 \n", + "1282 0 0 0 1 0 \n", + "1283 0 0 1 0 0 \n", + "1284 0 0 1 0 0 \n", + "1285 1 0 0 0 0 \n", + "1286 0 0 0 0 0 \n", + "1287 1 0 0 0 0 \n", + "1288 0 0 0 0 0 \n", + "1289 1 0 0 0 0 \n", + "1290 0 0 0 0 0 \n", + "1291 1 0 0 0 0 \n", + "1292 0 0 0 0 0 \n", + "1293 0 0 0 0 0 \n", + "1294 0 0 0 0 0 \n", + "1295 0 0 0 0 0 \n", + "1296 0 0 0 0 1 \n", + "1297 0 0 0 0 0 \n", + "1298 0 0 0 0 0 \n", + "1299 1 0 0 0 0 \n", + "1300 0 0 0 0 1 \n", + "1301 1 0 0 0 0 \n", + "1302 0 0 0 0 0 \n", + "1303 1 0 0 0 0 \n", + "1304 0 1 0 0 0 \n", + "1305 0 0 0 1 0 \n", + "1306 0 0 0 0 1 \n", + "1307 1 0 0 0 0 \n", + "1308 0 0 0 0 0 \n", + "\n", + "[1309 rows x 24 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "_cell_guid": "e859e686-72db-4608-a89e-24a9e9b52e47", + "_execution_state": "idle", + "_uuid": "df4d6e82a6a6fac2c995d55f829085f9afee3267" + }, + "outputs": [], + "source": [ + "df = df.drop(labels=['SibSp','Parch','Age','Fare','Title'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + 
"source": [ + "df1 = df[0:891]" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "# df1 is already df[0:891], so it is used directly instead of being sliced a second time.\n", + "X = df1.drop(['Survived','PassengerId'], axis=1).values\n", + "Y = df1['Survived'].values" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state = 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\HP-PC\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:196: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n", + " \"avoid this warning.\", FutureWarning)\n" + ] + }, + { + "data": { + "text/plain": [ + "SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n", + " decision_function_shape='ovr', degree=3, gamma='auto_deprecated',\n", + " kernel='rbf', max_iter=-1, probability=False, random_state=None,\n", + " shrinking=True, tol=0.001, verbose=False)" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.svm import SVC\n", + "# gamma is pinned to 'auto', the default in effect when the recorded results were produced;\n", + "# scikit-learn 0.22 switches the implicit default to 'scale', which would change the scores.\n", + "svclassifier = SVC(kernel='rbf', gamma='auto')\n", + "svclassifier.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = svclassifier.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0.0 0.81 0.91 0.85 160\n", + " 1.0 0.83 0.68 0.74 108\n", + "\n", + " micro avg 0.81 0.81 0.81 268\n", + " 
macro avg 0.82 0.79 0.80 268\n", + "weighted avg 0.82 0.81 0.81 268\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.metrics import classification_report\n", + "C_report = classification_report(y_test,y_pred)\n", + "print(C_report)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8134328358208955\n" + ] + } + ], + "source": [ + "from sklearn.metrics import accuracy_score\n", + "Accuracy = accuracy_score(y_test,y_pred)\n", + "print(\"Accuracy:\", Accuracy)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ActualPredicted
01.00.0
10.00.0
21.01.0
30.00.0
41.01.0
50.00.0
61.00.0
70.00.0
81.00.0
90.00.0
100.00.0
111.01.0
121.01.0
131.00.0
141.00.0
151.01.0
161.01.0
170.00.0
180.01.0
190.00.0
200.00.0
211.01.0
220.00.0
230.01.0
240.00.0
251.01.0
261.01.0
270.00.0
280.00.0
290.00.0
.........
2380.00.0
2391.01.0
2400.00.0
2410.00.0
2421.00.0
2431.00.0
2440.00.0
2450.00.0
2461.01.0
2470.01.0
2480.00.0
2491.01.0
2500.00.0
2510.00.0
2521.01.0
2530.00.0
2541.00.0
2550.00.0
2560.00.0
2571.00.0
2580.00.0
2591.01.0
2600.00.0
2610.00.0
2620.00.0
2631.01.0
2640.00.0
2650.00.0
2660.00.0
2670.01.0
\n", + "

268 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Actual Predicted\n", + "0 1.0 0.0\n", + "1 0.0 0.0\n", + "2 1.0 1.0\n", + "3 0.0 0.0\n", + "4 1.0 1.0\n", + "5 0.0 0.0\n", + "6 1.0 0.0\n", + "7 0.0 0.0\n", + "8 1.0 0.0\n", + "9 0.0 0.0\n", + "10 0.0 0.0\n", + "11 1.0 1.0\n", + "12 1.0 1.0\n", + "13 1.0 0.0\n", + "14 1.0 0.0\n", + "15 1.0 1.0\n", + "16 1.0 1.0\n", + "17 0.0 0.0\n", + "18 0.0 1.0\n", + "19 0.0 0.0\n", + "20 0.0 0.0\n", + "21 1.0 1.0\n", + "22 0.0 0.0\n", + "23 0.0 1.0\n", + "24 0.0 0.0\n", + "25 1.0 1.0\n", + "26 1.0 1.0\n", + "27 0.0 0.0\n", + "28 0.0 0.0\n", + "29 0.0 0.0\n", + ".. ... ...\n", + "238 0.0 0.0\n", + "239 1.0 1.0\n", + "240 0.0 0.0\n", + "241 0.0 0.0\n", + "242 1.0 0.0\n", + "243 1.0 0.0\n", + "244 0.0 0.0\n", + "245 0.0 0.0\n", + "246 1.0 1.0\n", + "247 0.0 1.0\n", + "248 0.0 0.0\n", + "249 1.0 1.0\n", + "250 0.0 0.0\n", + "251 0.0 0.0\n", + "252 1.0 1.0\n", + "253 0.0 0.0\n", + "254 1.0 0.0\n", + "255 0.0 0.0\n", + "256 0.0 0.0\n", + "257 1.0 0.0\n", + "258 0.0 0.0\n", + "259 1.0 1.0\n", + "260 0.0 0.0\n", + "261 0.0 0.0\n", + "262 0.0 0.0\n", + "263 1.0 1.0\n", + "264 0.0 0.0\n", + "265 0.0 0.0\n", + "266 0.0 0.0\n", + "267 0.0 1.0\n", + "\n", + "[268 rows x 2 columns]" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_pred.flatten()})\n", + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[145 15]\n", + " [ 35 73]]\n" + ] + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "print(confusion_matrix(y_test,y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "# confusion_matrix puts actual classes on rows and predicted classes on columns,\n", + "# in ascending label order: position 0 is class 0.0 (Not), position 1 is class 1.0 (survived).\n", + "cm = confusion_matrix(y_test,y_pred)\n", + "cm_df = pd.DataFrame(cm,\n", + " index = ['Not','survived'], \n", + " columns = ['Not','survived'])" + ] + }, + { + 
"cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAEyCAYAAADjiYtYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAHfhJREFUeJzt3Xm8VXW9//HX+zA4IIKzoKVGmpUlipo5l0pqgwNpmhilRWqlZZGW3NKGX+WY5pCYA2g5lPLLK92SvKY5iyiI14FQuIIMIioKOBzO5/6xF7Q9HM7Za3PW3vt7zvvpYz3O3t+19vp+jhw+fM9nfdd3KSIwM7P0NNU7ADMzq44TuJlZopzAzcwS5QRuZpYoJ3Azs0Q5gZuZJcoJ3MwsUU7gVneS9pJ0v6TXJC2SdJ+kvSUtkdS3jeMfk/RNSVtLCkmTW+3fWNLbkmbW7JswqwMncKsrSesDtwO/ATYEtgDOBl4DZgPDWh2/A/Ah4Iay5j5Z+wpfBJ4vMGyzhuAEbvW2HUBE3BARyyNiWUTcERFTgbHAl1od/yVgQkS8XNZ2HTCi1THjigzarBE4gVu9PQsslzRW0sGSNijbdx2wt6T3AkhqojS6bp2crweOltRD0geBvsBDNYjdrK6cwK2uImIxsBcQwJXAS5Juk7RZRLwA3A0Mzw7fH1gbmNDqNLOBZ4ADKI3EPfq2bsEJ3OouIp6KiC9HxJbADsBA4NfZ7vIyynHAHyLinTZOMw74MnAMpRG5WZfnBG4NJSKeBq6llMgBbgW2kPQJ4AhWP7q+Bfg08FxEzCo6TrNG0LPeAVj3Jml7Son3poiYLek9lEbRDwJExBJJfwKuAWZFxKS2zpMd90nglRqFblZ3HoFbvb0OfAx4SNISSol7GvDdsmPGAlvRQW07IiZFxIyiAjVrNPIDHczM0uQRuJlZopzAzcwS5QRuZpYoJ3Azs0Q5gZuZJaph54G/s/A5T4+xVawzcO96h2ANqPntOVrTc+TNOb02ft8a97mmPAI3M0tUw47AzcxqqmV5vSPIzQnczAwgWuodQW5O4GZmAC1O4GZmSYrlzfUOITcncDMzcAnFzCxZvohpZpYoj8DNzBLli5hmZmkKj8DNzBLlEbiZWaI8AjczS5RnoZiZJcojcDOzRLkGbmaWKI/AzcwS5RG4mVmaInwR08wsTS6hmJklyiUUM7NEeQRuZpYo38hjZpYoj8DNzBLlGriZWaI8AjczS1SzH2psZpYk38hjZpYq18DNzBLlGriZWaI8AjczS5RH4GZmifII3MwsUQmOwJvqHYCZWUNoacm3dUDS1ZIWSJpW1naupKclTZU0XlL/rH1rScskPZ5tv60kZCdwMzPo9AQOXAsc1KptIrBDRHwUeBb4Qdm+GRExONtOrKQDJ3AzMyiVUPJsHZ0u4h5gUau2OyJixS2fDwJbrknITuBmZlDECLwjxwP/VfZ+G0mPSbpb0t6VnMAXMc3MIPdFTEkjgZFlTWMiYkyFnz0TaAZ+nzXNBd4bES9LGgL8f0kfjojF7Z3HCdzMDHKPqrNkXVHCLidpBPAZYP+IiOxcbwFvZa8flTQD2A6Y1N65nMDNzKAm0wglHQScDuwbEUvL2jcBFkXEcknvA7YFnuvofE7gZmbQ6TfySLoB2A/YWNJs4MeUZp2sBUyUBPBgNuNkH+AnkpqB5cCJEbGozROXcQI3M4NOT+ARcUwbzVet5thbgFvy9uEEbmYGUCpHJ8UJ3MwMvBaKmVmynMDNzBKV4GJWTuBmZgDL/UxMM7M0uYRiZpYoJ3Azs0S5Bm5mlqZo8TxwM7M0uYRiZpYol1DMzBLlEoqZWaISLKH4kWoNYPT/u4B9Pn00hw1f9Tmm1
/zhT+yw58G88uprADw8eSq7Dx3GsBHfYNiIb3D51b9f5TPW9Vw55nxenD2Fxx+7c2Xbj/7jNGY9P4lJj9zBpEfu4OCDPlnHCLuA2j9SbY15BN4ADjvkQL447HP88Kfnvat97vyXeOCRxxiw2abvat95xx247Nyzaxmi1dm4cTdz2WXXcM01F72r/aKLr+SCC6+oU1RdTIKrEXoE3gB2GfwR+q3fd5X2cy6+gtNOPoHSuu/Wnf3z3odY9Mqr9Q6ja0twBO4E3qDu+ueDbLrJxmy/7ftW2Tdl2lMcMeJkTvzuf/Cv52bVITprFCef9BUmPzqRK8ecT//+/eodTtpaIt/WAApJ4JJel7R4dVsRfXYly958kzHjbuSbXz1ulX0f+sAgJt4yllvHXsYXh32WU37wkzpEaI3gt1eMY7vt92DILkOZN28B557zo3qHlLZoybc1gEISeET0jYj1gV8DZwBbAFtSepjnz1b3OUkjJU2SNOl3424oIrQkvDBnLnNenMewESczdNgI5r+0kCOP/xYLX17Een36sO666wCwzx670dzcvPICp3UvCxYspKWlhYjgd1f9nl13HVzvkNKW4Ai86IuYn4qIj5W9v1zSQ8A5bR0cEWOAMQDvLHyuMf4P1cF2g7bhngk3rnw/dNgIbrrqYjbo34+FLy9iow03QBJP/M8ztETQv9/6dYzW6mXzzTdl3rwFABx26ME8+eQzdY4obdEgde08ik7gyyUdC9wIBHAMpScuW5lRP/4ljzw2lVdfXcz+hw3n5BOOY9hnP9XmsXfcdS83jZ9Aj549WLt3b849+wzkq5xd3vXXXcq++3ycjTfekJnPTeLsn5zHvvvuwY47foiIYNas2Zx08un1DjNtDTKqzkNR4NQZSVsDFwF7Ukrg9wHfjoiZHX22O4/AbfXWGbh3vUOwBtT89pw1HsUs+dnwXDmnz+jr6z5yKnQEniXqQ4vsw8ysUyQ4Ai90GqGk7STdKWla9v6jkkYX2aeZWVU8D3wVVwI/AN4BiIipwNEF92lmlp9noaxi3Yh4uNVFtuaC+zQzy69B5nbnUXQCXyhpEKULmEj6PDC34D7NzHKL5vQmyBWdwL9BaV739pLmAM8Dxxbcp5lZfg1SFsmj6AQ+KyIOkNQHaIqI1wvuz8ysOgkm8KIvYj4vaQywO/BGwX2ZmVXPa6Gs4gPA3ymVUp6XdImkvQru08wsvwRnoRSawCNiWUTcHBFHADsB6wN3F9mnmVk1oiVybY2g8PXAJe0r6TJgMrA2cFTRfZqZ5ZbgCLzQi5iSngceB24GRkXEkiL7MzOrWoPcXZlH0bNQdowIP8DBzBpfg4yq8ygkgUv6fkScA/xc0ir/VyLilCL6NTOrmhP4Sk9lXycVdH4zs05V5NLaRSkkgUfEf2Yvp0bEY0X0YWbWqRIcgRc9C+UCSU9L+qmkDxfcl5lZ9Tp5FoqkqyUtWLGcdta2oaSJkqZnXzfI2iXpYkn/kjRV0s6VhFz0PPBPAPsBLwFjJD3h9cDNrBEVMA/8WuCgVm1nAHdGxLbAndl7gIOBbbNtJHB5JR0UPg88IuZFxMXAiZSmFP6o6D7NzHLr5BF4RNwDLGrVfCgwNns9FjisrH1clDwI9Jc0oKM+in4izwclnZX9CnEJcD+wZZF9mplVpSXnVp3NImIuQPZ106x9C+CFsuNmZ23tKnoe+DXADcDQiHix4L7MzKqW9/Z4SSMplTtWGBMRY6rsvq0HJHcYUGEJXFIPYEZEXFRUH2ZmnSZnAs+Sdd6EPV/SgIiYm5VIFmTts4H3lB23JdDhoLewEkpELAc2ktS7qD7MzDpNbUootwEjstcjgD+XtX8pm42yO/DailJLewp/oANwn6TbgJXroETEBQX3a2aWS2evMCjpBkqz8DaWNBv4MfBL4GZJJwD/CxyZHf4X4BDgX8BS4CuV9FF0An8x25qAvgX3ZWZWvU5eyyoijlnNrv3bODYoPTchl0ITeEScXeT5z
cw6SzSndydm0cvJ3kUbV1Ij4pNF9mtmlleDPCUtl6JLKN8re702MAxoLrhPM7P8nMDfLSIebdV0nyQ/Us3MGo5H4K1I2rDsbROwC7B5kX2amVXFCXwVj1KqgQt4B5gJnFBwn2ZmuaU4Ai96MavTgcERsQ1wHaW54EsL7tPMLLdoybc1gqIT+OiIWCxpL+BASssrVrRMoplZLTmBr2p59vXTwG8j4s+Ab603s8YTyrc1gNXWwCWt394HK3za/BxJVwAHAL+StBY1WIPczCyvRhlV59HeRcwn+fcFyBVWvA/gvRWc/yhKT6Q4LyJezVbfGlVlrGZmhYmWxhhV57HaBB4R71ndvkpFxFLg1rL3c4EOV9gyM6u1FEfgFZUzJB0t6YfZ6y0lDSk2LDOz2opQrq0RdJjAJV0CfAI4LmtaCvy2yKDMzGotxVkoldzIs0dE7CzpMYCIWOSHNJhZV9OlauBl3pHURLaqoKSNSPKmUzOz1Yv0VpOtKIFfCtwCbCLpbEozS7zOt5l1KV1yBB4R4yQ9SmkuN8CRETGt2LDMzGqrSybwTA9Ki1EFvhHHzLqgFEsolcxCORO4ARhI6VH3f5D0g6IDMzOrpWhRrq0RVDICHw4MyW7KQdLPKS0T+4siAzMzq6VGmdudRyUJfFar43oCzxUTjplZfTTK3O482lvM6kJKNe+lwJOS/pa9HwrcW5vwzMxqY3lLepf32huBr5hp8iQwoaz9weLCMTOrj0apa+fR3mJWV9UyEDOzekpxFkqHNXBJg4CfAx8C1l7RHhHbFRiXmVlNpTgCr6Tocy1wDaV1wA8GbgZuLDAmM7Oaawnl2hpBJQl83Yj4G0BEzIiI0ZRWJzQz6zJSXE62kmmEb0kSMEPSicAcYNNiwzIzq60uWQMHvgOsB5xCqRbeDzi+yKDMzGqtUcoieVSymNVD2cvX+fdDHczMupRGKYvk0d6NPOPJ1gBvS0QcUUhEZmZ10NVKKJfULIo27LfjV+vZvTWo2zbYu94hWBfVpUooEXFnLQMxM6unLlVCMTPrTrrUCNzMrDtJsAReeQKXtFZEvFVkMGZm9ZLiCLySJ/LsJukJYHr2fkdJvyk8MjOzGkrxTsxKbqW/GPgM8DJAREzBt9KbWRfTknPriKQPSHq8bFss6duSzpI0p6z9kGpjrqSE0hQRs0p306+0vNoOzcwaUdC5o+qIeAYYDCCpB6VlSMYDXwEujIjz1rSPShL4C5J2AyIL4lvAs2vasZlZI2kp9irm/sCMNgbDa6SSEspJwGnAe4H5wO5Zm5lZl9GCcm05HQ3cUPb+m5KmSrpa0gbVxtxhAo+IBRFxdERsnG1HR8TCajs0M2tEgXJtkkZKmlS2jWzrvJJ6A58D/pg1XQ4MolRemQucX23MlTyR50ramCIZEW0Ga2aWouU5R9URMQYYU8GhBwOTI2J+9rn5K3Zk+fX2XB2XqaQG/vey12sDhwMvVNuhmVkjqmRmSZWOoax8ImlARMzN3h7Ovx8gn1sly8neVP5e0nXAxGo7NDNrREUkcEnrAgcCXy9rPkfSYEqVjZmt9uVSza302wBbVduhmVkj6uxphAARsRTYqFVbpz1XoZIa+Cv8uwbeBCwCzuisAMzMGkGCD6VvP4Fnz8LckdIEdICWiBSXPTcza18VUwPrrt1phFmyHh8Ry7PNydvMuqTIuTWCSm7keVjSzoVHYmZWR529FkottPdMzJ4R0QzsBXxN0gxgCSBKg3MndTPrMlo68Rb3WmmvBv4wsDNwWI1iMTOrm0Ypi+TRXgIXQETMqFEsZmZ10yhlkTzaS+CbSDptdTsj4oIC4jEzq4uuNo2wB7AeJDi3xswspxSnEbaXwOdGxE9qFomZWR11yRq4mVl30NVKKPvXLAozszrrUhcxI2JRLQMxM6unrlZCMTPrNrpaCcXMrNvoUiUUM7PuxAnczCxR4RKKmVmaPAI3M0vU8noHUAUncDMzPAvFzCxZLqGYmSXKCdzML
FG+E9PMLFGugZuZJcolFDOzRLmEYmaWqJYEU7gTuJkZLqGYmSUrvfG3E7iZGeARuJlZsjyN0MwsUb6IaWaWqPTStxO4mRngGriZWbJcQjEzS1R66dsJ3MwMcAnFzCxZLqGYmSWqiPQtaSbwOqVHbjZHxC6SNgRuArYGZgJHRcQr1Zy/qXPCNDNL23Ii15bDJyJicETskr0/A7gzIrYF7szeV8UJ3MyMUg08z7YGDgXGZq/HAodVeyIncDMzSjXwPFuFArhD0qOSRmZtm0XEXIDs66bVxuwaeIPpvVYvLr3lInqt1YuePXpw14S7uer8sZx54fcZvPuOLHl9CQA//86vmP7kjDpHa7XSZ9AAdhpz6sr362y1KdPP+SO9NujLZgcNgZbgrYWLmXrK5bw1v6pyareXtwaeJeSRZU1jImJMq8P2jIgXJW0KTJT09BoF2YoTeIN5+613OOWo01i29E169OzB5eMv5sG7Hgbg0p9dwT8m3FPnCK0elsyYy737Z6XSJrH/lMuZ95dHaH51CdN/dTMAW331ILb97hFM+/5VdYw0XXlnoWTJunXCbn3Mi9nXBZLGA7sB8yUNiIi5kgYAC6oMudgSiqRtKmmzd1u29E0AevbsSc9ePYlIb3qTFWfjvT/CkpnzeXP2QprfWLayvee6a+Eflep1dg1cUh9JfVe8BoYC04DbgBHZYSOAP1cbc9E18FvaaPtTwX0mr6mpiWvvGMPtU2/lkXsm8T+PlX7r+vrpJzB24pWcctbJ9Ordq85RWr0MOPzjzB1//8r32/3gC3xi8qUMHLYX08+5uY6RpS1y/leBzYB7JU0BHgYmRMRfgV8CB0qaDhyYva+KihjdSdoe+DBwDjCqbNf6wKiI+HBH59hzi092+7HEeuv34RdX/YQLRv+Gxa8s5uUFi+jVuxenn3Mac2a+yDW/vq7eIdbcmc1VX+/pEtSrB/tPuZx79h3F2y+99q59g045lKa1ejH93O43Rjpk/o1rvJr38Vt/PlfOuXrmn+q+gnhRI/APAJ8B+gOfLdt2Br62ug9JGilpkqRJ85a8WFBo6Xhj8RIm3z+F3ffbjZcXLALgnbffYcJNf+WDO21f5+isHjbZfzCvPTFzleQNMOfW+9j8Mx+rQ1RdQwEj8MIVchEzIv4M/FnSxyPigRyfW3lRoLuOwPtv2I/m5mbeWLyE3mv3Zte9d+b6y25ko003XJnE9zloL557emZ9A7W6GHj4nswdf9/K9+tuszlLn58HwGafGsIb0z3wqZbXQlnVC9mV1z0pzdK5Fzg1ImYX3G+yNtpsI0b/+nSamppoamriv//zH9z/9we5+Obz6b9hPyQx/cl/ce4ZF9Y7VKuxpnV6s/E+H2Ha965c2bb96GPo8/6BREsLy2YvZNqo39UxwrS1JHgFuJAa+MqTSxOBPwArirXDgWMj4sCOPttdR+DWvu5eA7e2dUYNfPhWR+TKOdfPurXL1sBX2DQiromI5my7Ftik4D7NzHIr6E7MQhWdwF+SNFxSj2wbDrxccJ9mZrmleBGz6AR+PHAUMA+YC3w+azMzayg1XMyq0xR6ETMi/hf4XJF9mJl1hkYpi+RRSAKX9KN2dkdE/LSIfs3MqtUoZZE8ihqBL2mjrQ9wArAR4ARuZg2lUcoieRR1I8/5K15ni7mcCnwFuBE4f3WfMzOrlxQXjSusBp499+004FhKT53YudrnvpmZFc018Iykc4EjKN0W/5GIeKOIfszMOkuKJZSiphF+FxgIjAZelLQ4216XtLigPs3MqpbiPPCiauB+1qaZJWV5pDcG9yPVzMxIs4TiBG5mhueBm5kly7NQzMwS5XngZmaJ8gjczCxRroGbmSUqxUeqOYGbmUGC428ncDMzwDVwM7NkOYGbmSXK0wjNzBLlEbiZWaI8jdDMLFEuoZiZJcolFDOzRHkEbmaWKI/AzcwS5YuYZmaJ8looZmaJ8gjczCxRfqixmVmiXEIxM0tUiiWUpnoHY
GbWCFoicm0dkfQeSXdJekrSk5JOzdrPkjRH0uPZdki1MXsEbmZGISPwZuC7ETFZUl/gUUkTs30XRsR5a9qBE7iZGRCdfBEzIuYCc7PXr0t6CtiiM/twCcXMjNKdmHm2PCRtDewEPJQ1fVPSVElXS9qg2pidwM3MKK2FkmeTNFLSpLJtZFvnlbQecAvw7YhYDFwODAIGUxqhn19tzC6hmJmRfy2UiBgDjGnvGEm9KCXv30fErdnn5pftvxK4PXewGSdwMzM6fzVCSQKuAp6KiAvK2gdk9XGAw4Fp1fbhBG5mRiE38uwJHAc8IenxrO2HwDGSBgMBzAS+Xm0HTuBmZnT+NMKIuBdQG7v+0ll9OIGbmeEHOpiZJcsPdDAzS5RH4GZmifJqhGZmifII3MwsUa6Bm5klyiNwM7NEuQZuZpaoFJ/I4wRuZoZH4GZmyWrxU+nNzNLki5hmZolyAjczS1R66RuU4r863Y2kkdnTP8xW8s+F+ZmYaWjzWXvW7fnnoptzAjczS5QTuJlZopzA0+A6p7XFPxfdnC9impklyiNwM7NEOYEnQNLnJJ3RSed6ozPOY8WTFJLOL3v/PUlndfCZwyR9qPDgrCE4gTcISau9qSoibouIX9YyHmsIbwFHSNo4x2cOA5zAuwkn8E4mqY+kCZKmSJom6QuSZq74SyhpF0n/yF6fJWmMpDuAcZIekvThsnP9Q9IQSV+WdImkftm5mrL960p6QVIvSYMk/VXSo5L+KWn77JhtJD0g6RFJP639/xFbA82ULlR+p/UOSVtJulPS1OzreyXtAXwOOFfS45IG1Tpgqy0n8M53EPBiROwYETsAf+3g+CHAoRHxReBG4CgASQOAgRHx6IoDI+I1YAqwb9b0WeBvEfEOpb/o34qIIcD3gMuyYy4CLo+IXYF5nfENWk1dChwrqV+r9kuAcRHxUeD3wMURcT9wGzAqIgZHxIwax2o15gTe+Z4ADpD0K0l7Z0m3PbdFxLLs9c3Akdnro4A/tnH8TcAXstdHAzdJWg/YA/ijpMeBK4AB2TF7Ajdkr6/L/d1YXUXEYmAccEqrXR8H/pC9vg7Yq5ZxWWPwYladLCKelTQEOAT4RVYeaebf/1iu3eojS8o+O0fSy5I+SilJf72NLm7LzrshpdH7fwN9gFcjYvDqwqr6G7JG8GtgMnBNO8f4z7gb8gi8k0kaCCyNiOuB84CdgZmUki3AsA5OcSPwfaBfRDzRemdEvAE8TKk0cntELM9Gac9LOjKLQZJ2zD5yH6WROsCxVX9jVjcRsYjSb2cnlDXfz7v/XO/NXr8O9K1ddFZPTuCd7yPAw1kp40zgZ8DZwEWS/gks7+Dzf6L0F/Pmdo65CRiefV3hWOAESVOAJ4FDs/ZTgW9IegRoXUe1dJwPlM9GOQX4iqSpwHGU/pyhNAAYJekxX8Ts+nwnpplZojwCNzNLlBO4mVminMDNzBLlBG5mligncDOzRDmBW7skLc/W1Zgm6Y+S1l2Dc+0n6fbsdbsrLErqL+nkKvo4S9L3Km1vdcy1kj6fo6+tJU3LG6NZZ3ECt44sy9bV2AF4GzixfGd201Dun6MKVljsD+RO4GbdiRO45fFP4P3ZyPMpSZdRusX7PZKGZqseTs5G6usBSDpI0tOS7gWOWHGiFSssZq83kzQ+W8FxSraq3i+BQdno/9zsuFHZqopTJZ1ddq4zJT0j6e/ABzr6JiR9LTvPFEm3tPqt4oBsNcdnJX0mO76HpHPL+m5riQOzmnMCt4qotF75wZQW64JSohwXETtRWs9lNHBAROwMTAJOk7Q2cCWlVRP3BjZfzekvBu6OiB0pLT3wJHAGMCMb/Y+SNBTYFtgNGAwMkbRPtu7M0cBOlP6B2LWCb+fWiNg16+8p3n2L+taUVnv8NPDb7Hs4AXgtW9FxV+BrkrapoB+zQnkxK+vIOtmyAFAagV8FDARmRcSDWfvulB4icJ8kgN7AA8D2wPMRMR1A0vXAyDb6+CTwJ
YCIWA68JmmDVscMzbbHsvfrUUrofYHxEbE06+O2Cr6nHST9jFKZZj3gb2X7bo6IFmC6pOey72Eo8NGy+ni/rO9nK+jLrDBO4NaRZa1XOcyS9JLyJmBiRBzT6rjBdN4qeQJ+ERFXtOrj21X0cS1wWERMkfRlYL+yfa3PFVnf34qI8kSPpK1z9mvWqVxCsc7wILCnpPfDyicFbQc8DWxTtqjSMav5/J3ASdlne0han1VX1fsbcHxZbX0LSZsC9wCHS1pHUl9K5ZqO9AXmSurFqis0HimpKYv5fcAzWd8nZccjaTtJfSrox6xQHoHbGouIl7KR7A2S1sqaR2dro48EJkhaSGnJ0x3aOMWpwBhJJ1BarfGkiHhA0n3ZNL3/yurgHwQeyH4DeAMYHhGTJd0EPA7MolTm6ch/AA9lxz/Bu/+heAa4G9gMODEi3pT0O0q18ckqdf4SpWdPmtWVVyM0M0uUSyhmZolyAjczS5QTuJlZopzAzcwS5QRuZpYoJ3Azs0Q5gZuZJcoJ3MwsUf8HW+sraxYJIM4AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8134328358208955\n" + ] + } + ], + "source": [ + "plt.figure(figsize=(6,4.5))\n", + "sns.heatmap(cm_df, annot=True, fmt=\"d\")\n", + "plt.title('SVM'.format(accuracy_score(y_test, y_pred)))\n", + "plt.ylabel('True label')\n", + "plt.xlabel('Predicted label')\n", + "plt.savefig('plot_SVM.png', dpi=500, bbox_inches='tight')\n", + "plt.show()\n", + "print(\"Accuracy:\",accuracy_score(y_test, y_pred))" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.6002386634844868" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import cohen_kappa_score\n", + "cohen_kappa_score(y_test, y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/titanic-NN.ipynb b/titanic-NN.ipynb new file mode 100644 index 000000000..c653d775d --- /dev/null +++ b/titanic-NN.ipynb @@ -0,0 +1,10169 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "_cell_guid": "9e319230-1cc2-4b19-ad66-06a07e07fc60", + "_execution_state": "idle", + "_uuid": "03de849fb872e11b3db2a2c99563783ce6ba784f" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + }, + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "(891, 12) (418, 11) ['PassengerId' 'Survived' 'Pclass' 'Name' 'Sex' 'Age' 'SibSp' 'Parch'\n", + " 'Ticket' 'Fare' 'Cabin' 'Embarked']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\HP-PC\\Anaconda3\\lib\\site-packages\\pandas\\core\\frame.py:6692: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n", + "of pandas will change to not sort by default.\n", + "\n", + "To accept the future behavior, pass 'sort=False'.\n", + "\n", + "To retain the current behavior and silence the warning, pass 'sort=True'.\n", + "\n", + " sort=sort)\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "%matplotlib inline\n", + "\n", + "# ignore Deprecation Warning\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\", category=DeprecationWarning) \n", + "\n", + "# Random Forest Regressor\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "\n", + "# Neural Network\n", + "import keras \n", + "from keras.models import Sequential \n", + "from keras.layers import Dense\n", + "\n", + "# load the data\n", + "df_train = pd.read_csv('data/train.csv')\n", + "df_test = pd.read_csv('data/test.csv')\n", + "df = df_train.append(df_test , ignore_index = True)\n", + "\n", + "# some quick inspections\n", + "print(df_train.shape, df_test.shape, df_train.columns.values)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
57557603Patchett, Mr. Georgemale19.00035858514.5000NaNS
54854903Goldsmith, Mr. Frank Johnmale33.01136329120.5250NaNS
56556603Davies, Mr. Alfred Jmale24.020A/4 4887124.1500NaNS
12312412Webber, Miss. Susanfemale32.5002726713.0000E101S
75675703Carlsson, Mr. August Sigfridmale28.0003500427.7958NaNS
78378403Johnston, Mr. Andrew GmaleNaN12W./C. 660723.4500NaNS
58458503Paulner, Mr. UschermaleNaN0034118.7125NaNC
55855911Taussig, Mrs. Emil (Tillie Mandelbaum)female39.01111041379.6500E67S
909103Christmann, Mr. Emilmale29.0003432768.0500NaNS
76376411Carter, Mrs. William Ernest (Lucile Polk)female36.012113760120.0000B96 B98S
\n", + "
" + ], + "text/plain": [ + " PassengerId Survived Pclass Name \\\n", + "575 576 0 3 Patchett, Mr. George \n", + "548 549 0 3 Goldsmith, Mr. Frank John \n", + "565 566 0 3 Davies, Mr. Alfred J \n", + "123 124 1 2 Webber, Miss. Susan \n", + "756 757 0 3 Carlsson, Mr. August Sigfrid \n", + "783 784 0 3 Johnston, Mr. Andrew G \n", + "584 585 0 3 Paulner, Mr. Uscher \n", + "558 559 1 1 Taussig, Mrs. Emil (Tillie Mandelbaum) \n", + "90 91 0 3 Christmann, Mr. Emil \n", + "763 764 1 1 Carter, Mrs. William Ernest (Lucile Polk) \n", + "\n", + " Sex Age SibSp Parch Ticket Fare Cabin Embarked \n", + "575 male 19.0 0 0 358585 14.5000 NaN S \n", + "548 male 33.0 1 1 363291 20.5250 NaN S \n", + "565 male 24.0 2 0 A/4 48871 24.1500 NaN S \n", + "123 female 32.5 0 0 27267 13.0000 E101 S \n", + "756 male 28.0 0 0 350042 7.7958 NaN S \n", + "783 male NaN 1 2 W./C. 6607 23.4500 NaN S \n", + "584 male NaN 0 0 3411 8.7125 NaN C \n", + "558 female 39.0 1 1 110413 79.6500 E67 S \n", + "90 male 29.0 0 0 343276 8.0500 NaN S \n", + "763 female 36.0 1 2 113760 120.0000 B96 B98 S " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_train.sample(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pclass" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "_cell_guid": "8ac53d3d-bf63-4b0a-b4dd-cf69ebb1b189", + "_execution_state": "idle", + "_uuid": "7a92a95fd0642b65500aa0fe2d5453f8f9cff24c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there is any NAN\n", + "df['Pclass'].isnull().sum(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "_cell_guid": "366c466d-d5f1-41f2-9568-3abc426bfc75", + "_execution_state": "idle", + "_uuid": "d380019bd6179b6ddcde472ce10771788ae204f4" + }, + "outputs": [ + { + "data": { + 
"text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassSurvived
010.629630
120.472826
230.242363
\n", + "
" + ], + "text/plain": [ + " Pclass Survived\n", + "0 1 0.629630\n", + "1 2 0.472826\n", + "2 3 0.242363" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the correlation between Pclass and Survived\n", + "df[['Pclass', 'Survived']].groupby(['Pclass'], as_index=False).mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "631684de-c518-485b-9456-34eff56235e0", + "_execution_state": "idle", + "_uuid": "12fb136756bac860d6e5ef19a0cd5083ccceba69" + }, + "source": [ + "We can see that a higher class (lower value) has a higher survival rate. This should be a very useful feature." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "cf76f8ac-cc51-4ffb-8066-b605c5aa971c", + "_execution_state": "idle", + "_uuid": "617016b19a60d3ae9b2b1dbfefb73f4fb56e13a9" + }, + "source": [ + "### Name" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "_cell_guid": "fa11d6a2-ad3d-443b-8579-de525fdd20c1", + "_execution_state": "idle", + "_uuid": "a40e18c8e9fe2692dec94722cc6ab316d71b5003" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 Braund, Mr. Owen Harris\n", + "1 Cumings, Mrs. John Bradley (Florence Briggs Th...\n", + "2 Heikkinen, Miss. Laina\n", + "3 Futrelle, Mrs. Jacques Heath (Lily May Peel)\n", + "4 Allen, Mr. William Henry\n", + "5 Moran, Mr. James\n", + "6 McCarthy, Mr. Timothy J\n", + "7 Palsson, Master. Gosta Leonard\n", + "8 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)\n", + "9 Nasser, Mrs. 
Nicholas (Adele Achem)\n", + "Name: Name, dtype: object" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Name.head(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "_cell_guid": "bb1fc2fe-1947-4f89-99fa-d621711615fc", + "_execution_state": "idle", + "_uuid": "93e6890cecf2d3ae919ec30b24588d780092720d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Mr 757\n", + "Miss 260\n", + "Mrs 197\n", + "Master 61\n", + "Rev 8\n", + "Dr 8\n", + "Col 4\n", + "Ms 2\n", + "Major 2\n", + "Mlle 2\n", + "Capt 1\n", + "Lady 1\n", + "Dona 1\n", + "Mme 1\n", + "Jonkheer 1\n", + "the Countess 1\n", + "Sir 1\n", + "Don 1\n", + "Name: Title, dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Title'] = df.Name.map( lambda x: x.split(',')[1].split( '.' )[0].strip())\n", + "\n", + "# inspect the amount of people for each title\n", + "df['Title'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "55379dcf-99ec-4919-a624-05e06254859b", + "_execution_state": "idle", + "_uuid": "0070b1a5c3e802ebdad33ca5b960653be96c5281" + }, + "source": [ + "Looks like the main ones are \"Master\", \"Miss\", \"Mr\", \"Mrs\". Some of the others can be merged into some of these four categories. 
For the rest, I'll just call them \"Others\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "_cell_guid": "67b67929-d7c3-4d52-9cde-eb37b4ef256d", + "_execution_state": "idle", + "_uuid": "a7840964bab69268bcc0e2516770f7faf00aad1d" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\HP-PC\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:190: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " self._setitem_with_indexer(indexer, value)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TitleSurvived
0Master0.575000
1Miss0.701087
2Mr0.156673
3Mrs0.796875
4Others0.318182
\n", + "
" + ], + "text/plain": [ + " Title Survived\n", + "0 Master 0.575000\n", + "1 Miss 0.701087\n", + "2 Mr 0.156673\n", + "3 Mrs 0.796875\n", + "4 Others 0.318182" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Title'] = df['Title'].replace('Mlle', 'Miss')\n", + "df['Title'] = df['Title'].replace(['Mme','Lady','Ms'], 'Mrs')\n", + "df.Title.loc[ (df.Title != 'Master') & (df.Title != 'Mr') & (df.Title != 'Miss') \n", + " & (df.Title != 'Mrs')] = 'Others'\n", + "\n", + "# inspect the correlation between Title and Survived\n", + "df[['Title', 'Survived']].groupby(['Title'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "_cell_guid": "18a55b3d-677f-4611-a41e-be3f9ff49c67", + "_execution_state": "idle", + "_uuid": "77a517250b3a9b32da2436b8522e864e4e3e7016" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Mr 757\n", + "Miss 262\n", + "Mrs 201\n", + "Master 61\n", + "Others 28\n", + "Name: Title, dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the amount of people for each title\n", + "df['Title'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "_cell_guid": "516ceeba-2dab-4fff-8f5d-4fe94f4eddc8", + "_execution_state": "idle", + "_uuid": "3f031015c4e518d5b0b1874bac37a0d899eb2a60" + }, + "outputs": [], + "source": [ + "df = pd.concat([df, pd.get_dummies(df['Title'])], axis=1).drop(labels=['Name'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeCabinEmbarkedFareParchPassengerIdPclassSexSibSpSurvivedTicketTitleMasterMissMrMrsOthers
022.0NaNS7.2500013male10.0A/5 21171Mr00100
138.0C85C71.2833021female11.0PC 17599Mrs00010
226.0NaNS7.9250033female01.0STON/O2. 3101282Miss01000
335.0C123S53.1000041female11.0113803Mrs00010
435.0NaNS8.0500053male00.0373450Mr00100
5NaNNaNQ8.4583063male00.0330877Mr00100
654.0E46S51.8625071male00.017463Mr00100
72.0NaNS21.0750183male30.0349909Master10000
827.0NaNS11.1333293female01.0347742Mrs00010
914.0NaNC30.07080102female11.0237736Mrs00010
104.0G6S16.70001113female11.0PP 9549Miss01000
1158.0C103S26.55000121female01.0113783Miss01000
1220.0NaNS8.05000133male00.0A/5. 2151Mr00100
1339.0NaNS31.27505143male10.0347082Mr00100
1414.0NaNS7.85420153female00.0350406Miss01000
1555.0NaNS16.00000162female01.0248706Mrs00010
162.0NaNQ29.12501173male40.0382652Master10000
17NaNNaNS13.00000182male01.0244373Mr00100
1831.0NaNS18.00000193female10.0345763Mrs00010
19NaNNaNC7.22500203female01.02649Mrs00010
2035.0NaNS26.00000212male00.0239865Mr00100
2134.0D56S13.00000222male01.0248698Mr00100
2215.0NaNQ8.02920233female01.0330923Miss01000
2328.0A6S35.50000241male01.0113788Mr00100
248.0NaNS21.07501253female30.0349909Miss01000
2538.0NaNS31.38755263female11.0347077Mrs00010
26NaNNaNC7.22500273male00.02631Mr00100
2719.0C23 C25 C27S263.00002281male30.019950Mr00100
28NaNNaNQ7.87920293female01.0330959Miss01000
29NaNNaNS7.89580303male00.0349216Mr00100
......................................................
127921.0NaNQ7.7500012803male0NaN364858Mr00100
12806.0NaNS21.0750112813male3NaN349909Master10000
128123.0B24S93.5000012821male0NaN12749Mr00100
128251.0D28S39.4000112831female0NaNPC 17592Mrs00010
128313.0NaNS20.2500212843male0NaNC.A. 2673Master10000
128447.0NaNS10.5000012852male0NaNC.A. 30769Mr00100
128529.0NaNS22.0250112863male3NaN315153Mr00100
128618.0C31S60.0000012871female1NaN13695Mrs00010
128724.0NaNQ7.2500012883male0NaN371109Mr00100
128848.0B41C79.2000112891female1NaN13567Mrs00010
128922.0NaNS7.7750012903male0NaN347065Mr00100
129031.0NaNQ7.7333012913male0NaN21332Mr00100
129130.0C7S164.8667012921female0NaN36928Miss01000
129238.0NaNS21.0000012932male1NaN28664Mr00100
129322.0NaNC59.4000112941female0NaN112378Miss01000
129417.0NaNS47.1000012951male0NaN113059Mr00100
129543.0D40C27.7208012961male1NaN17765Mr00100
129620.0D38C13.8625012972male0NaNSC/PARIS 2166Mr00100
129723.0NaNS10.5000012982male1NaN28666Mr00100
129850.0C80C211.5000112991male1NaN113503Mr00100
1299NaNNaNQ7.7208013003female0NaN334915Miss01000
13003.0NaNS13.7750113013female1NaNSOTON/O.Q. 3101315Miss01000
1301NaNNaNQ7.7500013023female0NaN365237Miss01000
130237.0C78Q90.0000013031female1NaN19928Mrs00010
130328.0NaNS7.7750013043female0NaN347086Miss01000
1304NaNNaNS8.0500013053male0NaNA.5. 3236Mr00100
130539.0C105C108.9000013061female0NaNPC 17758Others00001
130638.5NaNS7.2500013073male0NaNSOTON/O.Q. 3101262Mr00100
1307NaNNaNS8.0500013083male0NaN359309Mr00100
1308NaNNaNC22.3583113093male1NaN2668Master10000
\n", + "

1309 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " Age Cabin Embarked Fare Parch PassengerId Pclass \\\n", + "0 22.0 NaN S 7.2500 0 1 3 \n", + "1 38.0 C85 C 71.2833 0 2 1 \n", + "2 26.0 NaN S 7.9250 0 3 3 \n", + "3 35.0 C123 S 53.1000 0 4 1 \n", + "4 35.0 NaN S 8.0500 0 5 3 \n", + "5 NaN NaN Q 8.4583 0 6 3 \n", + "6 54.0 E46 S 51.8625 0 7 1 \n", + "7 2.0 NaN S 21.0750 1 8 3 \n", + "8 27.0 NaN S 11.1333 2 9 3 \n", + "9 14.0 NaN C 30.0708 0 10 2 \n", + "10 4.0 G6 S 16.7000 1 11 3 \n", + "11 58.0 C103 S 26.5500 0 12 1 \n", + "12 20.0 NaN S 8.0500 0 13 3 \n", + "13 39.0 NaN S 31.2750 5 14 3 \n", + "14 14.0 NaN S 7.8542 0 15 3 \n", + "15 55.0 NaN S 16.0000 0 16 2 \n", + "16 2.0 NaN Q 29.1250 1 17 3 \n", + "17 NaN NaN S 13.0000 0 18 2 \n", + "18 31.0 NaN S 18.0000 0 19 3 \n", + "19 NaN NaN C 7.2250 0 20 3 \n", + "20 35.0 NaN S 26.0000 0 21 2 \n", + "21 34.0 D56 S 13.0000 0 22 2 \n", + "22 15.0 NaN Q 8.0292 0 23 3 \n", + "23 28.0 A6 S 35.5000 0 24 1 \n", + "24 8.0 NaN S 21.0750 1 25 3 \n", + "25 38.0 NaN S 31.3875 5 26 3 \n", + "26 NaN NaN C 7.2250 0 27 3 \n", + "27 19.0 C23 C25 C27 S 263.0000 2 28 1 \n", + "28 NaN NaN Q 7.8792 0 29 3 \n", + "29 NaN NaN S 7.8958 0 30 3 \n", + "... ... ... ... ... ... ... ... 
\n", + "1279 21.0 NaN Q 7.7500 0 1280 3 \n", + "1280 6.0 NaN S 21.0750 1 1281 3 \n", + "1281 23.0 B24 S 93.5000 0 1282 1 \n", + "1282 51.0 D28 S 39.4000 1 1283 1 \n", + "1283 13.0 NaN S 20.2500 2 1284 3 \n", + "1284 47.0 NaN S 10.5000 0 1285 2 \n", + "1285 29.0 NaN S 22.0250 1 1286 3 \n", + "1286 18.0 C31 S 60.0000 0 1287 1 \n", + "1287 24.0 NaN Q 7.2500 0 1288 3 \n", + "1288 48.0 B41 C 79.2000 1 1289 1 \n", + "1289 22.0 NaN S 7.7750 0 1290 3 \n", + "1290 31.0 NaN Q 7.7333 0 1291 3 \n", + "1291 30.0 C7 S 164.8667 0 1292 1 \n", + "1292 38.0 NaN S 21.0000 0 1293 2 \n", + "1293 22.0 NaN C 59.4000 1 1294 1 \n", + "1294 17.0 NaN S 47.1000 0 1295 1 \n", + "1295 43.0 D40 C 27.7208 0 1296 1 \n", + "1296 20.0 D38 C 13.8625 0 1297 2 \n", + "1297 23.0 NaN S 10.5000 0 1298 2 \n", + "1298 50.0 C80 C 211.5000 1 1299 1 \n", + "1299 NaN NaN Q 7.7208 0 1300 3 \n", + "1300 3.0 NaN S 13.7750 1 1301 3 \n", + "1301 NaN NaN Q 7.7500 0 1302 3 \n", + "1302 37.0 C78 Q 90.0000 0 1303 1 \n", + "1303 28.0 NaN S 7.7750 0 1304 3 \n", + "1304 NaN NaN S 8.0500 0 1305 3 \n", + "1305 39.0 C105 C 108.9000 0 1306 1 \n", + "1306 38.5 NaN S 7.2500 0 1307 3 \n", + "1307 NaN NaN S 8.0500 0 1308 3 \n", + "1308 NaN NaN C 22.3583 1 1309 3 \n", + "\n", + " Sex SibSp Survived Ticket Title Master Miss Mr \\\n", + "0 male 1 0.0 A/5 21171 Mr 0 0 1 \n", + "1 female 1 1.0 PC 17599 Mrs 0 0 0 \n", + "2 female 0 1.0 STON/O2. 3101282 Miss 0 1 0 \n", + "3 female 1 1.0 113803 Mrs 0 0 0 \n", + "4 male 0 0.0 373450 Mr 0 0 1 \n", + "5 male 0 0.0 330877 Mr 0 0 1 \n", + "6 male 0 0.0 17463 Mr 0 0 1 \n", + "7 male 3 0.0 349909 Master 1 0 0 \n", + "8 female 0 1.0 347742 Mrs 0 0 0 \n", + "9 female 1 1.0 237736 Mrs 0 0 0 \n", + "10 female 1 1.0 PP 9549 Miss 0 1 0 \n", + "11 female 0 1.0 113783 Miss 0 1 0 \n", + "12 male 0 0.0 A/5. 
2151 Mr 0 0 1 \n", + "13 male 1 0.0 347082 Mr 0 0 1 \n", + "14 female 0 0.0 350406 Miss 0 1 0 \n", + "15 female 0 1.0 248706 Mrs 0 0 0 \n", + "16 male 4 0.0 382652 Master 1 0 0 \n", + "17 male 0 1.0 244373 Mr 0 0 1 \n", + "18 female 1 0.0 345763 Mrs 0 0 0 \n", + "19 female 0 1.0 2649 Mrs 0 0 0 \n", + "20 male 0 0.0 239865 Mr 0 0 1 \n", + "21 male 0 1.0 248698 Mr 0 0 1 \n", + "22 female 0 1.0 330923 Miss 0 1 0 \n", + "23 male 0 1.0 113788 Mr 0 0 1 \n", + "24 female 3 0.0 349909 Miss 0 1 0 \n", + "25 female 1 1.0 347077 Mrs 0 0 0 \n", + "26 male 0 0.0 2631 Mr 0 0 1 \n", + "27 male 3 0.0 19950 Mr 0 0 1 \n", + "28 female 0 1.0 330959 Miss 0 1 0 \n", + "29 male 0 0.0 349216 Mr 0 0 1 \n", + "... ... ... ... ... ... ... ... .. \n", + "1279 male 0 NaN 364858 Mr 0 0 1 \n", + "1280 male 3 NaN 349909 Master 1 0 0 \n", + "1281 male 0 NaN 12749 Mr 0 0 1 \n", + "1282 female 0 NaN PC 17592 Mrs 0 0 0 \n", + "1283 male 0 NaN C.A. 2673 Master 1 0 0 \n", + "1284 male 0 NaN C.A. 30769 Mr 0 0 1 \n", + "1285 male 3 NaN 315153 Mr 0 0 1 \n", + "1286 female 1 NaN 13695 Mrs 0 0 0 \n", + "1287 male 0 NaN 371109 Mr 0 0 1 \n", + "1288 female 1 NaN 13567 Mrs 0 0 0 \n", + "1289 male 0 NaN 347065 Mr 0 0 1 \n", + "1290 male 0 NaN 21332 Mr 0 0 1 \n", + "1291 female 0 NaN 36928 Miss 0 1 0 \n", + "1292 male 1 NaN 28664 Mr 0 0 1 \n", + "1293 female 0 NaN 112378 Miss 0 1 0 \n", + "1294 male 0 NaN 113059 Mr 0 0 1 \n", + "1295 male 1 NaN 17765 Mr 0 0 1 \n", + "1296 male 0 NaN SC/PARIS 2166 Mr 0 0 1 \n", + "1297 male 1 NaN 28666 Mr 0 0 1 \n", + "1298 male 1 NaN 113503 Mr 0 0 1 \n", + "1299 female 0 NaN 334915 Miss 0 1 0 \n", + "1300 female 1 NaN SOTON/O.Q. 3101315 Miss 0 1 0 \n", + "1301 female 0 NaN 365237 Miss 0 1 0 \n", + "1302 female 1 NaN 19928 Mrs 0 0 0 \n", + "1303 female 0 NaN 347086 Miss 0 1 0 \n", + "1304 male 0 NaN A.5. 3236 Mr 0 0 1 \n", + "1305 female 0 NaN PC 17758 Others 0 0 0 \n", + "1306 male 0 NaN SOTON/O.Q. 
3101262 Mr 0 0 1 \n", + "1307 male 0 NaN 359309 Mr 0 0 1 \n", + "1308 male 1 NaN 2668 Master 1 0 0 \n", + "\n", + " Mrs Others \n", + "0 0 0 \n", + "1 1 0 \n", + "2 0 0 \n", + "3 1 0 \n", + "4 0 0 \n", + "5 0 0 \n", + "6 0 0 \n", + "7 0 0 \n", + "8 1 0 \n", + "9 1 0 \n", + "10 0 0 \n", + "11 0 0 \n", + "12 0 0 \n", + "13 0 0 \n", + "14 0 0 \n", + "15 1 0 \n", + "16 0 0 \n", + "17 0 0 \n", + "18 1 0 \n", + "19 1 0 \n", + "20 0 0 \n", + "21 0 0 \n", + "22 0 0 \n", + "23 0 0 \n", + "24 0 0 \n", + "25 1 0 \n", + "26 0 0 \n", + "27 0 0 \n", + "28 0 0 \n", + "29 0 0 \n", + "... ... ... \n", + "1279 0 0 \n", + "1280 0 0 \n", + "1281 0 0 \n", + "1282 1 0 \n", + "1283 0 0 \n", + "1284 0 0 \n", + "1285 0 0 \n", + "1286 1 0 \n", + "1287 0 0 \n", + "1288 1 0 \n", + "1289 0 0 \n", + "1290 0 0 \n", + "1291 0 0 \n", + "1292 0 0 \n", + "1293 0 0 \n", + "1294 0 0 \n", + "1295 0 0 \n", + "1296 0 0 \n", + "1297 0 0 \n", + "1298 0 0 \n", + "1299 0 0 \n", + "1300 0 0 \n", + "1301 0 0 \n", + "1302 1 0 \n", + "1303 0 0 \n", + "1304 0 0 \n", + "1305 0 1 \n", + "1306 0 0 \n", + "1307 0 0 \n", + "1308 0 0 \n", + "\n", + "[1309 rows x 17 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "58e43a50-20cc-40d4-bae8-ab455cd52887", + "_execution_state": "idle", + "_uuid": "60486db5b7e05da01f617a8555289670e1ab9c3a" + }, + "source": [ + "### Sex" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "_cell_guid": "79870c7d-e04f-4269-bf4d-ff314bfc1e96", + "_execution_state": "idle", + "_uuid": "c8262045e243302a4d4e84426c4b8c2932a0702b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there is any NAN\n", + "df.Sex.isnull().sum(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + 
"metadata": { + "_cell_guid": "fbf9e34d-01cd-4004-a509-cbc94b66edbc", + "_execution_state": "idle", + "_uuid": "ff1e3b6673013d9ab3d16f113c3bc57520572d4a" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SexSurvived
0female0.742038
1male0.188908
\n", + "
" + ], + "text/plain": [ + " Sex Survived\n", + "0 female 0.742038\n", + "1 male 0.188908" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the correlation between Sex and Survived\n", + "df[['Sex', 'Survived']].groupby(['Sex'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "_cell_guid": "b083a394-d2d4-4449-b745-77854545c825", + "_execution_state": "idle", + "_uuid": "4d5e24fded5c0296f278a2b8100c896c88050635" + }, + "outputs": [], + "source": [ + "# map the two genders to 0 and 1\n", + "df.Sex = df.Sex.map({'male':0, 'female':1})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "a2e1256c-73d3-4f73-b51c-eb7aaca1790a", + "_execution_state": "idle", + "_uuid": "025f3222286d5591c0600ef07ed6ed0c8f1a3309" + }, + "source": [ + "### Age" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "_cell_guid": "8ba1f221-c912-4eac-a944-6b0afa88a382", + "_execution_state": "idle", + "_uuid": "30bc1b17267fb3eb668f4d383b649ecca05051a3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "263" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there is any NAN\n", + "df.Age.isnull().sum(axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "441e1d2e-1b66-4482-ba21-987ac1d96b34", + "_execution_state": "idle", + "_uuid": "b812f4eb80b8b2d4116dafbc70991f6c9a6d8c1e" + }, + "source": [ + "### SibSp and Parch" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "_cell_guid": "a4e895a6-47cf-4641-8228-14ea595a9ea2", + "_execution_state": "idle", + "_uuid": "a6cc2c00d763ac3f55f349f21d5679dfd9b5359b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0, 0)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there 
is any NAN\n", + "df.SibSp.isnull().sum(axis=0), df.Parch.isnull().sum(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "_cell_guid": "cca00b7a-da36-4903-a025-ef4c50dab61e", + "_execution_state": "idle", + "_uuid": "03437dedc00ad11570635e56ae79faee105bd808" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FamilySurvived
010.303538
120.552795
230.578431
340.724138
450.200000
560.136364
670.333333
780.000000
8110.000000
\n", + "
" + ], + "text/plain": [ + " Family Survived\n", + "0 1 0.303538\n", + "1 2 0.552795\n", + "2 3 0.578431\n", + "3 4 0.724138\n", + "4 5 0.200000\n", + "5 6 0.136364\n", + "6 7 0.333333\n", + "7 8 0.000000\n", + "8 11 0.000000" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create a new feature \"Family\"\n", + "df['Family'] = df['SibSp'] + df['Parch'] + 1\n", + "\n", + "# inspect the correlation between Family and Survived\n", + "df[['Family', 'Survived']].groupby(['Family'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "_cell_guid": "5217e017-3029-4fbe-afc6-3315d5937431", + "_execution_state": "idle", + "_uuid": "5099b2a2783647bce53f71acb07af846641b3c87" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1 790\n", + "2 235\n", + "3 159\n", + "4 43\n", + "6 25\n", + "5 22\n", + "7 16\n", + "11 11\n", + "8 8\n", + "Name: Family, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the amount of people for each Family size\n", + "df['Family'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "_cell_guid": "88730abd-facd-4842-ba22-6fab5e2ec332", + "_execution_state": "idle", + "_uuid": "2df0bad861e11098213fcbb90a7787504c793a9b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FamilySurvived
000.161290
110.303538
220.552795
330.578431
440.724138
\n", + "
" + ], + "text/plain": [ + " Family Survived\n", + "0 0 0.161290\n", + "1 1 0.303538\n", + "2 2 0.552795\n", + "3 3 0.578431\n", + "4 4 0.724138" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Family = df.Family.map(lambda x: 0 if x > 4 else x)\n", + "df[['Family', 'Survived']].groupby(['Family'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "_cell_guid": "d26de4ea-d9c9-460d-8a2f-f3e388de9b83", + "_execution_state": "idle", + "_uuid": "b4055838fa086398aa33e6804cfc2ec871d72fc3" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1 790\n", + "2 235\n", + "3 159\n", + "0 82\n", + "4 43\n", + "Name: Family, dtype: int64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Family'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "380e18ed-ce81-4a4c-ac47-bc8ac4db8122", + "_execution_state": "idle", + "_uuid": "9b02b9181f1224830ad4a93cb0477aa278ec4d38" + }, + "source": [ + "### Ticket" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "_cell_guid": "3e355a5b-65dc-4072-9cf8-2aa9e2c5c311", + "_execution_state": "idle", + "_uuid": "09ad80a90051b181a3ffd1dd7deb7671dbce32b8" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there is any NAN\n", + "df.Ticket.isnull().sum(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "_cell_guid": "dce618df-3f32-45a2-814b-ef392c2248b3", + "_execution_state": "idle", + "_uuid": "c8f76441795cf10e50a38c538da303a488741d08" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 A/5 21171\n", + "1 PC 17599\n", + "2 STON/O2. 
3101282\n", + "3 113803\n", + "4 373450\n", + "5 330877\n", + "6 17463\n", + "7 349909\n", + "8 347742\n", + "9 237736\n", + "10 PP 9549\n", + "11 113783\n", + "12 A/5. 2151\n", + "13 347082\n", + "14 350406\n", + "15 248706\n", + "16 382652\n", + "17 244373\n", + "18 345763\n", + "19 2649\n", + "Name: Ticket, dtype: object" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Ticket.head(20)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "_cell_guid": "78913d9b-373b-41ad-bb86-2c4e3b18e183", + "_execution_state": "idle", + "_uuid": "4200860d04ec1319a5fa456a11ffe1f7138b7eb8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TicketSurvived
010.630137
120.464481
230.239203
340.200000
450.000000
560.166667
670.111111
780.000000
891.000000
9A0.068966
10C0.340426
11F0.571429
12L0.250000
13P0.646154
14S0.323077
15W0.153846
\n", + "
" + ], + "text/plain": [ + " Ticket Survived\n", + "0 1 0.630137\n", + "1 2 0.464481\n", + "2 3 0.239203\n", + "3 4 0.200000\n", + "4 5 0.000000\n", + "5 6 0.166667\n", + "6 7 0.111111\n", + "7 8 0.000000\n", + "8 9 1.000000\n", + "9 A 0.068966\n", + "10 C 0.340426\n", + "11 F 0.571429\n", + "12 L 0.250000\n", + "13 P 0.646154\n", + "14 S 0.323077\n", + "15 W 0.153846" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Ticket = df.Ticket.map(lambda x: x[0])\n", + "\n", + "# inspect the correlation between Ticket and Survived\n", + "df[['Ticket', 'Survived']].groupby(['Ticket'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "_cell_guid": "efd9bf4b-03bb-4f87-b709-f9662a1b2e51", + "_execution_state": "idle", + "_uuid": "e0a4426fc762e668c82b46fe9bd41fdb92786fa4" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "3 429\n", + "2 278\n", + "1 210\n", + "P 98\n", + "S 98\n", + "C 77\n", + "A 42\n", + "W 19\n", + "7 13\n", + "F 13\n", + "4 11\n", + "6 9\n", + "L 5\n", + "5 3\n", + "9 2\n", + "8 2\n", + "Name: Ticket, dtype: int64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the amount of people for each type of tickets\n", + "df['Ticket'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "_cell_guid": "7d2ecc98-5a2a-4066-87ca-1fe5ca6594d2", + "_execution_state": "idle", + "_uuid": "7c911250fe2a3ab1ed23f97acce1cdb855cbc181" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TicketFare
0165.771211
1220.235194
2315.380682
3416.764036
4519.262500
5619.153711
679.197438
788.397900
897.750000
9A10.189681
10C28.152273
11F24.677246
12L1.515000
13P119.698253
14S17.542900
15W31.056579
\n", + "
" + ], + "text/plain": [ + " Ticket Fare\n", + "0 1 65.771211\n", + "1 2 20.235194\n", + "2 3 15.380682\n", + "3 4 16.764036\n", + "4 5 19.262500\n", + "5 6 19.153711\n", + "6 7 9.197438\n", + "7 8 8.397900\n", + "8 9 7.750000\n", + "9 A 10.189681\n", + "10 C 28.152273\n", + "11 F 24.677246\n", + "12 L 1.515000\n", + "13 P 119.698253\n", + "14 S 17.542900\n", + "15 W 31.056579" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['Ticket', 'Fare']].groupby(['Ticket'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "_cell_guid": "21e8a215-01a0-45d0-bc41-20f4a25aa509", + "_execution_state": "idle", + "_uuid": "d809da11e565b2524707b4086121c45f880a5554" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TicketPclass
011.147619
122.309353
232.904429
343.000000
452.333333
562.111111
673.000000
783.000000
893.000000
9A3.000000
10C2.558442
11F1.846154
12L3.000000
13P1.102041
14S2.561224
15W2.315789
\n", + "
" + ], + "text/plain": [ + " Ticket Pclass\n", + "0 1 1.147619\n", + "1 2 2.309353\n", + "2 3 2.904429\n", + "3 4 3.000000\n", + "4 5 2.333333\n", + "5 6 2.111111\n", + "6 7 3.000000\n", + "7 8 3.000000\n", + "8 9 3.000000\n", + "9 A 3.000000\n", + "10 C 2.558442\n", + "11 F 1.846154\n", + "12 L 3.000000\n", + "13 P 1.102041\n", + "14 S 2.561224\n", + "15 W 2.315789" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['Ticket', 'Pclass']].groupby(['Ticket'], as_index=False).mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "4eab8333-25fd-46d1-a307-ac60f4c3f277", + "_execution_state": "idle", + "_uuid": "08aba02f0f1b6560033ddfcde7796ea2bdb5da7b" + }, + "source": [ + "### Fare" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "_cell_guid": "c245d17f-4245-4afa-a779-9256ce72057b", + "_execution_state": "idle", + "_uuid": "2f3a3c7dd4140e492f7b56ea21a25b0e54b3af8c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there is any NAN\n", + "df.Fare.isnull().sum(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "_cell_guid": "fcedc1d2-1fc9-4d15-bda6-4e0c7a8c1b79", + "_execution_state": "idle", + "_uuid": "279a4049e39c1350bbfb47815f4dc8bd64d84d76" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1043 3\n", + "Name: Ticket, dtype: object" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Ticket[df.Fare.isnull()]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "_cell_guid": "4809801d-6cae-47d3-bd02-9a9c615f99c6", + "_execution_state": "idle", + "_uuid": "056b56f133204ca80786d24925efbc93c3b7f399" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1043 3\n", + "Name: Pclass, dtype: 
int64" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Pclass[df.Fare.isnull()]" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "_cell_guid": "9a90fe36-97a8-4378-8a1b-a11324d429e5", + "_execution_state": "idle", + "_uuid": "7ca9047deed61252837c0c85afa81ffcac8766c1" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1043 NaN\n", + "Name: Cabin, dtype: object" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Cabin[df.Fare.isnull()]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "_cell_guid": "2d51f02f-bcb3-424d-a18b-a251e63aacc9", + "_execution_state": "idle", + "_uuid": "481f6e7941608bff1133d8adf1354229b1a115fc" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1043 S\n", + "Name: Embarked, dtype: object" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.Embarked[df.Fare.isnull()]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "_cell_guid": "6f9d5841-adf8-4c9c-9c59-39715827a0ad", + "_execution_state": "idle", + "_uuid": "0835a6f504f77c8d42245b5a9346e169bdc302a3" + }, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYgAAAEKCAYAAAAIO8L1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFgJJREFUeJzt3X+w3XV95/HnKwEEid2IRsMGqNZkq+isqHeQ6kyGameLbkd+bGVgKKbd1FAGBrPDzm50tI0rbN1uETbVMoSBNWYoFgupGcbRFSqlrhVM5If8qMutVbxCIYg/SAkoyXv/uN+r94Yvyb14v/d7bs7zMXPmfD+f8znf8z73QF7n8/11UlVIkrS3BX0XIEkaTAaEJKmVASFJamVASJJaGRCSpFYGhCSpVWcBkeTQJLcnuSvJvUk+3PS/MsltSR5I8pdJDmn6X9C0R5vHX9FVbZKk/etyBvE08Laqej1wHHBSkhOA/wFcWlUrgB8Aq5vxq4EfVNVy4NJmnCSpJ50FRI3b2TQPbm4FvA34q6Z/E3BKs3xy06Z5/O1J0lV9kqR9O6jLlSdZCGwHlgOfAP4R+GFVPdMMGQOWNcvLgO8CVNUzSX4EvAR4bK91rgHWABx++OFvevWrX93lW5CkA8727dsfq6ol+xvXaUBU1W7guCSLgS3Aa9qGNfdts4VnXQekqjYCGwFGRkZq27Zts1StJA2HJN+Zzrg5OYqpqn4I3AKcACxOMhFMRwEPNctjwNEAzeP/Cnh8LuqTJD1bl0cxLWlmDiQ5DPgN4H7gS8BvN8NWAZ9tlrc2bZrH/6a8kqAk9abLTUxHApua/RALgOuq6sYk9wGfTnIRcAdwVTP+KmBzklHGZw5ndFibJGk/OguIqrobeENL/7eA41v6nwLe3VU9kqSZ8UxqSVIrA0KS1MqAkCS1MiAkSa0MCElSKwNCktTKgJAktTIgJEmtDAhJUisDQpLUyoCQJLUyICRJrQwISVIrA0KS1MqAkCS1MiAkSa0MCElSKwNiQG3evJmVK1dy7bXX9l2KpCFlQAyoK6+8EoDLL7+850okDSsDYgBt3rx5SttZhKQ+GBADaGL2MMFZhKQ+GBCSpFYGhCSplQEhSWplQAygs846a0p71apVPVUiaZgZEAPonHPOmdJevXp1T5VIGmYGxICamEU4e5DUl1RVNytOjgY+BSwF9gAbq+p/JVkPvBfY0Qz9QFV9rnnO+4HVwG7ggqr6wr5eY2RkpLZt29ZJ/ZJ0oEqyvapG9jeuyxnEM8CFVfUa4ATgvCTHNo9dWlXHNbeJcDgWOAN4LXAS8OdJFnZY30C7/fbbOfHEE9m+fXvfpUgaUp0FRFU9XFVfb5afAO4Hlu3jKScDn66qp6vqn4BR4Piu6ht069evZ8+ePXzoQx/quxRJQ2pO9kEkeQXwBuC2puv8JHcnuTrJi5u+ZcB3Jz1tjH0HygHr9ttvZ+fOnQDs3LnTWYSkXnQeEEkWAdcDa6vqx8DlwKuA44CHgUsmhrY8/Vk7SJKsSbItybYdO3a0PGX+W79+/ZS2swhJfeg0IJIczHg4XFNVNwBU1SNVtbuq9gBX8vPNSGPA0ZOefhTw0N7rrKqNVTVSVSNLlizpsvzeTMwenqstSXOhs4BIEuAq4P6q+tik/iMnDTsVuKdZ3gqckeQFSV4JrABu76q+QbZo0aJ9tiVpLnQ5g3grcDbwtiR3Nrd3An+S5BtJ7gZ+HfhPAFV1L3AdcB/weeC8qtrdYX0Da+9NTB/5yEf6KUTSUDuoqxVX1Zdp36/wuX0852Lg4q5qmi+OP/54Fi1axM6dO1m0aBFvetOb+i5J0hDyTOoBtX79ehYsWODsQVJvOptB6Bdz/PHHc8stt/RdhqQh5gxCktTKgJAktTIgJEmtDAhJUisDQpLUyoAYUFu2bGHlypVs3bq171IkDSkDYkBddtllAFxyySX7GSlJ3TAgBtCWLVuY+KW/qnI
WIakXBsQAmpg9THAWIakPBsQA2vt3wrv63XBJ2hcDYgCNXyn9uduSNBcMiAG0du3aKe0LL7ywp0okDTMv1vcL2rBhA6Ojo52+xk033cRNN900a+tbvnw5F1xwwaytT9KByRnEgDr44IMBWLZsWc+VSBpWziB+QV19E59Y74YNGzpZvyTtjzMISVIrA0KS1MqAkCS1MiAkSa0MCElSKwNCktTKgJAktTIgJEmtDAhJUisDQpLUyoCQJLXqLCCSHJ3kS0nuT3Jvkvc1/Uck+WKSB5r7Fzf9SbIhyWiSu5O8savaJEn71+UM4hngwqp6DXACcF6SY4F1wM1VtQK4uWkDvANY0dzWAJd3WJskaT86C4iqeriqvt4sPwHcDywDTgY2NcM2Aac0yycDn6pxXwUWJzmyq/okSfs2J/sgkrwCeANwG/DyqnoYxkMEeFkzbBnw3UlPG2v69l7XmiTbkmzbsWNHl2VL0lDrPCCSLAKuB9ZW1Y/3NbSlr57VUbWxqkaqamTJkiWzVaYkaS+dBkSSgxkPh2uq6oam+5GJTUfN/aNN/xhw9KSnHwU81GV9kqTn1uVRTAGuAu6vqo9NemgrsKpZXgV8dlL/e5qjmU4AfjSxKUqSNPe6/MnRtwJnA99IcmfT9wHgo8B1SVYDDwLvbh77HPBOYBR4Evi9DmuTJO1HZwFRVV+mfb8CwNtbxhdwXlf1SJJmxjOpJUmtDAhJUisDQpLUyoCQJLUyICRJrQwISVIrA0KS1MqAkCS1MiAkSa0MCElSKwNCktTKgJAktTIgJEmtDAhJUisDQpLUyoCQJLUyICRJrQwISVIrA0KS1MqAkCS1MiAkSa0MCElSq2kFRMb9TpI/bNrHJDm+29IkSX2a7gziz4FfA85s2k8An+ikIknSQDhomuPeXFVvTHIHQFX9IMkhHdYlSerZdGcQP02yECiAJEuAPZ1VJUnq3XQDYgOwBXhZkouBLwP/vbOqJEm9m1ZAVNU1wH8B/hh4GDilqj6zr+ckuTrJo0numdS3Psn3ktzZ3N456bH3JxlN8s0kv/n83o4kabbsdx9EkgXA3VX1OuAfZrDuTwIfBz61V/+lVfWne73GscAZwGuBfw3clOTfVNXuGbyeJGkW7XcGUVV7gLuSHDOTFVfVrcDj0xx+MvDpqnq6qv4JGAU8jFaSejTdo5iOBO5NcjvwLxOdVfWu5/Ga5yd5D7ANuLCqfgAsA746acxY0/csSdYAawCOOWZGmSVJmoHpBsSHZ+n1Lgc+wvjRUB8BLgH+I5CWsdW2gqraCGwEGBkZaR0jSfrFTSsgqupvZ+PFquqRieUkVwI3Ns0x4OhJQ48CHpqN15QkPT/TvdTGCUm+lmRnkp8k2Z3kxzN9sSRHTmqeCkwc4bQVOCPJC5K8ElgB3D7T9UuSZs90NzF9nPGjjD4DjADvYfwf8eeU5FrgROClScaAPwJOTHIc45uPvg2cA1BV9ya5DrgPeAY4zyOYJKlf0w0Iqmo0ycLmH+7/neQr+xl/Zkv3VfsYfzFw8XTrkSR1a7oB8WRz7aU7k/wJ4yfLHd5dWZKkvk33UhtnN2PPZ/ww16OB/9BVUZKk/u1zBpHkmKp6sKq+03Q9xewd8ipJGmD7m0H89cRCkus7rkWSNED2FxCTT2D7lS4LkSQNlv0FRD3HsiTpALe/o5he35wQF+CwSSfHBaiq+qVOq5Mk9WafAVFVC+eqEEnSYJnuYa6SpCFjQEiSWhkQkqRWBoQkqZUBIUlqZUBIkloZEJKkVgaEJKmVASFJamVASJJaGRCSpFYGhCSplQEhSWplQEiSWhkQkqRWBoQkqdX+flHugLBhwwZGR0f7LmNGHnjgAQAuuOCCniuZmeXLl8+7miW1G4qAGB0d5Y5v3MeeFx7RdynTlp+M/wT49n/8554rmb4FTz7edwmSZtFQBATAnhcewVPH/lbfZRzQDr3vxr5LkDSLOtsHkeTqJI8muWdS3xFJvpjkgeb+xU1
/kmxIMprk7iRv7KouSdL0dLmT+pPASXv1rQNurqoVwM1NG+AdwIrmtga4vMO6JEnT0FlAVNWtwN4bpU8GNjXLm4BTJvV/qsZ9FVic5MiuapMk7d9cH+b68qp6GKC5f1nTvwz47qRxY03fsyRZk2Rbkm07duzotFhJGmaDch5EWvqqbWBVbayqkaoaWbJkScdlSdLwmuuAeGRi01Fz/2jTPwYcPWncUcBDc1ybJGmSuQ6IrcCqZnkV8NlJ/e9pjmY6AfjRxKYoSVI/OjsPIsm1wInAS5OMAX8EfBS4Lslq4EHg3c3wzwHvBEaBJ4Hf66ouSdL0dBYQVXXmczz09paxBZzXVS2SpJkblJ3UkqQBY0BIkloZEJKkVgaEJKmVASFJamVASJJaGRCSpFYGhCSplQEhSWplQEiSWhkQUgeuuOIKVq5cyVVXXdV3KdLzZkBIHbjmmmsA2LRp035GSoPLgJBm2RVXXDGl7Sxiflm3bh0rV67kgx/8YN+l9M6AkGbZxOxhgrOI+eUrX/kKALfeemvPlfTPgJCkxrp166a0h30WYUBIUmNi9jBh2GcRBoQ0y84666wp7VWrVj3HSGmwGRDSLDvnnHOmtFevXt1TJdIvxoCQZtmWLVumtLdu3dpTJZqpt7zlLVPaK1eu7KmSwWBASLPssssum9K+5JJLeqpEM/XRj350Svuiiy7qqZLBYEBIs6yq9tnWYJuYRQz77AHgoL4LkA40SaaEQpIeq9FM7T2LGGZDERBjY2MsePJHHHrfjX2XckBb8OT3GRt7pu8yerd27VouvfTSn7UvvPDCHqvRTF122WXccMMNnH766Zx//vl9l9MrNzFJs+zUU0/92awhCe9617t6rkgzccMNNwBw3XXX9VxJ/4ZiBnHUUUfxyNMH8dSxv9V3KQe0Q++7kaOOWtp3GQNhYhbh7GF+2fsAg49//ONDPYtwBiF14NRTT+XWW2919jDPTMweJgz7LMKAkCS16mUTU5JvA08Au4FnqmokyRHAXwKvAL4NnF5VP+ijPklSvzOIX6+q46pqpGmvA26uqhXAzU1bkubMaaedNqV9+umn91TJYBikTUwnAxMXzt8EnNJjLZKG0Nq1a6e0h3kHNfQXEAX8nyTbk6xp+l5eVQ8DNPcv66k2SRL9BcRbq+qNwDuA85JM+5z2JGuSbEuybceOHd1VKGnotB3mOsx6CYiqeqi5fxTYAhwPPJLkSIDm/tHneO7GqhqpqpElS5bMVcmShoCHuU415wGR5PAkL5pYBv4dcA+wFZj4ZZVVwGfnujZJ0s/1cZjry4EtzaUIDgL+oqo+n+RrwHVJVgMPAu/uoTZJUmPOA6KqvgW8vqX/+8Db57oeSZpw2mmnTdnMNOyHuQ7FtZikNhs2bGB0dLSTdY+NjQHj1wGbbcuXL+eCCy6Y9fUKDjvssH22h80gnQchHTB27drFrl27+i5DM3TNNddMaW/atOk5Rg4HZxAaWl1+C59Y94YNGzp7DalrziAkSa0MCElqjIyMTGmfcMIJPVUyGAwISWps3759Svu2227rqZLBYEBIUqOq9tkeNgaEJKnV0BzFtODJxzn0vhv7LmPa8tSPAahDf6nnSqZvwZOPA/4mtXSgGIqAWL58ed8lzNgDDzwBwIpXzad/cJfOy7+1pHZDERDz8axTj6OX1LehCAjNb11eEqMrDzzwADC/vpzMt0t4zNV/F7P9N5lPf2cDQgNvdHSU/3fP1zlm0e6+S5m2Q346fvzHU9/+Ws+VTM+DOxf2XYIGkAGheeGYRbv54MjOvss4YF20bVHfJcxYF9/CN2/ezJVXXvmz9rnnnsuZZ545668zX3iYqyQ1zj777CntYQ4HMCAkaYqlS8ePHDz33HN7rqR/bmKSpEmWLl3K0qVLh372AM4gJEnPwYCQJLVyE5MG3tjYGP/yxMJ5eaTNfPGdJxZyePMzqbNtvp3HMh/PYYFuzq8wIDQvPL07fOeJ+XOs/k/3BICDF8y
Pq4E+vTsc3tG6R0dHuePeO2BxRy8w2/aM393xvTv6rWMmftjNag0IDbwTTzxxXn0DhZ9/C12xYkXPlUxfp9fRWgx7TtzT3fqH3IJbutlbYEBo4M23qT54LS0dGNxJLUlq5QxCUqfGxsbg+7Dgr+fJ99GJS37Nn11e8AyM1ewfZGBASOrU4sWL2bVrV99lTNtErYcdcljPlczAIeN/59lmQEjq1NVXX913CTPi/qOfmydzPknSXBu4gEhyUpJvJhlNsq7veiRpWA1UQCRZCHwCeAdwLHBmkmP7rUqShlOqBudMzyS/Bqyvqt9s2u8HqKo/bhs/MjJS27Ztm8MKn62rywh0eaLVfPrJwy51eQkIP7/uzcf/92AwPr8k26tqZL/jBiwgfhs4qap+v2mfDby5qs6fNGYNsKZp/irwzTkvdO68FHis7yL0vPn5zV8H+mf3y1W1ZH+DBu0oprT0TUmwqtoIbJybcvqVZNt0Ul6Dyc9v/vKzGzdQ+yCAMeDoSe2jgId6qkWShtqgBcTXgBVJXpnkEOAMYGvPNUnSUBqoTUxV9UyS84EvMH6i+9VVdW/PZfVpKDalHcD8/OYvPzsGbCe1JGlwDNomJknSgDAgJEmtDIgBlOTqJI8muafvWjQzSY5O8qUk9ye5N8n7+q5J05fk0CS3J7mr+fw+3HdNfXIfxABKshLYCXyqql7Xdz2aviRHAkdW1deTvAjYDpxSVff1XJqmIUmAw6tqZ5KDgS8D76uqr/ZcWi+cQQygqroVeLzvOjRzVfVwVX29WX4CuB9Y1m9Vmq4at7NpHtzchvZbtAEhdSTJK4A3ALf1W4lmIsnCJHcCjwJfrKqh/fwMCKkDSRYB1wNrq+rHfdej6auq3VV1HONXcjg+ydBu5jUgpFnWbLu+Hrimqm7oux49P1X1Q+AW4KSeS+mNASHNomYn51XA/VX1sb7r0cwkWZJkcbN8GPAbwD/0W1V/DIgBlORa4O+BX00ylmR13zVp2t4KnA28Lcmdze2dfRelaTsS+FKSuxm/NtwXq+rGnmvqjYe5SpJaOYOQJLUyICRJrQwISVIrA0KS1MqAkCS1MiCkfUiyuzlU9Z4kn0nywn2MXZ/kP89lfVKXDAhp33ZV1XHNVXV/AvxB3wVJc8WAkKbv74DlAEnek+Tu5ncDNu89MMl7k3ytefz6iZlHknc3s5G7ktza9L22+Q2CO5t1rpjTdyU9B0+Uk/Yhyc6qWpTkIMavr/R54FbgBuCtVfVYkiOq6vEk64GdVfWnSV5SVd9v1nER8EhV/VmSbwAnVdX3kiyuqh8m+TPgq1V1TZJDgIVVtauXNyxN4gxC2rfDmks/bwMeZPw6S28D/qqqHgOoqrbf7nhdkr9rAuEs4LVN//8FPpnkvcDCpu/vgQ8k+a/ALxsOGhQH9V2ANOB2NZd+/pnmgnz7m3p/kvFfkrsrye8CJwJU1R8keTPw74E7kxxXVX+R5Lam7wtJfr+q/maW34c0Y84gpJm7GTg9yUsAkhzRMuZFwMPNpb/PmuhM8qqquq2q/hB4DDg6ya8A36qqDcBW4N92/g6kaXAGIc1QVd2b5GLgb5PsBu4AfnevYR9i/JfkvgN8g/HAAPifzU7oMB40dwHrgN9J8lPgn4H/1vmbkKbBndSSpFZuYpIktTIgJEmtDAhJUisDQpLUyoCQJLUyICRJrQwISVKr/w9pkw+R5PmaqAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# use boxplot to visualize the distribution of Fare for each Pclass\n", + "sns.boxplot('Pclass','Fare',data=df)\n", + "plt.ylim(0, 300) # ignore one data point with Fare > 500\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "_cell_guid": "137bcdac-da05-49e6-8aed-6b4061f40c16", + "_execution_state": "idle", + "_uuid": "13c768161562d3c2c77a3f0ec8647e9dbf3eb146" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare
Pclass
187.508992
221.179196
313.302889
\n", + "
" + ], + "text/plain": [ + " Fare\n", + "Pclass \n", + "1 87.508992\n", + "2 21.179196\n", + "3 13.302889" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the correlation between Pclass and Fare\n", + "df[['Pclass', 'Fare']].groupby(['Pclass']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "_cell_guid": "95b35c28-1a17-4995-8d76-0160b07f4a68", + "_execution_state": "idle", + "_uuid": "a187382bb74abb543ec58f1b56c78a3c7767564b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare
Pclass
10.919302
20.642476
30.864050
\n", + "
" + ], + "text/plain": [ + " Fare\n", + "Pclass \n", + "1 0.919302\n", + "2 0.642476\n", + "3 0.864050" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# divide the standard deviation by the mean. A lower ratio means a tighter \n", + "# distribution of Fare in each Pclass\n", + "df[['Pclass', 'Fare']].groupby(['Pclass']).std() / df[['Pclass', 'Fare']].groupby(['Pclass']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "_cell_guid": "9a40bab3-eee7-43e6-acdc-d70066aa8a34", + "_execution_state": "idle", + "_uuid": "47d8265715f80a20ede4648c53021ad5a509889e" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEKCAYAAAAIO8L1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X2cFNWd7/HPbxhABhQjoDAIGAJ59GZVWGPWXW6I0UGuxHg3rkg2MT4su74wJndXMHJNYu4qPidZDctdVsnirgQ0xg0gEUwkS9xoDPiAKG4cJmJgjMCAKA5PM/PbP6p66B6qp3umu6Zrur/v12te06e66vRvoLt/dU6dc8rcHRERkY6qSh2AiIgkkxKEiIhEUoIQEZFIShAiIhJJCUJERCIpQYiISKTYEoSZHWNmz5rZi2b2spl9O9z+fjP7tZm9ZmbLzKxfuL1/WK4Pnz8lrthERCS3OFsQB4FPu/sfAacBU8zsLOB24LvuPh7YA1wZ7n8lsMfdxwHfDfcTEZESiS1BeGBfWOwb/jjwaeBH4fbFwOfCxxeGZcLnzzEziys+ERHpXHWclZtZH2ADMA6YD2wB3nb3lnCXbcDI8PFI4PcA7t5iZnuBIcCuDnXOBGYCDBw4cMKHP/zhOP8EEZGys2HDhl3uPizXfrEmCHdvBU4zs+OBR4GPRO0W/o5qLRy1Doi7LwQWAkycONHXr19fpGhFRCqDmW3NZ78eGcXk7m8DvwDOAo43s1RiOhloDB9vA0YBhM8PBnb3RHwiInK0OEcxDQtbDpjZAOAzwGZgLfD5cLfLgJ+Ej5eHZcLnn3StJCgiUjJxdjGNABaH1yGqgIfcfaWZvQIsNbObgeeB+8P97wf+1czqCVoO02OMTUREcogtQbj7RuD0iO0NwJkR2w8AF8cVj4iIdI1mUouISCQlCBERiaQEISIikZQgREQkkhKEiIhEUoIQEZFIShAiIhJJCUJERCIpQYiISCQlCBERiaQEISIikZQgREQkkhKEiIhEUoIQEZFIShAiIhJJCUJERCIpQYiISCQliDK2dOlS6urqePjhh0sdioj0QkoQZewHP/gBAPfdd1+JIxGR3kgJokwtXbo0o6xWhIh0lRJEmUq1HlLUihCRrqoudQC9wYIFC2hoaMjYtn37dgBGjhzZvm3s2LFcffXVPRqbiEhclCC66cCBA6UOQUQkVkoQeYhqFcyePRuAO++8s6fDycvll1+e0c101VVXlTAaEemNdA2iTE2fPj2jfPHFF
5coEhHprZQgytjll18OqPUgIt2jLqYyNn369KNaEiIi+YqtBWFmo8xsrZltNrOXzeyr4fabzGy7mb0Q/kxNO+YGM6s3s/8ys7q4YhMRkdzibEG0AH/n7s+Z2bHABjN7Inzuu+5+V/rOZvZRYDrwMaAW+JmZfdDdW2OMUUREsoitBeHub7r7c+Hjd4HNwMhODrkQWOruB939d0A9cGZc8YmISOd65CK1mZ0CnA78Otx0jZltNLNFZva+cNtI4Pdph22j84QiIiIxij1BmNkg4BHga+7+DrAA+ABwGvAmcHdq14jDPaK+mWa23szW79y5M6aoRUQk1gRhZn0JksOD7v5jAHd/y91b3b0N+GeOdCNtA0alHX4y0NixTndf6O4T3X3isGHD4gxfRKSixTmKyYD7gc3u/p207SPSdrsI2BQ+Xg5MN7P+ZvZ+YDzwbFzxiYhI5+IcxXQ28EXgJTN7Idw2F7jUzE4j6D56HfhrAHd/2cweAl4hGAE1SyOYRERKJ7YE4e5PEX1dYVUnx9wC3BJXTCIikj8ttSEiIpGUIEREJJIShIiIRFKCEBGRSEoQIiISSQlCREQiKUGIiEgkJQgREYmkBCEiIpGUIEREJJIShIiIRFKCEBGRSEoQIiISSQlCREQiKUGIiEgkJQgREYmkBCEiIpGUIEREJJIShIiIRFKCKGMrVqygrq6OVauy3gY8EdauXUtdXR3r1q0rdSgikkYJoozNnz8fgHvuuafEkXTurrvuAuD2228vcSQikk4JokytWLECdwfA3RPbili7di0tLS0AtLS0qBUhkiBKEGUq1XpISWorItV6SFErQiQ5lCDKVKr1kK2cFKnWQ7ayiJSOEkSZMrNOy0lRXV3daVlESkcJokzNmjUro3zttdeWKJLOXXfddRnl66+/vkSRiEhHShBlatq0ae2tBjNj6tSpJY4o2uTJk9tbDdXV1UyaNKnEEYlIihJEGUu1IpLaekhJtSLUehBJltg6fM1sFPAAMBxoAxa6+z+Y2QnAMuAU4HXgL9x9jwWnu/8ATAWagS+7+3NxxVcJpk2bxrRp00odRk6TJ09m8uTJpQ5DRDqIswXRAvydu38EOAuYZWYfBb4O/NzdxwM/D8sA5wPjw5+ZwIIYYxMRkRxiSxDu/maqBeDu7wKbgZHAhcDicLfFwOfCxxcCD3jgGeB4MxsRV3wiItK5HrkGYWanAKcDvwZOcvc3IUgiwInhbiOB36cdti3c1rGumWa23szW79y5M86wRUQqWuwJwswGAY8AX3P3dzrbNWLbUbO73H2hu09094nDhg0rVpgiItJBrAnCzPoSJIcH3f3H4ea3Ul1H4e8d4fZtwKi0w08GGuOMT0REsostQYSjku4HNrv7d9KeWg5cFj6+DPhJ2vYvWeAsYG+qK0pERHpenOsanA18EXjJzF4It80FbgMeMrMrgTeAi8PnVhEMca0nGOZ6eYyxiYhIDrElCHd/iujrCgDnROzvwKyIfUVEpAQ0k1pERCIpQYiISCQlCBERiaQEISIikZQgREQkkhKEiIhEUoIQEZFIShAiIhJJCUJERCIpQYiISCQlCBERiaQEISIikZQgREQkkhKEiIhEUoIQEZFIShDSJWvXrqWuro5169aVOhQRiZkShHTJXXfdBcDtt99e4khEJG5KEJK3tWvX0tLSAkBLS4taESJlTglC8pZqPaSoFSFS3pQgJG+p1kO2soiUFyUIyVt1dXWnZREpL0oQkrfrrrsuo3z99deXKBIR6Ql5JQgL/KWZfTMsjzazM+MNTZJm8uTJ7a2G6upqJk2aVOKIRCRO+bYg/hH4JHBpWH4XmB9LRJJoqVaEWg8i5S/fTuRPuPsZZvY8gLvvMbN+McYlCTV58mQmT55c6jBEpAfk24I4bGZ9AAcws2FAW2xRiYhIyeWbIO4BHgVONLNbgKeAebFFJSIiJZdXF5O7P2hmG4BzAAM+5+6bOzvGzBYBFwA73P3UcNtNwF8BO
8Pd5rr7qvC5G4ArgVbgWndf3fU/pzgWLFhAQ0NDp/ts2bIFgNmzZ2fdZ+zYsVx99dVFjU1EpKfkTBBmVgVsDL/kX+1C3f8CfB94oMP277p7xpRcM/soMB34GFAL/MzMPujurV14vaJpaGjgtVc2MXrwgKz79Gs9CMDB7Vsin39j7/5YYutMx8S2fft2AEaOHJmxnxKXiOQjZ4Jw9zYze9HMRrv7G/lW7O7rzOyUPHe/EFjq7geB35lZPXAm8HS+r1dsowcP4IY/Hd/t42996rUiRtM9Bw4cKHUIItKL5TuKaQTwspk9C7yX2ujun+3Ga15jZl8C1gN/5+57gJHAM2n7bAu3HcXMZgIzAUaPHt2Nly9fHVsFqe6vO++8sxThiEgvl2+C+HaRXm8B8PcEo6H+HrgbuILgukZHHlWBuy8EFgJMnDgxch8RESlcvhep/6MYL+bub6Uem9k/AyvD4jZgVNquJwONxXhNERHpnnyX2jjLzH5jZvvM7JCZtZrZO119MTMbkVa8CNgUPl4OTDez/mb2fmA88GxX6xcRkeLJt4vp+wSjjB4GJgJfIvgSz8rMfgh8ChhqZtuAbwGfMrPTCLqPXgf+GsDdXzazh4BXgBZgVqlGMImISCDv9Zrdvd7M+oRf3D8ws1/l2P/SiM33d7L/LcAt+cYjIiLxyjdBNIdrL71gZncAbwID4wtLRERKLd+lNr4Y7nsNwTDXUcCfxxWUiIiUXqctiNTkOHffGm46QPGGvIqISILlakH8e+qBmT0ScywiIpIguRJE+gS2sXEGIiIiyZIrQXiWxyIiUuZyjWL6o3BCnAED0ibHGeDuflys0YmISMl0miDcvU9PBSIiIsmS7zBXERGpMEoQIiISSQlCREQiKUGIiEgkJQgREYmkBCEiIpGUIEREJJIShIiIRFKCEBGRSEoQ0iUbNmzg/PPP5/nnny91KCISMyUI6ZJ58+bR1tbGzTffXOpQRCRmShCStw0bNrBv3z4A9u3bp1aESJlTgpC8zZs3L6OsVoRIeVOCkLylWg/ZyiJSXpQgJG+DBg3qtCwi5UUJQvI2d+7cjPKNN95YokhEpCcoQUjeJkyY0N5qGDRoEKeffnqJIxKROClBSJfMnTuXqqoqtR5EKkCue1KLZJgwYQI//elPSx2GiPSA2FoQZrbIzHaY2aa0bSeY2RNm9lr4+33hdjOze8ys3sw2mtkZccUlIiL5ibOL6V+AKR22fR34ubuPB34elgHOB8aHPzOBBTHGJSIieYgtQbj7OmB3h80XAovDx4uBz6Vtf8ADzwDHm9mIuGITEZHcevoi9Unu/iZA+PvEcPtI4Pdp+20Ltx3FzGaa2XozW79z585YgxURqWRJGcVkEds8akd3X+juE9194rBhw2IOS0SkcvV0gngr1XUU/t4Rbt8GjErb72SgsYdjExGRND2dIJYDl4WPLwN+krb9S+FoprOAvamuKBERKY3Y5kGY2Q+BTwFDzWwb8C3gNuAhM7sSeAO4ONx9FTAVqAeagcvjiktERPITW4Jw90uzPHVOxL4OzIorFhER6bqkXKQWEZGEUYIQEZFIShDSJfX19Vx00UU0NDSUOhQRiZkShHTJTTfdRHNzM9/61reKVqeSjkgyKUFI3urr60nNXt+xY0fRvtDvuOMOmpubue2224pSn4gUhxKE5O2mm27KKBejFVFfX8/WrVsB2Lp1q1oRIgmiBCF567j21Y4dO7Lsmb877rgjo6xWhEhyKEFISaVaD9nKIlI6uqNchMbGRt7bu59bn3qt23W8sXc/Ay3e5aQWLFjQaZfMli1bAJg9e3an9YwdO5arr7465+v179+fgwcPZpQLNWbMmIykMGbMmILrFJHiUILoxRoaGnh180aGHp9lh7bg1643N2atY9fb+b9eTU1NRoIYOHBg/gdnMWfOHGbNOjKJ/utf/3one4tIT1KCiFBbW8tB388Nfzq+23Xc+tRr9K+t7dIxixYtYtmyZcyYM
YPLLrss9wHA0OPhc5/ufk/hvz/Zlve+e/bsySjv3t3xflBdN27cuPZWxJgxYxg7dmzBdYpIcegaRIIsW7YMgCVLlpQ4kp41Z84campqKrL1oDkgkmRKEAmxaNGijPLixYuz7Fk6w4cPzyiPGFGcu8KOGzeORx99tCJbD5oDIkmmBJEQqdZDShJbEd/4xjcyyt/85jdLFEl50BwQSTolCMnbuHHj2lsRI0aMqMgz/mLSHBBJOl2kli75xje+wezZsxPXeug45Hf79u0AjBw5sn1bvsN5e4rmgEjSqQWREJdccklGecaMGSWKpHNxXC9YsWIFdXV1rFq1qmh1HjhwgAMHDhStvjh0nPOhOSCSNGpBJMQVV1yRcR0i32Gu5WD+/PkA3HPPPUydOrVbdXRsGaQmB955552FBRcjzQGRpFMLIkFSrYikth7isGLFCoI7zoK7F7UVkXSpOSCA5oBIIilBJMgVV1zB6tWrK7L1kHLPPfeUKJLSqOQ5IJJ86mKSkkq1HrKVy13qmo5IEilBSEmZWUZSMLMSRhO/3jjaSiqXupikpNIv0gJce+21JYqkNHrDaCupXGpBSElNmzaN+fPn4+6YWbdHMfUWvXG0lVQutSCk5FKtiEprPYgknRKElNy0adNYvXp12bceejOtOluZlCBEJCetOluZSnINwsxeB94FWoEWd59oZicAy4BTgNeBv3D3PdnqkODWqO/s7dpNfzra9TYc8nhvjSq9W9Sqs5rUVxlK2YKY7O6nufvEsPx14OfuPh74eVgWkRLTqrOVK0mjmC4EPhU+Xgz8Ari+VMH0BrW1tfSzXQXfcnToiK7dGlUqi1adrVylShAOrDEzB/7J3RcCJ7n7mwDu/qaZnVii2EQkTeqe4enlcqbJjEeUqovpbHc/AzgfmGVmk/I90Mxmmtl6M1u/c+fO+CIUESBYLypdpa0bVcmTGUvSgnAProq6+w4zexQ4E3jLzEaErYcRwI4sxy4EFgJMnDixshbuESmB1KqzW7durYhVZzWZ8Ygeb0GY2UAzOzb1GDgP2AQsB1LLmF4G/KSnYyu1tWvXUldXx7p160odikgGrTpbmUrRxXQS8JSZvQg8Czzm7o8DtwHnmtlrwLlhuaLcddddANx+++0ljiS7RYsWUVdXx+LFi0sdivSgOO4kKMnX4wnC3Rvc/Y/Cn4+5+y3h9iZ3P8fdx4e/d/d0bKW0du1aWlpaAGhpaUlsKyJ117slS5aUOBIRiZtmUidEqvWQksRWxKJFizLKakWIlDcliIRItR6ylZMg/Z7ZoFaESLlL0kS5ilZVVUVbW1tGuVxpnLlI76AEkRB9+/bl4MGDGeVK0Z0x5h2TTEdbtmwBjgxRzEaJSCQ7JYiESE8OUeUk+OxnP8vy5cvbyxdddFG36inGOPOGhgY2vroJhh6TZY9DAGzcVZ+9kl2VOflJJF9KEAkxaNAg9u3bl1FOmubm5ozyu+++W6JIQkOPoc+F3R922fqT+O9tUIyWjlo5UipKEFm8sXc/tz71Wtbnd7wXnOGfOLB/1uPHj4x8KtLcuXOZO3due/nGG2/M/+Ae8rOf/eyocq4unEoXtHQ2Y0NOiHzePVgM4KWdb0U/31RRo70lYZQgIuQzGehQeObXf+QHIp8fPzK/elImTJjAgAED2L9/PzU1NZx++ul5HyvJZkNOoPqCum4d27JydZGjEcmfEkSEfJrzcazPcuyxx7J///5Edi+JSOWpiASxYcMGbrzxRubNm5fYM/P6+np27AjWJ9yxY0fed+3a9Xb2O8rtDS9pDO4k3+x6G4aO6HK4IlIBKiJBzJs3j7a2Nm6++WYeeeSRUocTKequXQsXLuz0mFwJZG/YDTZ0RHQ3WPBc17rCRKRylH2C2LBhQ/vooH379vH8888nohXRcXRL1F27Zs+e3ekIllxdYUlZplhzFoqr479nY2Mj+/fv7/SYAQMGUFt75M6BHf8t46izN8j13oTCR5pFvUZvmRxa9
gli3rx5GeWktiL69++fMfehf//o0VE9LdcHKPWh6ezN3dDQwKZXNzJgSHQdB8O7emzZuTHr6+xvyi/eShCMjHoVGzIMAG/eDy2HOz3mPYemncE/ojcdfaOtoM7f0mdI0N/Y1nwIb2nttM5mP8SencFQ59amN7v8dyRBQ0MDv91cz8jBo7PuU93aD4D3Gg9FPr997xtdft3ecgOisk8Q6XMLosql0vHLtL6+nlmzZrWXv/e97yWu6+fEE09sv04CcNJJJ+V97IAhMP4C6/Zrv7ayd94bqrGxEX9nb7dHI3nTbhoPH/1FbUOG0f+Ci7tV58GVD0du7zNkBDXTZnarzuYVnXeHJtnIwaOZ9Wdzc++Yxfxfzuv0+agTp6S07nMp+wTRGyagQbDefqoVkaS7dnV8c9fVHRmu+cADD/R0OCLSg8o+QfSGCWgpo0aNoqGhIdF37Uq1Iq666qqSxtHY2AjvHChsNvSuAzQeaixeUBFqa2tp6tunoHkQtcPyb6l1V2NjI61NTbz7L98OX/gwePTouHZWBdXhmmGHD9J4OEsfYoI1Njay7+33crYCOrP97a0MYmB7uSeua/SUsk8QEyZMyCgn4QJ1NjU1NZx66qmJaT1EGT58OMOHD+fii7vXvSHJdNxxx2VclD7Y1kJbW+ddglVVRv++fYJC3xqOO+64OEPsNRoaGqh/5TVGH5t9KYV+LcFX76HfN0c+/8a722OJravKPkHU12cu1pbv/AJJttraWnb1ay54LabaobW5d6wACxYsKHUIJVFbW8t7HCr4GsTA2n4Z20YfO5Ib/vgr3a7z1t/c2+1ji6nsE0THLqW5c+eydOnSEkUjIuWusbGR3Xt28zdPXg/A4dbDtOXqrgOqrIq+fYIuu4MthzihT/T6XT2p7BPEnj17Oi33lN7SL6k5C9k1NTVx6623MnfuXE44ofQf3p5UX1/P7Nmzufvuu9UCz6Fjdx0HWyBHdx0AVUZV/6DLbkD/AYnosiv7BJEUwXjrjQwfnP2NUtUaDOV8p/GlyOf/sDf+oZ4NDQ1s3ryRwe+Lfr41PBFq/EP2OQt7O+TgxsZG9r9T2FDV/U3QeDjeC8q5LFmyhE2bNvHggw/yla90v/ugN7rjjjtobm7Oa4Z/pSun7joliB40fLBx1aTu3ynuvnWdT4YqlsHvg0nndf/4dWuKF0tSNDU1sXr1atyd1atX84UvfKFiWhH19fXtM/23bt1atOt4ldwi6y3KPkFcc801fP/7328vf/WrXy1hNJWptraW3e/syvr8wb3B7/6Dc9eTYVcnw1z3hrNeB/eLfj48nqGdv2bKkiVLOHw4SNCHDx/uUivCm3ZnnSjne4OZyDb42KzH0gPDXDvTnXXC8lHJLbLeouwTxLRp0zISxNSpU7tcR1S/fFRffDn2uxdDrrPNLe8E/5YfGJZ9UUGGZdaTs869YZ1DO6lzaGY9CxYs4IknnmgvHzx4kLa2oE8t9Ttl5cqVrFq1iqqqqqOWRTn33HPb3we5//ZgEucHsiWBYScdVUdjYyPe1MSBxf8YfUxLS/C7OsvH+/BhGg/nf0vbqHXCCtXU1MSaNWtwd9asWVNRLbLepNcniI4f6ubm5va7dEVJzQQ2M2pqatq3p3+o83HMMdnuhRytsbGRpibn75dHr+cCkFr6prpP9POHWmEI8fbDB3HC8mXRz7eGMfbJEiNAawvQdiTOOBYV7ImFCltbWzt9L7W1tXX6PBwdZz6DFXKdaBw1ZyEtkaXiAqhKJQrITGR9+3bpAmgcqxEsWbIkI/nm04ro+FmH3J93yP1Z3773jU4nyu3aF9ztb+ig6CS+fe8bfLB2XKcxFKqr33MphX7P9foE0ROK0SrI9aEG2ofCeVtV+7b0D3Z1WE+ccsXpqQ81VRnHZXwB9Ys/zjhcffXVWVc43bjx6IvyH//4xwtuNXb1RCMVV8dyetIp9kqhLWmJJqrcHU8++WR7PS0tLTz55JMl6WbK51rKH7YEJ
3Ud5zqkfLB2XNmO7LJ8slBSTZw40devX9/pPulrB6WsXl362zj2xBLAxTiLzufLB0ofZ9x1zpw5M6NrZcyYMRUzmufee+9l5cqV7eULLrig4C/ze++9l8cff5yWlhaqq6uZMmVKYq5DdHzPp7qTP/CBI92Vvb072cw2uPvEXPupBVEiveXN1VvijNucOXMyVttN8npZxTZjxoyML/MvfOELRalzzZpguFtVVVVR6oxLd1p55SJxCcLMpgD/APQB7nP32wqp75JLLmHZsiMd6jNmzCgsQEmkbGd9xRpEMG7cOMaMGcPWrVsTtdpuTxgyZAhTpkzhscceY8qUKUW5mDxkyBDOO+88HnvsMc4777xEXaDWSdERVbl36Tlm1geYD5wPfBS41Mw+WkidV1xxRUb5sssuK6Q66SWOOeaYop/5zZkzh5qamopqPaTMmDGDU089tahn+nHUKcWVqGsQZvZJ4CZ3rwvLNwC4+61R++dzDQJg0aJFLFu2jBkzZpR1gsin7xRK339aCX28lS6uEUdSHPleg0hagvg8MMXdrwrLXwQ+4e7XpO0zE0jd9upDwH/lWf1QIPtsre7pDXX2hhhVp+pUnT1b5xh3H5Zrp6Rdg4haqCgjg7n7QqDLw0fMbH0+GbPc6uwNMapO1ak6k1lnoq5BANuAUWnlkyHmmWEiIhIpaQniN8B4M3u/mfUDpgPLSxyTiEhFSlQXk7u3mNk1wGqCYa6L3P3lIlUfx6ym3lBnb4hRdapO1ZnAOhN1kVpERJIjaV1MIiKSEEoQIiISqewThJldZGZuZh8uYp2tZvaCmW0ys4fNrCb3UTnr/L9m9rKZbQzr/kSB9S0ysx1mtqnQ2NLqPMbMnjWzF8NYv12EOkeZ2Voz2xzWWfAdncxsuJktNbMtZvaKma0ysw8WWOeHwv+X1M87Zva1btTz3fTjzGy1md2XVr7bzP62wFj7mNnzZrYy99551dfa4W8/pQh17su9V5fr/Gr4mXy5O/83Weo83sx+ZGavhu/RTxahztfN7KXw3zL3TN/86vw/4d+9ycx+aGbFWUbA3cv6B3gI+CXBDO1i1bkv7fGDwN8WWN8ngaeB/mF5KFBbYJ2TgDOATUX8uw0YFD7uC/waOKvAOkcAZ4SPjwV+C3y0wBifBv4mbdtpwJ8V8d+hD/AHgslGXT32YuCh8HEVsAF4Ou35pwkmhxYS398CS4CVRfp79xWjnjjrBE4FNgE1BINvfgaML0K9i4Grwsf9gOOLUOfrwNAi/u0jgd8BA8LyQ8CXi1F3WbcgzGwQcDZwJcGQ2Tj8Eij0biEjgF3ufhDA3Xe5e0HzP9x9HbC7wLg61ununjrz6xv+FDTKwd3fdPfnwsfvApsJ3vDdNRk47O7/P+01XnD3XxYSZwfnAFvcvTu3VvtP4E/Cxx8j+FJ718zeZ2b9gY8Az3c3MDM7GfhfwH259i0zHwGecfdmd28B/gO4qJAKzew4ghOt+wHc/ZC7v11wpPGoBgaYWTVBkizK/LGyThDA54DH3f23wG4zO6OYlYf/GecDLxVY1RpglJn91sz+0cz+Z+HRxSPsvngB2AE84e6/LmLdpwCnE7RMuutUgrPyOE0HftidA8PE32JmowkSxdMEf+8ngYnARnfPftvB3L4HzAHacu3YBQPSupceLWK9xbQJmGRmQ8Iu36lkTrrtjrHATuAHYZfdfWY2sNBACU6q1pjZhnDpoMIqc98O3AW8AbwJ7HX3NYXWC+WfIC4FloaPl4blYhgQfkmuJ/hPub+QysKz8gkEa0ztBJaZ2ZcLDTIO7t7q7qcRzHI/08xOLUa9YWvvEeBr7v649Pf1AAAEDUlEQVROMeqMQziB87PAwwVUk2pFpBLE02nlXxUQ2wXADncvdoLc7+6nhT8FnZXHxd03A7cDTwCPAy8Chd76rpqgm3aBu58OvAcUYynfs939DIKTy1lmNqmQyszsfcCFwPuBWmCgmf1l4WGWcYIwsyHAp4H7z
Ox1YDZwiZlFrffUVekfmK8UeMYHtH/x/sLdvwVcA/x54WHGJ2xq/wKYUmhdZtaXIDk86O4/LrC6lwmSbVzOB55z97cKqONXBMngfxCc+T5D0IL4E4Lk0V1nA58N3+9LgU+b2b8VUF+v4u73u/sZ7j6JoHv1tQKr3AZsS2sl/4ggYRQk1X3s7juAR4EzC6zyM8Dv3H2nux8GfsyRbsyClG2CAD4PPODuY9z9FHcfRXAh509LHNdRwhEy49M2nQZ0p387VmY2zMyODx8PIHhjvlpgnUbQAtvs7t8pPEqeBPqb2V+lvcYfF7Hb7lK62b2U5j+BC4Dd4YnBbuB4jgxW6BZ3v8HdT3b3Uwi6wZ5096KcSfYGZnZi+Hs08L8p8P/J3f8A/N7MPhRuOgd4pcAYB5rZsanHwHkEJwmFeAM4y8xqws/TOQTX8gpWzgniUoLsnO4RIIm3lBsELA6HZG4kuFnSTYVUaGY/JPiy+ZCZbTOzKwsPkxHA2jDG3xBcgyh0KOXZwBcJznZT/dxTu1uZB8M4LgLODYe5vkzwb1nwRbuwb/tcgjO0QrxEMFLtmQ7b9rp7sZd/Tqqa8H2Z+iloaG/oETN7BVgBzHL3PUWo8yvAg+F7/jRgXoH1nQQ8ZWYvAs8Cj7n744VUGLZwfgQ8R/A+qqJIS25oqQ0REYlUzi0IEREpgBKEiIhEUoIQEZFIShAiIhJJCUJERCIpQYhkES7bkBp6+wcz255W7nTGs5n9wszyvnm8mX3NirAqsEgxJeqWoyJJ4u5NBGPfMbObCFYgvSuml/sa8G9Ac0z1i3SZWhAi3ZB+PwMzmxOu7/+imd3WYb8qM1tsZjeH5fPM7Gkze86Ce4kMMrNrCdbQWWtma3v2LxHJTi0IkQKY2fkEqwZ/wt2bzeyEtKerCe4XssndbzGzocCNwGfc/T0zu57gXiL/L5xJPLmCZlJLL6AEIVKYzwA/cPdmgHBdpZR/Irg50C1h+SyCZVT+M1wzsh8FrL0kEjclCJHCGNlvmvQrYLKZ3e3uB8J9n3D3Yi07LxIrXYMQKcwa4IrUCKQOXUz3A6uAh8ObSz0DnG1m48J9a+zIvbLfJbjlqkhiKEGIFCBciXM5sD68idR1HZ7/DsEqm/8KNAFfBn4Yrg76DPDhcNeFwE91kVqSRKu5iohIJLUgREQkkhKEiIhEUoIQEZFIShAiIhJJCUJERCIpQYiISCQlCBERifTfWXDY2QM0oh8AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# use boxplot to visualize the distribution of Fare for each Ticket\n", + "sns.boxplot('Ticket','Fare',data=df)\n", + "plt.ylim(0, 300) # ignore one data point with Fare > 500\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "_cell_guid": "0c52113f-bf33-4187-9393-49e050b0503c", + "_execution_state": "idle", + "_uuid": "3f7e7fb7ee1879e529ca16e7af2d72f4131f759f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare
Ticket
165.771211
220.235194
315.380682
416.764036
519.262500
619.153711
79.197438
88.397900
97.750000
A10.189681
C28.152273
F24.677246
L1.515000
P119.698253
S17.542900
W31.056579
\n", + "
" + ], + "text/plain": [ + " Fare\n", + "Ticket \n", + "1 65.771211\n", + "2 20.235194\n", + "3 15.380682\n", + "4 16.764036\n", + "5 19.262500\n", + "6 19.153711\n", + "7 9.197438\n", + "8 8.397900\n", + "9 7.750000\n", + "A 10.189681\n", + "C 28.152273\n", + "F 24.677246\n", + "L 1.515000\n", + "P 119.698253\n", + "S 17.542900\n", + "W 31.056579" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the correlation between Ticket and Fare \n", + "# (we saw this earlier)\n", + "df[['Ticket', 'Fare']].groupby(['Ticket']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "_cell_guid": "cbd10230-dda2-4819-b330-3e67a6bef4a4", + "_execution_state": "idle", + "_uuid": "c6407a63d494490385a0024f572f3d3b7c1c5dc1" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare
Ticket
10.817411
21.284139
31.351244
40.497578
50.284367
60.735873
70.080078
80.005961
90.000000
A0.466846
C0.736299
F0.556023
L2.236068
P0.944596
S1.024639
W0.676999
\n", + "
" + ], + "text/plain": [ + " Fare\n", + "Ticket \n", + "1 0.817411\n", + "2 1.284139\n", + "3 1.351244\n", + "4 0.497578\n", + "5 0.284367\n", + "6 0.735873\n", + "7 0.080078\n", + "8 0.005961\n", + "9 0.000000\n", + "A 0.466846\n", + "C 0.736299\n", + "F 0.556023\n", + "L 2.236068\n", + "P 0.944596\n", + "S 1.024639\n", + "W 0.676999" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# divide the standard deviation by the mean. A lower ratio means a tighter \n", + "# distribution of Fare in each Ticket type\n", + "df[['Ticket', 'Fare']].groupby(['Ticket']).std() / df[['Ticket', 'Fare']].groupby(['Ticket']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "_cell_guid": "43b41c45-b149-44da-a031-42b409966f28", + "_execution_state": "idle", + "_uuid": "84e953d33cf72578729f20c54cb8c2e8c29bcb54" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEKCAYAAAAIO8L1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAGfRJREFUeJzt3X90X3Wd5/HnKw2FlmynsoYfkjJlSVHRxUozEdfZQDHOoMcRyugMPW6puyx1WZCDw54VagtxCl3Xs/ijR+3QCmPoskUc6dADCEOhlWVGDakwhYLaiPzIwEILCERa2jTv/SP3W75Jb5M0zc2933xfj3Nyvt/P/d7v/b6bnOaV9/3xuYoIzMzMBqvJuwAzMysmB4SZmaVyQJiZWSoHhJmZpXJAmJlZKgeEmZmlyiwgJB0hqUPSP0vaKukryfITJf1c0jZJP5A0OVl+eDLuSl6fmVVtZmY2vCw7iLeAsyLiA8Bs4GxJpwP/E/hGRMwCXgUuTNa/EHg1IhqBbyTrmZlZTjILiOjXkwwPS74COAv4u2R5O3Bu8vycZEzy+kclKav6zMxsaLVZblzSJGAz0Ah8B/gN8LuI6E1W6QaOT54fDzwHEBG9kl4D/jWwY9A2FwGLAI488sg573nPe7L8J5iZTTibN2/eERH1w62XaUBExF5gtqTpwDrgvWmrJY9p3cJ+84BExCpgFUBTU1N0dnaOUbVmZtVB0jMjWW9czmKKiN8Bm4DTgemSSsHUADyfPO8GZgAkr/8B8Mp41GdmZvvL8iym+qRzQNIUoBV4EtgIfDpZbSFwR/J8fTImef2B8EyCZma5yXIX03FAe3Icoga4LSLulPQEcKuka4FHgBuT9W8E1kjqor9zOD/D2szMbBiZBUREbAE+mLL8KaA5Zfku4DNZ1WNmZgfHV1KbmVkqB4SZmaVyQJiZWSoHhJmZpXJAmJlZKgeEmZmlckCYmVkqB4SZmaVyQJiZWSoHhJmZpXJAmJlZKgeEmZmlckCYmVkqB4SZmaVyQJiZWS
oHhJmZpXJAmJlZKgdEQa1Zs4aWlhbWrl2bdylmVqUcEAW1evVqAFauXJlzJTYaN9xwAy0tLdx4443Dr2xWUA6IAlqzZs2AsbuIynPLLbcA0N7ennMlZqPngCigUvdQ4i6istxwww0Dxu4irFI5IMzGWKl7KHEXYZXKAWFmZqkcEAV00UUXDRhffPHFOVViZtXMAVFACxYsGDCeP39+TpXYaHz2s58dMF64cGFOlZgdGgdEQZW6CHcPlefzn//8gPGFF16YUyVmh8YBUVALFizgwQcfdPdQoUpdhLsHq2SKiGw2LM0AbgaOBfqAVRHxLUltwEXA9mTVxRFxd/Keq4ALgb3AZRFx71Cf0dTUFJ2dnZnUb2Y2UUnaHBFNw62XZQfRC1wREe8FTgcukXRK8to3ImJ28lUKh1OA84H3AWcD35U0KcP6zDLT0dHBmWeeyebNm/MuxWzUMguIiHghIn6RPH8DeBI4foi3nAPcGhFvRcRvgS6gOav6zLLU1tZGX18fS5cuzbsUs1Ebl2MQkmYCHwR+niy6VNIWSTdJekey7HjgubK3dTN0oJgVUkdHBz09PQD09PS4i7CKlXlASKoDfgRcHhGvAyuBk4DZwAvA9aVVU96+3wESSYskdUrq3L59e8pbzPLV1tY2YOwuwipVpgEh6TD6w+GWiLgdICJejIi9EdEHrObt3UjdwIyytzcAzw/eZkSsioimiGiqr6/PsnyzUSl1Dwcam1WKzAJCkoAbgScj4utly48rW20e8HjyfD1wvqTDJZ0IzAI6sqrPLCt1dXVDjs0qRZYdxEeABcBZkh5Nvj4BfE3SY5K2AHOBLwJExFbgNuAJ4B7gkojYm2F9ZpkYvItp2bJl+RRidohqs9pwRDxE+nGFu4d4z3XAdVnVZDYempubqauro6enh7q6OubMmZN3SWaj4iupzTLQ1tZGTU2NuweraJl1EGbVrLm5mU2bNuVdhtkhcQdhZmapHBBmZpbKAWFmZqkcEGZmlsoBYWZmqRwQZhlYt24dLS0trF+/Pu9SzEbNAWGWgW9+85sAXH/99cOsaVZcDgizMbZu3TpKd2qMCHcRVrEcEGZjrNQ9lLiLsErlgCioDRs20NLSwsaNG/MuxQ7S4Pu8Z3Xfd7OsOSAKavny5YBnAq1E/TPdH3hsVikcEAW0YcMGent7Aejt7XUXUWEuv/zyAeMrrrgip0rMDo0DooBK3UOJu4jKMm/evH1dgyQ+9alP5VyR2eg4IAqo1D0caGzFV+oi3D1YJfN03wVUW1s7IBRqa/1jqjTz5s1j3rx5eZdhdkjcQRTQ4sWLB4yXLl2aUyVmVs0cEAXU2tq6r2uora1l7ty5OVdkZtXIAVFQpQOb5513Xs6V2Gh4LiabCBwQBVX6xXL77bfnXImNhudisonAAVFAvg6isnkuJpsoVMnTADQ1NUVnZ2feZYy5s846a7+zmB544IEcK7KDccYZZwyYXkMSP/nJT3KsyGwgSZsjomm49dxBFJCvg6hsnovJJgoHRAENvu7B10FUFs/FZBOFA6KAfB1EZfNcTDZROCAKyNdBVDbPxWQThQOioEpdhLuHyuS5mGwiyOwsJkkzgJuBY4E+YFVEfEvSUcAPgJnA08BfRMSr6v+T61vAJ4A3gc9FxC+G+oyJehaTmVmWinAWUy9wRUS8FzgduETSKcCVwP0RMQu4PxkDfByYlXwtAlZmWJuZmQ0js4CIiBdKHUBEvAE8CRwPnAO0J6u1A+cmz88Bbo5+PwOmSzouq/rMzGxo43IMQtJM4IPAz4FjIuIF6A8R4OhkteOB58re1p0sG7ytRZI6JXVu3749y7LNzKpa5gEhqQ74EXB5RLw+1Kopy/Y7QBIRqyKiKSKa6uvrx6pMMzMbJNOAkHQY/eFwS0SUZp17sbTrKHl8KVneDcwoe3sD8HyW9ZmZ2YFlFhDJWUk3Ak9GxNfLXloPLEyeLwTuKFt+gfqdDrxW2hVlZmbjL8s5HD4CLA
Aek/Rosmwx8FXgNkkXAs8Cn0leu5v+U1y76D/N9T9mWJuZmQ0js4CIiIdIP64A8NGU9QO4JKt6zMzs4PhKajMzS+WAMDOzVA4IMzNL5YAwM7NUDggzM0vlgDAzs1QOCDMzS+WAMDOzVA4IMzNL5YAwM7NUDggzM0vlgDAzs1QOCDMzS+WAMDOzVA4IMzNL5YAoqA0bNtDS0sLGjRvzLsXMqpQDoqCWL18OwLJly3KuxMyqlQOigDZs2EBvby8Avb297iLMLBcOiAIqdQ8l7iLMLA8OiAIqdQ8HGpuZjQcHRAHV1tYOOTYzGw8OiAJavHjxgPHSpUtzqsTMqtmIAkL9/oOkq5PxCZKasy2terW2tu7rGmpra5k7d27OFZlZNRppB/Fd4MPA/GT8BvCdTCoy4O0uwt2DmeVlpDu3PxQRp0l6BCAiXpU0OcO6qt60adOoqalh2rRpeZdiZlVqpB3EHkmTgACQVA/0ZVaVcdVVV9HX18eXvvSlvEsxsyo10oBYAawDjpZ0HfAQsHzot9hodXR0sGfPHgB2797N5s2bc67IzKrRiAIiIm4B/jvwP4AXgHMj4odDvUfSTZJekvR42bI2Sf8i6dHk6xNlr10lqUvSryT96ej+ORPDVVddNWDsLsLM8jDsMQhJNcCWiHg/8MuD2Pb3gW8DNw9a/o2I+F+DPuMU4HzgfcC7gA2STo6IvQfxeRNGqXso2b17d06VmFk1G7aDiIg+4J8lnXAwG46IB4FXRrj6OcCtEfFWRPwW6AJ8Gq2ZWY5GehbTccBWSR3A70sLI+JTo/jMSyVdAHQCV0TEq8DxwM/K1ulOlu1H0iJgEcAJJxxUZpmZ2UEY6UHqrwCfBP4auL7s62CtBE4CZtN/LKO0DaWsG2kbiIhVEdEUEU319fWjKKH4LrroogHjiy++OKdKzKyajaiDiIifjMWHRcSLpeeSVgN3JsNuYEbZqg3A82PxmZVowYIFrF69et94/vz5Q6xto7VixQq6uroy2XZ3dzcADQ0NY77txsZGLrvssjHfrtlgI51q43RJD0vqkbRb0l5Jrx/sh0k6rmw4Dyid4bQeOF/S4ZJOBGYBHQe7/Ymk1EW4e6hMO3fuZOfOnXmXYXZIFJG6J2fgSlIn/WcZ/RBoAi4AZkXE4iHesxY4E3gn8CJwTTKeTf/uo6eBz0fEC8n6Xwb+E9ALXB4RPx6urqampujs7By2frPxVvoLf8WKFTlXYrY/SZsjomm49UY8j3REdEmalJx6+reS/mmY9dP2i9w4xPrXAdeNtB4zM8vWSA9Sv5nMvfSopK9J+iJwZIZ1Vb0dO3bwhS98gZdffjnvUsysSo00IBYk615K/2muM4A/z6oog/b2drZs2UJ7e3vepZhZlRoyIEoXx0XEMxGxKyJej4ivRMRfRUQ2p38YO3bs4K677iIiuOuuu9xFmFkuhusg/r70RNKPMq7FEu3t7fvuQ71nzx53EWaWi+ECovwCtn+TZSH2tnvvvXfA+J577smpEjOrZsMFRBzguWWodLvRA43NzMbDcL95PpBcECdgStnFcQIiIny7swz09PQMOTYzGw9DBkRETBqvQuxtM2fO5Omnnx4wNjMbbyM9zdXG0ZIlSwaMr7766pwqMbNq5oAooJNPPnlf1zBz5kwaGxvzLcjMqpIDoqCWLFnCkUce6e7BzHLj02MK6uSTT+bHPx52vkIzs8y4gyioDRs20NLSwsaNG/MuxcyqlAOioJYvXw7AsmXLcq7EzKqVA6KANmzYsG+qjd7eXncRZpYLB0QBlbqHEncRZpYHB0QBlbqHA43NzMaDA6KAPBeTmRWBA6KAFi8eeKvvpUuX5lSJmVUzB0QBtba27usaamtrmTt3bs4VmVk1ckAUVKmLcPdgZnnxzu2Cam1tpbW1Ne8yzKyKuYMwM7NUDggzM0vlgDAzs1QOCDMzS+WAMDOzVJkFhKSbJL0k6fGyZUdJuk/StuTxHclySVohqUvSFkmnZV
WXmZmNTJYdxPeBswctuxK4PyJmAfcnY4CPA7OSr0XAygzrMjOzEcgsICLiQeCVQYvPAdqT5+3AuWXLb45+PwOmSzouq9rMzGx4430M4piIeAEgeTw6WX488FzZet3Jsv1IWiSpU1Ln9u3bMy3WzKyaFeUgtVKWRdqKEbEqIpoioqm+vj7jsvLjW46aWd7GOyBeLO06Sh5fSpZ3AzPK1msAnh/n2grFtxw1s7yNd0CsBxYmzxcCd5QtvyA5m+l04LXSrqhq5FuOmlkRZHma61rgp8C7JXVLuhD4KvAxSduAjyVjgLuBp4AuYDXwX7OqqxL4lqNm+eno6ODMM89k8+bNeZeSu8xmc42I+Qd46aMp6wZwSVa1VBrfctQsP21tbfT19bF06VLuvvvuvMvJVVEOUluZSZMmDTk2s2x0dHTQ09MDQE9PT9V3EQ6IApo+ffqQYzPLRltb24Bxtd+wywFRQC+//PKQYzPLRql7ONC42jggCkjSkGMzy0ZdXd2Q42rjgCig/mP2Bx6bWTYG72Kq9jMIHRBmZonm5uZ9XUNdXR1z5szJuaJ8OSDMzMq0tbVRU1NT9d0DZHgdhJlZJWpubmbTpk15l1EI7iDMzCyVOwgrvBUrVtDV1ZV3GQdl27ZtAFx22WU5VzJyjY2NFVWvZc8BYYXX1dXFrx//BSfU7c27lBGbvKe/Od/19MM5VzIyz/b4an3bnwPCKsIJdXtZ0lTdFy1l6drO6j7f39L5GEQBHXbYYQPGkydPzqkSM6tmDogC2rNnz4Dx7t27c6rEzKqZA8LMzFI5IMzMLJUDwszMUjkgzMwslQOigHxHOTMrAgdEAe3du3fIsZnZeHBAmJlZKgeEmZmlckCYmVkqB4SZmaXyZH2HKIupqCdPnjxgeo3JkyeP6TTMntbZzEbCHUQBzZw5c8ixmdl4cAdxiLL6S7y1tZXdu3fzrne9i+9973uZfIaZ2VBy6SAkPS3pMUmPSupMlh0l6T5J25LHd+RRW1HMnDmTmpoarr322rxLMbMqlecuprkRMTsimpLxlcD9ETELuD8ZV62pU6dy6qmn0tjYmHcpZlalinQM4hygPXneDpybYy1mZlUvr4AI4B8kbZa0KFl2TES8AJA8Hp1TbWZmRn4HqT8SEc9LOhq4T9IvR/rGJFAWAZxwwglZ1WdmVvVy6SAi4vnk8SVgHdAMvCjpOIDk8aUDvHdVRDRFRFN9ff14lWxmVnXGPSAkHSnpX5WeA38CPA6sBxYmqy0E7hjv2szM7G157GI6BlgnqfT5/yci7pH0MHCbpAuBZ4HP5FCbmZklxj0gIuIp4AMpy18GPjre9ZiZWboineZqZmYF4oAwM7NUDggzM0vlgDAzs1QOCDMzS+WAMDOzVA4IM7Mya9asoaWlhbVr1+ZdSu4cEGZmZVavXg3AypUrc64kfw4IM7PEmjVrBoyrvYvwLUet8Lq7u/n9G5O4trMu71ImrGfemMSR3d15l5G7UvdQsnLlSubPn59TNfmrioBYsWIFXV1deZdxULZt2wZkd8/rrDQ2NlZczWaWrioCoquri0cee4K+qUflXcqIaXcAsPk3/y/nSkau5s1XMtluQ0MDu3pfYElTTybbN7i2s44jGhryLsMKpioCAqBv6lHsOuWTeZcxoR3xxJ15l2B2SCZNmsTevXsHjKuZD1KbmSXKwyFtXG0cEGZmlsoBYWaWmDJlypDjauOAMDNLLFu2bMB4+fLlOVVSDA4IM7NEc3PzgPGcOXNyqqQYHBBmZolrrrlmwHhwR1FtHBBmZomNGzcOGN933305VVIMVXEdRHd3NzVvvubz9DNW8+bLdHf3ZrLtZ3sqa6qNF9/s/9vrmKl9OVcyMs/2TOLkvIuwwqmKgLDK1tjYmHcJB213MlXKETNn5VzJyJxM5X2fx2sKnbGeOqaSpqOpioBoaGjgxbdqfSV1xo544k4aGo4d8+1Wyn+mcqWaV6xYkXMl+cvqF3l3dzc7d+4c8+
0OVpoXbax0d3dn8v3IIniqIiDMLD9dXV08svURmD7GGxYwdYy3ORXYXjauhx7Gdg6wHnrY/i/bh1/xYPxubDdXUjUBUfPmKxV1DEK7XgcgjpiWcyUj1z9Z39h3EFbZuru7oZfMfollRlROzb3J93mMVUVAVNq+VYBt294AYNZJlfQL99iK/F5btqZPn57JrqC33nqLvr6xPwmgj/5t1qgGMpiKqaamhsMPP3xsNzq5//s81qoiILwP2yw/N910UybbzfLYBvQfu8yCD1KbVYAsz4LJ8oZPlfQLJkv+HmSvcBfKSTpb0q8kdUm6Mu96zEZjypQpVT/Rm1W+QnUQkiYB3wE+BnQDD0taHxFP5FuZTUT+C9RsaIUKCKAZ6IqIpwAk3QqcAxQ2ILLaTeFdFGaWN0VE3jXsI+nTwNkR8Z+T8QLgQxFxadk6i4BFyfDdwK/GvdDx805gR95F2Kj551e5JvrP7g8jon64lYrWQShl2YAEi4hVwKrxKSdfkjojoinvOmx0/POrXP7Z9SvaQepuYEbZuAF4PqdazMyqWtEC4mFglqQTJU0GzgfW51yTmVlVKtQupojolXQpcC8wCbgpIrbmXFaeqmJX2gTmn1/l8s+Ogh2kNjOz4ijaLiYzMysIB4SZmaVyQBSQpC9L2ippi6RHJX0o75ps5CQdK+lWSb+R9ISkuyX5jp4VQFKDpDskbZP0lKRvSxrjqVcrhwOiYCR9GPgkcFpEnAq0As/lW5WNlCQB64BNEXFSRJwCLAaOybcyG07ys7sd+PuImAXMAqYAX8u1sBwV6iwmA+A4YEdEvAUQERP5as6JaC6wJyL+prQgIh7NsR4bubOAXRHxtwARsVfSF4FnJH05Isb21nIVwB1E8fwDMEPSryV9V9IZeRdkB+X9wOa8i7BReR+DfnYR8TrwNFCVd8JyQBRM8lfKHPrnm9oO/EDS53Ityqw6iEFT+5Qtr0oOiAKKiL0RsSkirgEuBf4875psxLbSH/BWebYCA+ZfkjSN/uNHE3lS0ANyQBSMpHdLmlW2aDbwTF712EF7ADhc0kWlBZL+yLsKK8L9wFRJF8C++9NcD3w7Isb+ptoVwAFRPHVAe3J65BbgFKAt35JspKJ/aoJ5wMeS01y30v/z86STBVf2s/u0pG3Ay0BfRFyXb2X58VQbZmYpJP07YC1wXkRU5YkHDggzM0vlXUxmZpbKAWFmZqkcEGZmlsoBYWZmqRwQVpUk7U1myi19XXkQ7z1T0p2H+PmbJDUNv2Y2n282Ep6sz6rVzoiYnccHJxdgmRWeOwizMpKelrRc0k8ldUo6TdK9yUVv/6Vs1WmS1iUXNP6NpJrk/SuT922V9JVB271a0kPAZ8qW10hql3RtMv6T5LN/IemHkuqS5WdL+mXy/vPG5ZthVc8BYdVqyqBdTH9Z9tpzEfFh4P8C3wc+DZwO/HXZOs3AFcC/BU7i7V/aX46IJuBU4AxJp5a9Z1dE/HFE3JqMa4FbgF9HxBJJ7wSWAK0RcRrQCfyVpCOA1cCfAf8eOHaMvgdmQ/IuJqtWQ+1iWp88PgbURcQbwBuSdkmanrzWERFPAUhaC/wx8HfAX0haRP//rePonyplS/KeHwz6nBuA28qmcjg9Wf8f++9dw2Tgp8B7gN9GxLbk8/43/bP9mmXKAWG2v7eSx76y56Vx6f/M4CkIQtKJwH8D/igiXpX0feCIsnV+P+g9/wTMlXR9ROyif1rp+yJifvlKkmanfJ5Z5ryLyWx0miWdmBx7+EvgIWAa/SHwmqRjgI8Ps40bgbuBH0qqBX4GfERSI4Ckqcm9rH8JnCjppOR981O3ZjbG3EFYtZoiqfxWoPdExIhPdaV/189X6T8G8SCwLiL6JD1C/30FngL+cbiNRMTXJf0BsAb4LPA5YK2kw5NVlkTEr5PdVndJ2kF/GL3/IGo1GxVP1mdmZqm8i8nMzFI5IMzMLJUDwszMUjkgzMwslQPCzMxSOSDMzCyVA8LMzF
L9f1LBoZmsHDwLAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# use boxplot to visualize the distribution of Fare for each Embarked\n", + "sns.boxplot('Embarked','Fare',data=df)\n", + "plt.ylim(0, 300) # ignore one data point with Fare > 500\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "_cell_guid": "c4eb229d-67fc-423f-8d67-5d415e30ee53", + "_execution_state": "idle", + "_uuid": "c025b7ada92e44d73f768eaa49d39c49fb1dfd0c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare
Embarked
C62.336267
Q12.409012
S27.418824
\n", + "
" + ], + "text/plain": [ + " Fare\n", + "Embarked \n", + "C 62.336267\n", + "Q 12.409012\n", + "S 27.418824" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the correlation between Embarked and Fare\n", + "df[['Embarked', 'Fare']].groupby(['Embarked']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "_cell_guid": "e34bef31-0dea-4c44-877e-98a72bd53036", + "_execution_state": "idle", + "_uuid": "af43b5c1114f3912f0c6b6015e53c12affb86ce5" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare
Embarked
C1.350514
Q1.097278
S1.352954
\n", + "
" + ], + "text/plain": [ + " Fare\n", + "Embarked \n", + "C 1.350514\n", + "Q 1.097278\n", + "S 1.352954" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# divide the standard deviation by the mean. A lower ratio means a tighter \n", + "# distribution of Fare in each Embarked\n", + "df[['Embarked', 'Fare']].groupby(['Embarked']).std() / df[['Embarked', 'Fare']].groupby(['Embarked']).mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "458e4c5d-5a34-4c88-8200-deff90767bba", + "_execution_state": "idle", + "_uuid": "145eb63ced56b167bab74eb977a86df69680a106" + }, + "source": [ + "Looks like Fare indeed has correlation with these three features. I'll guess the missing value using the median value of (Pclass = 3) & (Ticket = 3) & (Embarked = S)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "_cell_guid": "16822339-ab02-4799-b4ba-8c2ae824bb03", + "_execution_state": "idle", + "_uuid": "f277be6f1b7c6e16d77dc0353f885f28906e229a" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SurvivedFare
00.022.117887
11.048.395408
\n", + "
" + ], + "text/plain": [ + " Survived Fare\n", + "0 0.0 22.117887\n", + "1 1.0 48.395408" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "guess_Fare = df.Fare.loc[ (df.Ticket == '3') & (df.Pclass == 3) & (df.Embarked == 'S')].median()\n", + "df.Fare.fillna(guess_Fare , inplace=True)\n", + "\n", + "# inspect the mean Fare values for people who died and survived\n", + "df[['Fare', 'Survived']].groupby(['Survived'],as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "_cell_guid": "e8477ab2-68ca-4632-835f-ba8df5a6ba36", + "_execution_state": "idle", + "_uuid": "2e55f9e20c72a0d8621b838ed15386c7ce3be9b4" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\HP-PC\\Anaconda3\\lib\\site-packages\\seaborn\\axisgrid.py:230: UserWarning: The `size` paramter has been renamed to `height`; please update your code.\n", + " warnings.warn(msg, UserWarning)\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAd4AAAEYCAYAAADyL5dqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFepJREFUeJzt3X+w5XV93/Hnq4AmEcsPWZgVMAt0cUKauOItYmhSHUoEmriaBIM1uk2Yrp2BjlKdBqST0M44wQZldCKYtTAujggYddxpCbIlWpNO+LHQ5ceCwAY2sLJZFkzBn8TFd/8436tnb8/d++vczz3n8nzMnDnf8znf7+e8z/d+97z2+ztVhSRJauMfLXUBkiS9mBi8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDR241AUAnHnmmXXzzTcvdRmSpKWTpS6glZFY43366aeXugRJkpoYieCVJOnFwuCVJKkhg1eSpIYMXkmSGjJ4JUlqyOCVJKkhg1eSpIYMXkmSGjJ4JUlqaCQuGTksV2x+eMF9XHjGiUOoRJKkwVzjlSSpIYNXkqSGDF5JkhoyeCVJasjglSSpIYNXkqSGDF5JkhoyeCVJasjglSSpIYNXkqSGDF5JkhqaMXiTHJvkq0keTLItyXu79kuTfDPJ1u5xdt80FyfZnuShJG9ezC8gSdI4mc1NEvYC76+qu5O8HLgryebuvSuq6vL+kZOcBJwL/DzwSuB/Jjmxql4YZuGSJI2jGdd4q2pXVd3dDX8beBA4ej+TrAWur6rnq+oxYDtwyjCKlSRp3M1pH2+SVcBrgdu7pguS3JvkmiSHdW1HA0/0TbaT/Qe1JEkvGrMO3iQHA18A3ldVzwFXAScAa4BdwEcmRx0weQ3ob32SLUm27NmzZ86FS5I0jmYVvEkOohe6n62qLwJU1e6qeqGqfgR8ip9sTt4JHNs3+THAk1P7rKoNVTVRVRMrVqxYyHeQJGlszOao5gBXAw9W1Uf72lf2jfY24P5ueBNwbpKXJjkOWA3cMbySJUkaX7M5qvk04F3AfUm2dm0fBN6RZA29zcg7gPcAVNW2JDcCD9A7Ivp8j2iWJKlnxuCtqr9i8H7bm/YzzYeADy2gLkmSliWvXCVJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJDRm8kiQ1ZPBKktTQjMGb5NgkX03yYJJtSd7btR+eZHOSR7rnw7r2JPl4ku1J7k1y8mJ/CUmSxsVs1nj3Au+vqp8DTgXOT3IScBFwa1WtBm7tXgOcBazuHuuBq4ZetSRJY2rG4K2qXVV1dzf8beBB4GhgLbCxG20j8NZueC1wbfXcBhyaZOXQK5ckaQzNaR9vklXAa4HbgaOqahf0whk4shvtaOCJvsl2dm2SJL3ozTp4kxwMfAF4X1U9t79RB7TVgP7WJ9mSZMuePXtmW4YkSWNtVsGb5CB6ofvZqvpi17x7chNy9/xU174TOLZv8mOAJ6f2WVUbqmqiqiZWrFgx3/olSRorszmqOcDVwINV9dG+tzYB67rhdcCX+9rf3R3dfCrw7OQmaUmSXuwOnMU4pwHvAu5LsrVr+yBwGXBjkvOAx4FzuvduAs4GtgPfA353qBVLkjTGZgzeqvorBu+3BTh9wPgFnL/AuiRJWpa8cpUkSQ0ZvJIkNWTwSpLUkMErSVJDBq8kSQ0ZvJIkNWTwSpLUkMErSVJDBq8kSQ0ZvJIkNWTwSpLUkMErSVJDBq8kSQ0ZvJIkNWTwSpLUkMErSVJDBq8kSQ0ZvJIkNWTwSpLUkMErSVJ
DBq8kSQ0ZvJIkNWTwSpLUkMErSVJDBq8kSQ0ZvJIkNWTwSpLUkMErSVJDBq8kSQ0ZvJIkNWTwSpLUkMErSVJDMwZvkmuSPJXk/r62S5N8M8nW7nF233sXJ9me5KEkb16swiVJGkezWeP9NHDmgPYrqmpN97gJIMlJwLnAz3fTXJnkgGEVK0nSuJsxeKvq68C3ZtnfWuD6qnq+qh4DtgOnLKA+SZKWlYXs470gyb3dpujDurajgSf6xtnZtUmSJOYfvFcBJwBrgF3AR7r2DBi3BnWQZH2SLUm27NmzZ55lSJI0XuYVvFW1u6peqKofAZ/iJ5uTdwLH9o16DPDkNH1sqKqJqppYsWLFfMqQJL1IJLkkybZuS+vWJK8fQp9vSXLRkOr7zmzHPXCeH7CyqnZ1L98GTB7xvAm4LslHgVcCq4E75vMZkiQBJHkD8GvAyVX1fJIjgJfMctoDq2rvoPeqahO93GpqxuBN8jngjcARSXYCfwi8MckaepuRdwDvAaiqbUluBB4A9gLnV9ULi1O6JOlFYiXwdFU9D1BVTwMk2QFMVNXTSSaAy6vqjUkupbfytwp4OskJwO9V1bZuuq8B7wd+AZgALgHuAY6vqh8l+RngIeB44FXAJ4AVwPeAf1tV30hyHHAdvRy9eS5fZjZHNb+jqlZW1UFVdUxVXV1V76qqX6iqX6yqt/St/VJVH6qqE6rq1VX153MpRpKkAW4Bjk3ycJIrk/yLWUzzOmBtVf1r4Hrg7dDbYgu8sqrumhyxqp6lF7yT/f468JWq+iGwAfj3VfU64APAld04HwOuqqp/BvzdXL6MV66SJI20qvoOvSBdD+wBbkjyb2aYbFNVfb8bvhE4pxt+O/D5AePfAPx2N3xu9xkHA78EfD7JVuBP6a19A5wGfK4b/sxcvs+89vFKktRSt9vya8DXktwHrKO3S3NyBfKnpkzy3b5pv5nkmSS/SC9c3zPgIzYBf5TkcHoh/xfAy4D/W1VrpitrPt/FNV5J0khL8uokq/ua1gB/S+8Yo9d1bb85QzfXA/8ROKSq7pv6ZrdWfQe9Tcj/vTtz5zngsSTndHUkyWu6Sf43vTVjgHfO5fsYvJKkUXcwsDHJA0nuBU4CLgX+M/CxJH8JzHQg75/RC8ob9zPODcDvdM+T3gmcl+QeYBu9KzQCvBc4P8mdwCFz+TKpmtea8lBNTEzUli1bFtzPFZsfXnAfF55x4oL7kCTN2aALMC1LrvFKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkMGryRJUyQ5M8lDSbYPunVgkpcmuaF7//Ykq2bbt5eMlCSNrFUX/Y9Lh9nfjsv+1Yz9JTmA3h2JzqB3n/k7k2yqqgf6RjsP+Puq+idJzgU+zE+u9bxfrvFKkrSvU4DtVfVoVf0DvctNrp0yzlpgYzf8Z8DpSWZ1ERCDV5KkfR0NPNH3emfXNnCcqtoLPAu8YjadG7ySJO1r0Jrr1Osrz2acgQxeSZL2tRM4tu/1McCT042T5EB6N0r41mw6N3glSdrXncDqJMcleQm9uxptmjLOJnr3BAb4LeAvapZ3HfKoZkmS+lTV3iQXAF8BDgCuqaptSf4LsKWqNgFXA59Jsp3emu650/e4L4NXkjSyZnP6z2KoqpuAm6a0/UHf8A+Ac+bTt5uaJUlqyOCVJKkhg1eSpIYMXkmSGjJ4JUlqyOCVJKkhg1eSpD5JrknyVJL7p3k/ST7e3RLw3iQnz6V/z+OVJI2uSw+5dLj9PTub/j4N/Alw7TTvnwWs7h6vB67qnmfFNV5JkvpU1dfZ/3WX1wLXVs9twKFJVs62f4NXkqS5mc1tA6dl8EqSNDfzviUgGLySJM3VbG4bOK0Zg3fQ0V1JDk+yOckj3fNhXfuCjvSSJGkMbALe3WXeqcCzVbVrthPP5qjmT/P/H911EXBrVV2W5KLu9e+zwCO9RsEVmx8eSj8XnnHiUPqRJLWV5HPAG4EjkuwE/hA4CKCqPkn
vrkVnA9uB7wG/O5f+Zwzeqvp6klVTmtd2RQFsBL5GL3h/fKQXcFuSQ5OsnMv/BCRJ+rHZnf4zVFX1jhneL+D8+fY/3328R02Gafd8ZNe+oCO9JEla7oZ9cNWsj/RKsj7JliRb9uzZM+QyJEkaTfMN3t2TJwt3z0917bM+0quqNlTVRFVNrFixYp5lSJI0XuYbvJuAdd3wOuDLfe3zPtJLkqTlbsaDq6Y5uusy4MYk5wGPA+d0oy/oSC9Jkpa72RzVPN3RXacPGHdBR3pJkrTceeUqSZIaMnglSWrI4JUkqSGDV5KkhgxeSZIaMnglSWrI4JUkqSGDV5KkhgxeSZIaMnglSWrI4JUkqSGDV5KkhgxeSZIamvHuRJqfKzY/vOA+LjzjxCFUIkkaJQbvFKc+vmEo/dz2qvVD6UeStLy4qVmSpIYMXkmSGjJ4JUlqyOCVJKkhg1eSpIYMXkmSGjJ4JUlqaFmdxzusc3AlSVosrvFKktSQwStJUkMGryRJDRm8kiQ1ZPBKktSQwStJUkPL6nSiUTKcU5suH0IfkqRR4hqvJEkNGbySJDVk8EqS1NCC9vEm2QF8G3gB2FtVE0kOB24AVgE7gLdX1d8vrExJkpaHYazxvqmq1lTVRPf6IuDWqloN3Nq9liRJLM6m5rXAxm54I/DWRfgMSZLG0kKDt4BbktyVZH3XdlRV7QLono8cNGGS9Um2JNmyZ8+eBZYhSdJ4WOh5vKdV1ZNJjgQ2J/nGbCesqg3ABoCJiYlaYB2SJI2FBa3xVtWT3fNTwJeAU4DdSVYCdM9PLbRISZKWi3kHb5KXJXn55DDwq8D9wCZgXTfaOuDLCy1SkqTlYiGbmo8CvpRksp/rqurmJHcCNyY5D3gcOGfhZUqStDzMO3ir6lHgNQPanwFOX0hRkiQtV165SpKkhgxeSZIaMnglSWrI4JUkqSGDV5KkhhZ65Sotois2PzyUfi4848Sh9CNJWjjXeCVJasjglSSpIYNXkqSGDF5JkhoyeCVJasjglSSpIU8nGmGnPr5hSD1dPqR+JEkLZfBqdr76R8Pp500XD6cfSRpTbmqWJKkhg1eSpIYMXkmSGjJ4JUlqyOCVJKkhg1eSpIYMXkmSGvI8Xo2nYZxX7DnFkpaAa7ySJDVk8EqS1JCbmvXi5WUwJS0Bg1ez8tePPjOUft7wpqF0M5R63nD8K4ZQiSTNjcH7InDF5ocX3MepQ6gDhlMLDK8eSWrNfbySJDVk8EqS1JCbmqWF8pxiSXNg8KqpUx/fsNQlqBX/QyINZPBKy4lhJ428RQveJGcCHwMOAP5bVV22WJ+l/XMtU3MyrPObR8hfX/2BBffxhvMuH0Il0iIFb5IDgE8AZwA7gTuTbKqqBxbj86T5GNq5yUM4H3gYwTAsI3V+8yj9J8ALrmhIFmuN9xRge1U9CpDkemAtYPBq2RlWgC83XuREGmyxgvdo4Im+1zuB1/ePkGQ9sL57+Z0kDw3hc48Anh5CP62MU73jVCtY72Kz3nn74GxGGqF6Z2UY9d5cVWcOo5hRt1jBmwFttc+Lqg3AUHc+JtlSVRPD7HMxjVO941QrWO9is97FZb3L22JdQGMncGzf62OAJxfpsyRJGhuLFbx3AquTHJfkJcC5wKZF+ixJksbGomxqrqq9SS4AvkLvdKJrqmrbYnzWFON23sw41TtOtYL1LjbrXVzWu4ylqmYeS5IkDYU3SZAkqSGDV5KkhpZF8CY5M8lDSbYnuWip65kqybFJvprkwSTbkry3a780yTeTbO0eZy91rZOS7EhyX1fXlq7t8CSbkzzSPR+21HUCJHl13zzcmuS5JO8bpfmb5JokTyW5v69t4PxMz8e75fneJCePSL1/nOQbXU1fSnJo174qyff75vMnR6Teaf/+SS7u5u9DSd48ArXe0FfnjiRbu/ZRmLfT/X6N7PI78qpqrB/0Dt76G+B44CXAPcBJS13XlBpXAid3wy8HHgZOAi4FPrDU9U1T8w7
giClt/xW4qBu+CPjwUtc5zfLwd8DPjtL8BX4FOBm4f6b5CZwN/Dm98+FPBW4fkXp/FTiwG/5wX72r+scbofk78O/f/du7B3gpcFz3+3HAUtY65f2PAH8wQvN2ut+vkV1+R/2xHNZ4f3x5yqr6B2Dy8pQjo6p2VdXd3fC3gQfpXd1r3KwFNnbDG4G3LmEt0zkd+Juq+tulLqRfVX0d+NaU5unm51rg2uq5DTg0yco2lfYMqreqbqmqvd3L2+idnz8Sppm/01kLXF9Vz1fVY8B2er8jTeyv1iQB3g58rlU9M9nP79fILr+jbjkE76DLU45sqCVZBbwWuL1ruqDbHHPNqGy67RRwS5K70ru8J8BRVbULev8YgSOXrLrpncu+P1qjOn9h+vk5Dsv079Fbq5l0XJL/k+R/JfnlpSpqgEF//1Gev78M7K6qR/raRmbeTvn9Gufld0kth+Cd8fKUoyLJwcAXgPdV1XPAVcAJwBpgF71NTKPitKo6GTgLOD/Jryx1QTNJ72ItbwE+3zWN8vzdn5FeppNcAuwFPts17QJeVVWvBf4DcF2Sf7xU9fWZ7u8/yvP3Hez7H8eRmbcDfr+mHXVA26jM35GwHIJ3LC5PmeQgegvtZ6vqiwBVtbuqXqiqHwGfouHmrplU1ZPd81PAl+jVtntyk1H3/NTSVTjQWcDdVbUbRnv+dqabnyO7TCdZB/wa8M7qduh1m2yf6YbvorfP9MSlq7JnP3//kZy/SQ4EfgO4YbJtVObtoN8vxnD5HRXLIXhH/vKU3X6bq4EHq+qjfe39+z3eBtw/ddqlkORlSV4+OUzvoJr76c3Xdd1o64AvL02F09pnbWFU52+f6ebnJuDd3dGhpwLPTm7SW0pJzgR+H3hLVX2vr31FevfgJsnxwGrg0aWp8if28/ffBJyb5KVJjqNX7x2t6xvgXwLfqKqdkw2jMG+n+/1izJbfkbLUR3cN40HvKLqH6f1v8JKlrmdAff+c3qaWe4Gt3eNs4DPAfV37JmDlUtfa1Xs8vaM+7wG2Tc5T4BXArcAj3fPhS11rX80/AzwDHNLXNjLzl95/CHYBP6S3RnDedPOT3qa6T3TL833AxIjUu53evrvJZfiT3bi/2S0n9wB3A78+IvVO+/cHLunm70PAWUtda9f+aeDfTRl3FObtdL9fI7v8jvrDS0ZKktTQctjULEnS2DB4JUlqyOCVJKkhg1eSpIYMXkmSGjpwqQuQlrMkL9A7pWLSW6tqxxKVI2kEeDqRtIiSfKeqDp7HdAdU1QuLUZOkpeWmZqmx7h6rf5nk7u7xS137G7v7nl5Ht5ac5HeS3NHdi/VPJ69iJGl8ualZWlw/PXlTc+CxqnobvWvanlFVP0iymt6VjCa6cU4B/mlVPZbk54DfpnfDih8muRJ4J3Bt4+8gaYgMXmlxfb+q1kxpOwj4kyRrgBfY96L3d1TvHrHQu7fw64A7e5fL5acZvRtTSJojg1dq70JgN/Aaert7ftD33nf7hgNsrKqLG9YmaZG5j1dq7xBgV/VuV/cuYLr9trcCv5XkSIAkhyf52UY1SlokBq/U3pXAuiS30dvM/N1BI1XVA8B/Am5Jci+wGVg5aFxJ48PTiSRJasg1XkmSGjJ4JUlqyOCVJKkhg1eSpIYMXkmSGjJ4JUlqyOCVJKmh/wdP84s77E+/gAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visualize the distribution of Fare for people who survived and died\n", + "grid = sns.FacetGrid(df, hue='Survived', size=4, aspect=1.5)\n", + "grid.map(plt.hist, 'Fare', alpha=.5, bins=range(0,210,10))\n", + "grid.add_legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "_cell_guid": "0269e823-86a3-442b-b48c-dc659c8b2005", + "_execution_state": "idle", + "_uuid": "61e09e2b4fc7a0a1f31a3b066e80656794d73702" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X903XWd5/Hn+97cJqEJtKS1YpPaasosiG3QLNCpPxDFqegpzrYqCFtnD2fZPQd2HMexwNF1ldnZWYo/Z0CXzizj4HFEtLvQ4bAHHcBR0WJTCZVWkdgCSYulhJQ2Nc3P9/5xv/fbm+Qm997kfu9N7vf1OCcn+X7v98f7c7ncd7+fn+buiIiIACQqHYCIiMwdSgoiIhJSUhARkZCSgoiIhJQUREQkpKQgIiIhJQUREQkpKYiISEhJQUREQjWVDqBYS5Ys8ZUrV1Y6DBGReWXPnj0vu/vSfMfNu6SwcuVKOjo6Kh2GiMi8YmbPF3Kcqo9ERCSkpCAiIiElBRERCSkpiIhISElBRERCSgoiIhJSUhARkVBk4xTM7G7gA8BL7n5BjtcN+CpwBfB74E/c/RdRxZPLXY89y/eePMQblyzk1PAoR0+corE+xcDwGM2L6li5pIFUMsHJoRGWnLGAA72/Z90bzmbV0kYWLkhycmiU5sX1NDXU0ts/SE/fQLid0ds/yL7Dxzk+MMSJUyP09J0EjObFZ7B8cT2vO6uew6+eApzXnVXPyaHR8NoLFyT59e9O8KsXj4XnNNbVcGZ9Kjzv+MBQuJ0dT7Gmij+j68gJOruP0dayiNZljUWdO9NjS6Hc96v0faV6leszFeXgtW8AdwD3TPH6+4DVwc/FwNeD32Vx3mceYmAkvT71sy+dnPT6Uz3HgZcm7f9OR0/4d10q/aD14bc2c9+eHlKJBMNjY2zbtIaNbct5oPMQn7yvk5GxwuOqScDI2OnfxcjEk7l/oR7oPMRNO/ZOij/js/f/knt2vRBub1m3gluvfHNB5xZzn1Ir9/0qfV+pXuX8TEVWfeTuPwJemeaQK4F7PG0XsMjMzokqnmx3PfZsmBBm49TwGKeGx7hn1wucGh7jxOAIp4bH2LpjL11HTrD1e08V/cWeOb7Y87Lj2bpjL739gwWd09s/yE079k6KP3N+15ET4xICwD0/e4GuIyfynlvMfUqt3Per9H2lepX7M1XJNoXlQHfWdk+wbxIzu97MOsys4+jRo7O+8f17X5z1NaaTSiTo7D5G0irz9qYSCXr6Bgo6tqdvgFRifJzZ53d2H8t5Xmf3sbznFnOfUiv3/Sp9X6le5f5MVTIpWI59Of/57u7b3b3d3duXLs0
7n1NeH1wT7QPJ8NgYbS2LGPUZ/HO/RPdvXlxf0LHNi+sZHhsfZ/b5bS2Lcp7X1rIo77nF3KfUyn2/St9Xqle5P1OVTAo9QEvWdjNwuBw3/k/vWk19Ta6cVJy6VIK6VIIt61ZQl0rQWFtDXSrBtk1raF3WyO2b11JT5DucOb7Y87Lj2bZpTcENUU0NtWzbtGZS/JnzW5c1smXdinHnbFm3gtZljXnPLeY+pVbu+1X6vlK9yv2ZMvfZ161PeXGzlcCDU/Q+ej9wI+neRxcDf+PuF+W7Znt7u5dqllT1PjpNvY+q475SvWb7mTKzPe7enve4qJKCmX0buBRYAhwB/huQAnD3/xV0Sb0D2EC6S+p/cPe83/alTAoiInFRaFKIrEuqu1+d53UHbojq/iIiUjyNaBYRkZCSgoiIhJQUREQkpKQgIiIhJQUREQkpKYiISEhJQUREQkoKIiISUlIQEZGQkoKIiISUFEREJKSkICIiISUFEREJKSmIiEhISUFEREJKCiIiElJSEBGRkJKCiIiElBRERCSkpCAiIiElBRERCSkpiIhISElBRERCSgoiIhJSUihSb/8gT3Ufo7d/sNKhiIiUXE2lA5hPHug8xE079pJKJBgeG2PbpjVsbFte6bBEREpGTwoF6u0f5KYdezk1PMaJwRFODY+xdcdePTGISFVRUihQT98AqcT4tyuVSNDTN1ChiERESk9JoUDNi+sZHhsbt294bIzmxfUVikhEpPQiTQpmtsHMnjGzLjO7OcfrK8zsMTN70sz2mtkVUcYzG00NtWzbtIa6VILG2hrqUgm2bVpDU0NtpUMTESmZyBqazSwJ3AlcDvQAu81sp7vvzzrsM8B97v51MzsfeAhYGVVMs7WxbTnnn3Mmnd3HaGtZROuyxkqHJCJSUlH2ProI6HL3AwBmdi9wJZCdFBw4M/j7LOBwhPHMmnofiUi1i7L6aDnQnbXdE+zL9jngWjPrIf2U8F8ijGdW1PtIROIgyqRgOfb5hO2rgW+4ezNwBfBNM5sUk5ldb2YdZtZx9OjRCELNT72PRCQOokwKPUBL1nYzk6uHrgPuA3D3nwF1wJKJF3L37e7e7u7tS5cujSjc6an3kYjEQZRJYTew2sxWmdkC4Cpg54RjXgDeDWBm55FOCpV5FMhDvY9EJA4ia2h29xEzuxF4GEgCd7v7PjO7Fehw953AJ4G/M7NPkK5a+hN3n1jFNGdsbFvO+tYl9PQN0Ly4XglBRKqOzeHv4Jza29u9o6Oj0mGIiMwrZrbH3dvzHacRzSIiElJSEBGRkKbOnoHe/kF6+gZYuCDJyaHR8Hcl2hkysRR672KPjwO9JyKnKSkUKTOq2cecwVGnJgEjY1CXSj90lXOUc7EjrDUiezK9JyLjqfqoCNmjmgdH0w30I8HQhVPDY2Ud5VzsCGuNyJ5M74nIZEoKRcg1qnmico1yLnaEtUZkT6b3RGQyJYUi5BrVPFG5RjkXO8JaI7In03siMpmSQhGyRzXXJtNTO9UE72BdKlHWUc7FjrDWiOzJ9J6ITKbBazOg3kfVRe+JxEGhg9fU+2gGmhpqZ/zlUeovoGJjmU3s1UrvichpSgplpO6PIjLXqU2hTNT9UUTmAyWFMlH3RxGZD5QUykTdH0VkPlBSKBN1fxSR+UANzWWkRXpEZK5TUigzdX8UkblM1UciIhLSk0Jg4qCy6bYB9h0+Djhvet1Z4b/8oxwZW+prz+R6hZ6jEcIi85eSApMHlX34rc3ct6cn5/bA8AhjDmPB7CCppPHFD63FIbKBaaUe9DaT6xV6jgboicxvsZ/7qLd/kPW3Pcqp4elnP53OgqRhZgyOnL5GXSrB4zddNut/KeeKbzbXnsn1Cj2n1LGKSOkUOvdR7NsUClkjIR8zI5mwcftKNTCt1IPeZnK9Qs/RAD2R+S/21UeFrJGQj7szOuESpRqYVupBbzO5XqHnaICeyPwX+yeFXIPKtqxbMeV2TQKyHwpSSeMLH1r
L7ZujGZhW6kFvM7leoedogJ7I/Bf7NoUM9T4q3TnqfSQy9xTapqCkICISA2poFhGRoikpiIhISElBRERCSgoiIhKKNCmY2QYze8bMuszs5imO+bCZ7TezfWb2T1HGIyIi04ts8JqZJYE7gcuBHmC3me109/1Zx6wGbgHWu3ufmb0mqngKMZ+6lIqIRCHKEc0XAV3ufgDAzO4FrgT2Zx3zH4E73b0PwN1fijCeaUU5kZsmiROR+SLK6qPlQHfWdk+wL9u5wLlm9riZ7TKzDRHGM6Xe/kFu2rGXU8NjnBgc4dTwGFt37KW3f3BOX1tEpNSmfVIwsxPAlKPb3P3M6U7PdUqO+68GLgWagR+b2QXufmxCHNcD1wOsWLFiupBnJDOR2ylOz9uTmchttlU9UV5bRKTUpk0K7t4IYGa3Ar8Dvkn6y/4aoDHPtXuAlqztZuBwjmN2ufswcNDMniGdJHZPiGM7sB3SI5rz3LdoUU7kpkniRGQ+KbT66I/c/WvufsLdj7v714FNec7ZDaw2s1VmtgC4Ctg54Zj7gXcBmNkS0tVJBwoPvzSinMhNk8SJyHxSaEPzqJldA9xLugroamB0uhPcfcTMbgQeBpLA3e6+L3jq6HD3ncFr7zWz/cH1PuXuvTMsy6xsbFvO+tYlkfQQivLaIiKlVNCEeGa2EvgqsJ50Ungc+DN3fy7C2HLShHgiIsUrdEK8gp4Ugi//K2cblIiIzG0FtSmY2blm9oiZPR1srzGzz0QbmoiIlFuhDc1/R3rk8TCAu+8l3XAsIiJVpNCkcIa7/3zCvpFSByMiIpVVaFJ42czeSDD4zMw2Ay9GFpWIiFREoV1SbyA9eOzfmNkh4CDpAWyxoQntRCQOCk0Kz7v7e8xsIZBw9xNRBjXXaEI7EYmLQquPDprZduASoD/CeOYcTWgnInFSaFL4A+BfSFcjHTSzO8zsbdGFNXdkJrTLlpnQTkSk2hSUFNx9wN3vc/d/B1wInAn8a6SRzRGa0E5E4qTg9RTM7J1m9jXgF0Ad8OHIoppDNKGdiMRJQQ3NZnYQ6ATuIz1p3clIoyqD3v5Bvr/vd+x5/hWWnVnHH1/YTOuyRnr7B9l3+DjHB4YAOLM+xfnnnMkXNq/l+d5+zl5Yy6IzUvT2D9LUUEvXkRN0dh9jZdMZpGqSk3onZa4HzptedxZAzl5Mmd5NCxckOTk0Ou717Hv8fngsvFYxiWliHFEltTj00iq0jHF4L6T6FNr7aK27H480kjJ6oPMQH7+3c9y+O394gLe1NrHrQC8jY1OcmCWVNC5edTY/6eodty+ZsLB30gOdh/jkfZ3h9RIGyYRRV5Mc14sp07vJx5zBUaculX6A27ZpDR3PvcI9u17Ief8vfmhtQb2gJsZRzLnFiEMvrULLGIf3QqrTtLOkmtlWd99mZn9LjhXY3P1Powwul9nOktrbP8gl/+NfGC7gi3+m6lIJHrzxbbz/b3/M4MjU72/muA/c8RNO5QhoQRKGppmgvLYmwU9vvizvv1b/8H8+MimOQs4tRm//IOtve3RcOepSCR6/qXT3qLRCyxiH90Lmn1LNkvqr4HfVzFWd7jVkTLPK6KylEgk6u4+RtATTLTuROW7icp0ZRgJy7M9IJizvsp49fQM54yjk3GLEYdnRQssYh/dCqle+5Tj/Ofhzr7s/WYZ4IpfuNRRdQoB076S2lkWM+vSPI5njJvZuyvBpEgLA6Jjn7QXVvLg+ZxyFnFuMOPTSKrSMcXgvpHoV2vvoS2b2azP7SzN7U6QRRaypoZYvfrgt52tvb22ipsB3JJU03t7aNGlfpndS67JGbt+8dtz1EpY+JrsXU+uyxrB3U23SgHRVQ10qwRc+1MaWdSumvP/tm/P3gmpqqJ0UR6HnFiMOvbQKLWMc3gupXgWtvAZgZq8l3Q31I6THKXzH3f97hLHlVKqV1wrtffRy/yC3P/wbTmZV7tfVJNi+pZ13nLtUvY9y3Kfae9yo95HMR4W2KRS
cFLIu/GZgK/ARd18ww/hmrNzLcarRUESqQaFJodCV184zs88FK6/dAfwUaJ5ljPOCqgJEJE4KHafwD8C3gfe6++EI45mTNrYtZ33rElUFiEjVy5sUzCwJ/Nbdv1qGeOaspoZaJQMRqXp5q4/cfRRoMrOytx9Erbd/kKe6j2kabBGRQMGL7ACPm9lOIJz3yN2/FElUZaBpCEREJit0nMJh4MHg+Masn3lJC+eIiORW0JOCu38+6kDKqdzTEKi/uojMF4VOnf0YuSfEu6zkEZVBOachUDWViMwnhbYp/EXW33XAJmCk9OGUR2bswdYJX9al/ld8djVV5qlk6469rG9doicGEZmTCq0+2jNh1+NmNq+X4yzH2APNliki802h1UdnZ20mgHbgtZFEVEZRjz3QbJkiMt8U2vtoD+k1FTpIT3Hx58B1+U4ysw1m9oyZdZnZzdMct9nM3Mzyzssxn2iKDBGZb6Z9UjCzfwt0u/uqYPtjpNsTngP25zk3CdwJXA70ALvNbKe7759wXCPwp8ATMyzDnKYpMkRkPsn3pHAXMARgZu8A/hr4R+BVYHuecy8Cutz9gLsPAfcCV+Y47i+BbcCpIuKeV5oaalnbskgJQUTmvHxJIenurwR/fwTY7u473P2/Aq15zl0OdGdt9wT7QmZ2IdDi7g9OdyEzu97MOsys4+jRo3luKyIiM5U3KZhZporp3cCjWa/la6S2HPvCsQ5mlgC+DHwyX5Duvt3d2929fenSpfkOnxHNgyQikv+L/dvAv5rZy8AA8GMAM2slXYU0nR6gJWu7mfR0GRmNwAXAD80M0r2ZdprZRncv3yo6wLd2Pc/nH9zPgqQxPDrGje9azUcvXqHqHhGJnbwrr5nZJcA5wPfd/WSw71ygwd1/Mc15NcBvSD9hHAJ2Ax91931THP9D4C/yJYRSr7z2rV3P8+n7n560v7Ymwe2bNfpYRKpDoSuv5R2n4O67cuz7TQHnjZjZjcDDQBK42933mdmtQIe778x3jaj19g/y+X/OmaMYHBnT6GMRiZ1Cp7mYEXd/CHhowr7PTnHspVHGkktP3wCpZIKh0dGcr2v0sYjETaGD16pS8+J6RqepPtPoYxGJm1gnBYAbLm2ltibBwgVJkgappGn0sYjEVqTVR3NZ9pTW4Pznd7by0YtXAGj0sYjEViyfFCauvDY44tz5wy5Ao49FJN5imRQyU1pnyzQqZ8s1oE2D3ESkmsWy+qh5cT0nh8avEXRyaGRco3KuFdMctIqaiFS1WCaFvpNDjE3odDTm6f1NDbU5V0z71PeeAozBEa2iJiLVK5bVR53dx6bdn6t6KWkJkonx0znlqnISEZnPYvmk0NayaNr9uVZMG/Ux8PFJQeMYRKTaxPJJoXVZI1vWrRi3b8u6FbQuawRyr5h2++a13L5Zq6iJSHXLOyHeXFPKCfG6jpygs/sYbS2LwoSQrbd/cNKYhVz7RETmupJNiFfNWpc15kwGGU0NtZO++HPtExGpFrGsPhIRkdyUFEREJKSkkEWjlUUk7mLdppAt1whmjVYWkbjRkwKTJ8g7NZxedU1PDCISN7F+Ush0L311YIhUIhFOXwFadU1E4im2SSG7umhodHTSXEgarSwicRTLpJBrwruaBNTWJFiQPN2moKcEEYmbWCaFzIR32dVF9aka7rzmLZxVn9JoZRGJrVg2NOea8G5odBRwJQQRibVYJoWmhlpWnD2+vWBoxLnhW0+y/rZH2dl5qEKRiYhUViyTQsfBXn5z5OS4fQ7qjioisRfLpPCjZ1+e9nUtniMicRXLpPCO1UumfV3dUUUkrmKZFFYtbWDCypoALKxNUltj3HBpa/mDEhGZA2KZFHr6Bli4YHJv3LXLFwHG9h8dUIOziMRSLJNC8+L6oAvqeD890MvgiOY/EpH4imVSaGqo5cZ3rc57nBqcRSRuIk0KZrbBzJ4xsy4zuznH639uZvvNbK+ZPWJmr48ynmwfvXgFtTXTF39ig7P
WWxCRahdZUjCzJHAn8D7gfOBqMzt/wmFPAu3uvgb4HrAtqngmamqo5fbNa6hLJWisraEulWDLuhXjtrPnP3qg8xDrb3uUa//+CbU3iEjVinLuo4uALnc/AGBm9wJXAvszB7j7Y1nH7wKujTCeSTa2LWd96xJ6+gbC6S0+/u5zx21D7gn0tu7Yy/rWJZoSQ0SqSpTVR8uB7qztnmDfVK4D/l+uF8zsejPrMLOOo0ePlizAzHoK2QmgqaGWtS2Lxn3ZZybQy6b2BhGpRlE+KeQYCYDn2IeZXQu0A+/M9bq7bwe2A7S3t+e8RrGKWX4z1wR6GuAmItUoyieFHqAla7sZODzxIDN7D/BpYKO7l6UFt9jlN5saatm2ac2U7Q0iItUiyieF3cBqM1sFHAKuAj6afYCZXQjcBWxw95cijGWcXOsp5Ft+M1f7g4hItYksKbj7iJndCDwMJIG73X2fmd0KdLj7TuB2oAH4rpkBvODuG6OKKWOm1UFNDbVKBiJS1SJdec3dHwIemrDvs1l/vyfK+08lUx20dUKbgr7wRSTuYrkcJ6g6SEQkl9gmBVB1kIjIRLGc+2gqmsZCROIu1k8K2YoZtyAiUq30pEDx4xZERKqVkgKaxkJEJENJgfzjFtTWICJxEeukkPmyB6acxkJTZotInMS2oTlXw/LjN102btyCpswWkbiJ5ZPCVA3LwLhps9XWICJxE8ukUOiXvabMFpG4iWVSKPTLXlNmi0jcxLJNoZgJ8TRHkojESSyTAhT3Za85kkQkLmKbFEBf9iIiE8WyTSGj68gJvtfRTdeRE5UORURkTojtk8Jn7/8l9+x6Idzesm4Ft1755gpGJCJSebF5UsieqqLryIlxCQHgnp+9oCcGEYm9WDwpTBy9vOnC5pzHdXYfo3VZY5mjExGZO6o+KeSaquK7e17IeWxby6JyhiYiMudUffVRrtHLtTU1vP+C147bt2XdCj0liEjsVf2TwlSjl2/94AV84vJz6ew+RlvLIiUEERFi8KQw3VQVrcsa2dzeooQgIhKo+icF0FQVIiKFikVSAI1eFhEpRNVXH01Hy2yKiIwXmyeFiXKtvLaxbXmlwxIRqahYPilMtfKanhhEJO5imRS0zKaISG6RJgUz22Bmz5hZl5ndnOP1WjP7TvD6E2a2Msp4MrTMpohIbpG1KZhZErgTuBzoAXab2U5335912HVAn7u3mtlVwG3AR6KKKaPv5BDLF9Xy26OnnwwaUkmu2f5TXvn9MG9ZsYg3vqaBgaHTiaN+QZJUMsHL/YMsXFDDyaEREhjHB4e59Nyl1CSTPN/bz+ubGlj3xib6Tg7R2X2MxWek6Pv9cM4Bch0He/nRsy/zjtVLaF/VBKSrtnr6Bli4IMnJodHwt7rSzkzm/Zz4/k21XyTuzN2jubDZOuBz7v5HwfYtAO7+11nHPBwc8zMzqwF+Byz1aYJqb2/3jo6OGcc1ccrscsqenvvav9/FT7p6w9fe3trE5vYWbtqxFx9zBkedmgSMjEFdKv1Ap8bw4kzVmUCdDCSOzGyPu7fnOy7K6qPlQHfWdk+wL+cx7j4CvAo0RRVQrimzyykzPXfHwd5xCQHgx129fOq76cbvwdF0ThwJHlRODY+pMbxIU3Um6DpyQp0MRKYRZVKwHPsmPgEUcgxmdr2ZdZhZx9GjR2ccUGf3sRmfWyqd3cf40bMvT/Hq9E9tagwv3FSdCTq7j6mTgcg0okwKPUBL1nYzcHiqY4Lqo7OAVyZeyN23u3u7u7cvXbp0xgHNhamx21oW8Y7VS6Z4NVeOPE2N4YWbqjNBW8sidTIQmUaUSWE3sNrMVpnZAuAqYOeEY3YCHwv+3gw8Ol17wmy1Lmtky7oVUV0+r8z03O2rmnh76/hasre3NvGFD6Un7qtNppNDTfBfpy6VGDeRn+Q31USIrcsap5wgUUQibGgGMLMrgK8ASeBud/8rM7sV6HD3nWZWB3wTuJD0E8JV7n5gumvOtqEZ0m0
Ln75/L08cPF2dtKqpntqkqfdRlVHvI5G0QhuaI00KUShFUhARiZu50PtIRETmGSUFEREJKSmIiEhISUFEREJKCiIiElJSEBGRkJKCiIiE5t04BTM7Cjw/w9OXAFNNPFSN4lTeOJUV4lVelbU0Xu/ueecJmndJYTbMrKOQwRvVIk7ljVNZIV7lVVnLS9VHIiISUlIQEZFQ3JLC9koHUGZxKm+cygrxKq/KWkaxalMQEZHpxe1JQUREphGbpGBmG8zsGTPrMrObKx3PbJnZ3Wb2kpk9nbXvbDP7gZk9G/xeHOw3M/uboOx7zewtlYu8eGbWYmaPmdmvzGyfmX082F+t5a0zs5+b2VNBeT8f7F9lZk8E5f1OsHgVZlYbbHcFr6+sZPwzYWZJM3vSzB4Mtqu5rM+Z2S/NrNPMOoJ9c+azHIukYGZJ4E7gfcD5wNVmdn5lo5q1bwAbJuy7GXjE3VcDjwTbkC736uDneuDrZYqxVEaAT7r7ecAlwA3Bf79qLe8gcJm7rwXagA1mdglwG/DloLx9wHXB8dcBfe7eCnw5OG6++Tjwq6ztai4rwLvcvS2r++nc+Sy7e9X/AOuAh7O2bwFuqXRcJSjXSuDprO1ngHOCv88Bngn+vgu4Otdx8/EHeAC4PA7lBc4AfgFcTHpQU02wP/xMAw8D64K/a4LjrNKxF1HGZtJfhJcBD5JerLwqyxrE/RywZMK+OfNZjsWTArAc6M7a7gn2VZtl7v4iQPD7NcH+qil/UF1wIfAEVVzeoDqlE3gJ+AHwW+CYu48Eh2SXKSxv8PqrwPhFwOe2rwBbgcz6t01Ub1kBHPi+me0xs+uDfXPms1wT5cXnEMuxL07drqqi/GbWAOwA/szdj5vlKlb60Bz75lV53X0UaDOzRcD/Bc7LdVjwe96W18w+ALzk7nvM7NLM7hyHzvuyZlnv7ofN7DXAD8zs19McW/byxuVJoQdoydpuBg5XKJYoHTGzcwCC3y8F++d9+c0sRTohfMvd/0+wu2rLm+Hux4Afkm5LWWRmmX/IZZcpLG/w+lnAK+WNdMbWAxvN7DngXtJVSF+hOssKgLsfDn6/RDrhX8Qc+izHJSnsBlYHPRoWAFcBOyscUxR2Ah8L/v4Y6br3zP4tQU+GS4BXM4+q84GlHwn+N/Ard/9S1kvVWt6lwRMCZlYPvId0I+xjwObgsInlzbwPm4FHPaiAnuvc/RZ3b3b3laT/v3zU3a+hCssKYGYLzawx8zfwXuBp5tJnudKNLmVs3LkC+A3putlPVzqeEpTn28CLwDDpf01cR7pu9RHg2eD32cGxRrr31W+BXwLtlY6/yLK+jfQj816gM/i5oorLuwZ4Mijv08Bng/1vAH4OdAHfBWqD/XXBdlfw+hsqXYYZlvtS4MFqLmtQrqeCn32Z76K59FnWiGYREQnFpfpIREQKoKQgIiIhJQUREQkpKYiISEhJQUREQnEZ0SwyY2Y2Sro7YMYH3f25CoUjEil1SRXJw8z63b1hBuclPT1dhci8oeojkRkws5Vm9mMz+0Xw84fB/kuDtR/+ieDpwsyuDdZH6DSzu4Kp3EXmJFUfieRXH8xYCnDQ3f+Y9Nw0l7v7KTNbTXqEeWZu/IuAC9z9oJmdB3yE9CRow2b2NeAa4J4yl0GkIEoKIvkNuHvbhH0p4A4zawNGgXOzXvu5ux8M/n438FZgdzCraz2nJzsTmXOUFERm5hPAEWAt6WrYU1mvncz624B/dPdbyhibyIypTUGMcA/pAAAAeUlEQVRkZs4CXnT3MeDfA1O1EzwCbA7mzs+sxfv6MsUoUjQlBZGZ+RrwMTPbRbrq6GSug9x9P/AZ0itt7SW9ito5ZYtSpEjqkioiIiE9KYiISEhJQUREQkoKIiISUlIQEZGQkoKIiISUFEREJKSkICIiISUFEREJ/X9rMRaTTBnl3QAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visualize the correlation between Fare and Survived using a scatter plot\n", + "df[['Fare', 'Survived']].groupby(['Fare'],as_index=False).mean().plot.scatter('Fare','Survived')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "bf22eb7f-78ae-4f28-9f2c-af8f754dadf2", + "_execution_state": "idle", + "_uuid": "92da518eed3f5e86bf7c05216f2697397b01bd9b" + }, + "source": [ + "We can see that people with lower Fare are less likely to survive. But this is certainly not a smooth curve if we don't bin the data. It would be better to feed machine learning algorithms with intervals of Fare, because using the original Fare values would likely cause over-fitting. " + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "_cell_guid": "dfa08997-c349-4a56-b69d-1b32b9945730", + "_execution_state": "idle", + "_uuid": "5cd244995e5442dc614ce8dbcf4907d66b285345" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Fare-binSurvived
010.217877
120.201087
230.426901
340.443243
450.645349
\n", + "
" + ], + "text/plain": [ + " Fare-bin Survived\n", + "0 1 0.217877\n", + "1 2 0.201087\n", + "2 3 0.426901\n", + "3 4 0.443243\n", + "4 5 0.645349" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# bin Fare into five intervals with equal amount of people\n", + "df['Fare-bin'] = pd.qcut(df.Fare,5,labels=[1,2,3,4,5]).astype(int)\n", + "\n", + "# inspect the correlation between Fare-bin and Survived\n", + "df[['Fare-bin', 'Survived']].groupby(['Fare-bin'], as_index=False).mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "c309b91d-cfa8-40fd-815f-1a41692a0b37", + "_execution_state": "idle", + "_uuid": "e715870983cdd7ce85a3c92a79672b247c6130c2" + }, + "source": [ + "Now the correlation is clear after binning the data!\n", + "\n", + "### Cabin" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "_cell_guid": "cf7344e8-76ba-4ce7-807e-272f49272423", + "_execution_state": "idle", + "_uuid": "dc0da43c475aa71894141aa6578e22b01a99750e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1014" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there is any NAN\n", + "df.Cabin.isnull().sum(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "_cell_guid": "eed848fb-b155-41b2-be78-d04334111893", + "_execution_state": "idle", + "_uuid": "742f1b8d7a6e09087135754b14da0e9137b9a981" + }, + "outputs": [], + "source": [ + "df = df.drop(labels=['Cabin'], axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": "a61221c4-6b5b-489b-a124-35f2b3fae207", + "_execution_state": "idle", + "_uuid": "3506dbb251461b09ede6f1ece58c7f15340c6628" + }, + "source": [ + "### Embarked" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "_cell_guid": "8a8baacb-bcc3-439f-a540-0f5baac20b91", + "_execution_state": "idle", + 
"_uuid": "e1d3efbfad7ee51433326e86de59e2b9e8d5fb21" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check if there is any NAN\n", + "df.Embarked.isnull().sum(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "_cell_guid": "f7df232f-1f39-4aa6-8285-dcaa0c320e16", + "_execution_state": "idle", + "_uuid": "b21097135f1b6199b0b341ef12a88398b0c8f7ad" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmbarkedTicketTitle
count130713091309
unique3165
topS3Mr
freq914429757
\n", + "
" + ], + "text/plain": [ + " Embarked Ticket Title\n", + "count 1307 1309 1309\n", + "unique 3 16 5\n", + "top S 3 Mr\n", + "freq 914 429 757" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe(include=['O']) # S is the most common" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "_cell_guid": "d65d5808-4d12-4a6c-8aa5-d67bd800a6fe", + "_execution_state": "idle", + "_uuid": "0833afac0ae691222f5436be2645a8e9120ca736" + }, + "outputs": [], + "source": [ + "# fill the NAN\n", + "df.Embarked.fillna('S' , inplace=True )" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "_cell_guid": "c5a5acd2-47f4-4eda-8cee-7f5759a80ec1", + "_execution_state": "idle", + "_uuid": "49fe53c86777c9b62b94a5b559ce15006368458d" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EmbarkedSurvivedPclassFareAgeSex
0C0.5535711.85185262.33626732.3321700.418519
1Q0.3896102.89430912.40901228.6300000.487805
2S0.3390092.34497827.51248529.2981510.319869
\n", + "
" + ], + "text/plain": [ + " Embarked Survived Pclass Fare Age Sex\n", + "0 C 0.553571 1.851852 62.336267 32.332170 0.418519\n", + "1 Q 0.389610 2.894309 12.409012 28.630000 0.487805\n", + "2 S 0.339009 2.344978 27.512485 29.298151 0.319869" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the correlation between Embarked and Survived as well as some other features\n", + "df[['Embarked', 'Survived','Pclass','Fare', 'Age', 'Sex']].groupby(['Embarked'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "_cell_guid": "2a43f956-9690-4869-bd67-442c338bc57a", + "_execution_state": "idle", + "_uuid": "1426a18aa9b7a7dd0f18b260ec13f2d245f3a5a4" + }, + "outputs": [], + "source": [ + "df = df.drop(labels='Embarked', axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "_cell_guid": "c8fd62bb-30ea-4675-a593-771c6b8c8333", + "_execution_state": "idle", + "_uuid": "0972d4354355d154e63aa5ec97565260898749f5" + }, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA1gAAADQCAYAAAAalMCAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X20XXV95/H3R8BKUQtooCmQBl2AIlNA70KQqUUQi8gyOEULOppWpmlX1SraKWi7lp3p2MHVFqS2aqNQ4pSCD2BhrIOkFKS2igaMPEUBkdFAJLFCxYepgt/54+zoJbknufeeh30e3q+1zjp7//Y+53x37pcf53t+e/92qgpJkiRJUu8e13YAkiRJkjQpLLAkSZIkqU8ssCRJkiSpTyywJEmSJKlPLLAkSZIkqU8ssCRJkiSpTyywFijJU5Ksbx7fSHLfrPV/afZZnuSVs15zXJKPDzCmi5N8L8mTZrVdkKSSPHVQn6vRYm5qVI1rbm6NTZNlXPNxge/3a0l+rr9RalhGNEeT5A+S3JXkziTXJXnWrO1vm7W8PMltg4plHFhgLVBV/WtVHVFVRwDvA87ful5Vz2t2Ww68suubDMbdwAqAJI8DXgDct+1OzX8g/t0nkLmpUTWuuTkrNk2Qcc3HBfo1YEEFVpJdF/lZ6rMRzdHXAc8DDq+qg4H/CVyV5AnN9rd1feUCTUIu+mWmj5J8p1k8F/jF5peGs7bZZ48kFyX5fJIvJFnRp4+/FPjVZvk44J+BR5rPXJ5kQ5L3ADcDB/TpMzUmxik3m19yb0ty67YxavKMam7Oji3J0iQ3NLHdluQXk+xirk6eEc/Hv0tyU5Lbk6xq2rbLwySnATPAJU38uyd5TpJPNa//ZJKlzeuvT/LHST4FvLFPx6EBajFHzwbeUFXfA6iqa4B/AV6V5Fxg9yaWS5r9d0ny/iZfr0myexPb05Nc3eTiPyV5RtN+cZLzklwHvDPJL+Uno3ZfyKzR3XEw9hXiiDoH+N2qOgU6w7aztv0+8I9V9dokewKfS/IPVfXdrTs0SfRPXd77lVV1xxztdwErkuwFnAH8DfDiWdsPAX69qn57sQeliTDSuZnkOcB+VXVY83l7LuooNY5GMTd//Hrgk1X1jiS7AD8NHIG5OslGMR9fW1Xfar6ofj7J5XRGMR6Th1X1UJLXN/GvS7Ib8G5gRVVtSfKrwDuA1zbvu2dV/dK8/2U0KoaWo0meDOxRVV/ZZr91wLOq6s1JXt+MuJFkOXAQcEZV/UaSDwO/QienVwO/VVV3JXku8B7g+Ob9DgZeWFWPJvnfwOuq6p+TPBH4fwv752mXBdbwvQh4aZLfbdafACwDNmzdoaoepvM/74W6AjgdeC7wm9ts+79V9dlFvKemxyjk5j3A05K8G/h74JpFfJYmT1u5udXngYuaL6p/V1Xrk5ir06utfPydJC9rlg+g8wX2y+w8Dw8BDgPWJgHYBdg0a/uHFhGnRtsgc3S2ANVl21eran2zfBOwvCmUngd8pMlFgJ+a9ZqPVNWjzfI/A+c1I2JXVNXGHmMdKgus4QvwK1X15a47LO6XL4DL6JxmtaaqfjQreQG+O/dLpB9rPTer6sEkhwO/TOd871fwk19ZNb3ayk0AquqGJM8HXgL8ryR/UlUfNFen1tDzsRmdeCFwTFV9L8n1wBPm2WcGuL2qjunymX4/mDx9zdGq+naS7yZ5WlXdM2u/ZwOf6vIe/z5r+VFgdzqXJj20daRrDrO/D5yb5O+Bk4HPJnlhVX2p2/GMGguswXgY6Hau6CeBNyR5Q1VVkiOr6guzd1jsrwpV9bUkvw/8w4Ij1rQY6dxMZ7asH1TV5Um+Aly80M/S2BrZ3Ezy88B9VfX+JHsAz07yCczVSTZq+fgzwINNcfUM4GjYYZ85O/4vA0uSHFNVn2lGYg+uqtsXGp9GyrBz9E+AP0/y8qr6fpIXAv+Rn4y0/jDJblX1w25v0BR
qX23e4yPp/ILwC1X1xW33TfL0qroVuDXJMcAzAAusKXcL8EiSL9Lp7GYn9R8B7wJuaRLrXuCUfn1wVf1Vv95LE2nUc3M/4K/zk9kE39qvz9fIG+XcPA74r0l+CHwHeA3m6qQbtXy8GvitJLfQKZi2nlbdLQ8vBt6X5PvAMcBpdL4c/wyd737vAiywxtuwc/TdwF50Cp5HgW/Qua7v+8321c3n3UznGrBuXgW8N8kfALvRGbXdrsAC3pTkBXRGv+4A/k+P8Q9VqrqdOilJkiRJWginaZckSZKkPtlpgZXOPPqbM+uOzEn+JMmXktyS5GOzp6dN8tYkdyf5cpJfHlTgkiRJkjRq5jOCdTFw0jZta4HDquoXgDtpzvlNciid6UWf1bzmPencM0SSJEmSJt5OC6yqugH41jZt11TV1ruLfxbYv1leAVxWVf9eVV8F7gaO6mO8kiRJkjSy+nEN1mv5ycwe+wFfn7VtY9O2nSSrkqxLsu5Zz3pW0blRmQ8f2z5GxkknndT2v4WP0X2MBHPUxw4eI8Ec9bGTx0gwT33s4DEvPRVYzb0aHgEu2do0x25zBlNVq6tqpqpmdt99917CkIbim9/8ZtshaEIluTfJrUnWJ1nXtO2dZG2Su5rnvXb2PuaoRp05qnFgnqpXiy6wkqykM6f+q+onc71vBA6Ytdv+wP2LD0+SpsYLquqIqppp1s8Brq2qg4Brm3VJkjTiFlVgJTkJOBt4aVV9b9amq4DTk/xUkgOBg4DP9R6mJE2dFcCaZnkNcGqLsUiSpHmazzTtlwKfAQ5JsjHJmcBfAE8C1jantLwPoKpuBz5M547LVwOvq6pHBxa9JE2GAq5JclOSVU3bvlW1CaB53qe16CRJ0rzturMdquqMOZov3MH+7wDe0UtQkjRljq2q+5PsQ+eHqy/N94VNQbYKYNmyZYOKT5IkzVM/ZhGUJPWgqu5vnjcDH6Nze4sHkiwFaJ43d3ntjycMWrJkybBCliRJXVhgSVKLkuyR5Elbl4EXAbfRuaZ1ZbPbSuDKdiKUJEkLsdNTBCfd+WvvnLP9rBMPHnIk0sJ0y10wf8fMvsDHkkCnT/7bqro6yeeBDzfXvX4NeHmLMe6Ufal6ZQ5Jo8fvGosz9QWWJLWpqu4BDp+j/V+BE4YfkSRJ6oUFVhf+kiZJkiRpobwGS5IkSZL6xAJLkiRJkvrEAkuSJEmS+sQCS5IkSZL6xAJLEy3JAUmuS7Ihye1J3ti0/2GS+5Ksbx4ntx2rJI0i+1FJWhhnEdSkewR4S1Xd3NzM9aYka5tt51fVn7YYmySNA/tRSVoACyxNtKraBGxqlh9OsgHYr92oJGl82I9K0sJ4iqCmRpLlwJHAjU3T65PckuSiJHt1ec2qJOuSrNuyZcuQIpWk0WQ/Kkk7Z4GlqZDkicDlwJuq6tvAe4GnA0fQ+WX2z+Z6XVWtrqqZqppZsmTJ0OKVpFFjPypJ82OBpYmXZDc6XwouqaorAKrqgap6tKp+BLwfOKrNGCVplNmPStL8WWBpoiUJcCGwoarOm9W+dNZuLwNuG3ZskjQO7EclaWF2OslFkouAU4DNVXVY07Y38CFgOXAv8IqqerDphC8ATga+B/xaVd08mNCleTkWeDVwa5L1TdvbgDOSHAEUnRz+zXbCk6SRZz8qSQswn1kELwb+AvjgrLZzgGur6twk5zTrZwMvBg5qHs+lc372c/sZsLQQVfVpIHNs+sSwYxlH56+9c872s048eMiRSGqL/agkLcxOTxGsqhuAb23TvAJY0yyvAU6d1f7B6vgssOc2pxBIkiRJ0sRa7DVY+zb3xdh6f4x9mvb9gK/P2m8jXe6V4bStkiRJGrQkeyb5aJIvJdmQ5Ji2Y9Jk6/ckF3OdQlBz7ei0rZIkSRqCC4Crq+oZwOHAhpbj0YRbbIH1wNZT/5rnzU37RuCAWfvtD9y/+PAkSZKkxUnyZOD5dGbCpKp+UFUPtRuVJt1iC6yrgJXN8krgyln
tr0nH0cC/bT2VUJIkSRqypwFbgL9O8oUkH0iyx7Y7eemK+mmnBVaSS4HPAIck2ZjkTOBc4MQkdwEnNuvQmVHoHuBuOjcd/O2BRC1JkiTt3K7As4H3VtWRwHfpzH79GF66on7a6TTtVXVGl00nzLFvAa/rNShJmjZJdgHWAfdV1SlJDgQuA/YGbgZeXVU/aDNGSRpDG4GNVXVjs/5R5iiwpH7q9yQXkqTFeSOPvfD6ncD5VXUQ8CBwZitRSdIYq6pvAF9PckjTdAJwR4shaQpYYElSy5LsD7wE+ECzHuB4Or+0wmPvNyhJWpg3AJckuQU4AvjjluPRhNvpKYKSpIF7F/B7wJOa9acAD1XVI81613sKSpJ2rKrWAzNtx6HpYYElTZHz1945Z/tZJx485Ei0VZJTgM1VdVOS47Y2z7HrnPcUTLIKWAWwbNmygcQoSZLmz1MEJaldxwIvTXIvnUktjqczorVnkq0/gnW9p6AzX0mSNFossCSpRVX11qrav6qWA6cD/1hVrwKuA05rdpt9v0FJkjTCLLAkaTSdDbw5yd10rsm6sOV4JEnSPHgNliSNiKq6Hri+Wb4HOKrNeCRJ0sJZYEmS5q3bRCmSJKnDUwQlSZIkqU8ssDTRkhyQ5LokG5LcnuSNTfveSdYmuat53qvtWCVpFNmPStLCWGBp0j0CvKWqngkcDbwuyaHAOcC1VXUQcG2zLknanv2oJC2ABZYmWlVtqqqbm+WHgQ3AfsAKYE2z2xrg1HYilKTRZj8qSQvjJBcL1O0C77NOPHjIkWihkiwHjgRuBPatqk3Q+fKQZJ8ur1kFrAJYtmzZcALdhpMKSBoV49qPStIwOYKlqZDkicDlwJuq6tvzfV1Vra6qmaqaWbJkyeAClKQRZz8qSfNjgaWJl2Q3Ol8KLqmqK5rmB5IsbbYvBTa3FZ8kjTr7UUmav54KrCRnNTMK3Zbk0iRPSHJgkhubWYU+lOTx/QpWWqgkAS4ENlTVebM2XQWsbJZXAlcOOzZJGgf2o5K0MIsusJLsB/wOMFNVhwG7AKcD7wTOb2YVehA4sx+BSot0LPBq4Pgk65vHycC5wIlJ7gJObNYlSduzH5WkBeh1kotdgd2T/BD4aWATcDzwymb7GuAPgff2+DnSolTVp4F02XzCMGORpHFkPypJC7PoAquq7kvyp8DXgO8D1wA3AQ9V1SPNbhvpTOW6HWcVkgbHmQclSZLa0cspgnvRuQfGgcDPAXsAL55j15rr9c4qJEmSJGnS9DLJxQuBr1bVlqr6IXAF8DxgzyRbR8b2B+7vMUZJkiRJGgu9FFhfA45O8tPNDEMnAHcA1wGnNfs4q5AkSZKkqbHoAquqbgQ+CtwM3Nq812rgbODNSe4GnkJnaldJkiRJmng9zSJYVW8H3r5N8z3AUb28ryRJkiSNo55uNCxJkiRJ+gkLLEmSJEnqEwssSZIkSeoTCyxJalGSJyT5XJIvJrk9yX9r2g9McmOSu5J8KMnj245VkiTtnAWWJLXr34Hjq+pw4AjgpCRHA+8Ezq+qg4AHgTNbjFGSJM2TBZYktag6vtOs7tY8Cjiezq0wANYAp7YQniRJWiALLElqWZJdkqwHNgNrga8AD1XVI80uG4H92opPkiTNnwWWJLWsqh6tqiOA/encR/CZc+0212uTrEqyLsm6LVu2DDJMSZI0DxZYkjQiquoh4HrgaGDPJFtvBr8/cH+X16yuqpmqmlmyZMlwApUkSV3tuvNdJEmDkmQJ8MOqeijJ7sAL6UxwcR1wGnAZsBK4sr0opfacv/bOOdvPOvHgIUeicZZkF2AdcF9VndJ2PJpsFliS1K6lwJrmf/6PAz5cVR9PcgdwWZL/AXwBuLDNICVpzL0R2AA8ue1ANPkssCSpRVV1C3DkHO330LkeS5LUgyT7Ay8B3gG8ueVwNAUssDTRklwEnAJsrqrDmrY/BH4D2DojwNuq6hPtRChJo8++VGPuXcDvAU/qtkOSVcAqgGX
Llg0prOHrdsqt+stJLjTpLgZOmqP9/Ko6onn4hUCSduxi7Es1hpJs/WHgph3t54RB6qepGcGyYp9OVXVDkuVtxyFJ48y+VGPsWOClSU4GngA8OcnfVNV/bjkuTbCpKbCkbbw+yWvozCj0lqp6cK6dPGVAknZop33ptPSjGk1V9VbgrQBJjgN+1+JKg9bTKYJJ9kzy0SRfSrIhyTFJ9k6yNsldzfNe/QpW6pP3Ak8HjgA2AX/WbUdPGZCkrubVl9qPSpo2vV6DdQFwdVU9AziczvSX5wDXVtVBwLXNujQyquqBqnq0qn4EvB9napOkBbMv1bipquu9B5aGYdEFVpInA8+nuTdLVf2gqh4CVgBrmt3WAKf2GqTUT0mWzlp9GXBbW7FI0riyL5WkufVyDdbT6EzN+tdJDgduonMTt32rahNAVW1Kss9cL/acbA1DkkuB44CnJtkIvB04LskRQAH3Ar/ZWoCSNAbsSyVp/nopsHYFng28oapuTHIBCzgdsKpWA6sBZmZmqoc4pK6q6ow5mi8ceiDz4EQTkkbVOPWlktS2XgqsjcDGqrqxWf8onQLrgSRLm9GrpcDmXoMcBzv6cnzWiQcPMRJJGn3d+kz7S0nSuFv0NVhV9Q3g60kOaZpOAO4ArgJWNm0rgSt7ilCSJEmSxkSv98F6A3BJkscD9wC/Tqdo+3CSM4GvAS/v8TMkSZIkaSz0VGBV1XpgZo5NJ/TyvpKk/vF0PEmShqfX+2BJkiRJkhoWWJIkSZLUJxZYkiRJktQnFliSJEmS1CcWWJIkSZLUJxZYkiRJktQnvd4HS9IUctpvSZKkuTmCJUmSJEl94giWJLUoyQHAB4GfBX4ErK6qC5LsDXwIWA7cC7yiqh5sK05J0ujxjJLR5AiWJLXrEeAtVfVM4GjgdUkOBc4Brq2qg4Brm3VJkjTiLLAkqUVVtamqbm6WHwY2APsBK4A1zW5rgFPbiVCSJC2EpwgOgcO3kuYjyXLgSOBGYN+q2gSdIizJPl1eswpYBbBs2bLhBCpJkrpyBEuSRkCSJwKXA2+qqm/P93VVtbqqZqpqZsmSJYMLUJIkzYsFliZakouSbE5y26y2vZOsTXJX87xXmzFKSXajU1xdUlVXNM0PJFnabF8KbG4rPsm+VJLmz1MENekuBv6CzixtW22dPODcJOc062e3EJtEkgAXAhuq6rxZm64CVgLnNs9XthBez7qdIq2xczH2pZI0L45gaaJV1Q3At7ZpdvIAjZJjgVcDxydZ3zxOplNYnZjkLuDEZl1qhX2pJM1fzyNYSXYB1gH3VdUpSQ4ELgP2Bm4GXl1VP+j1c6Q+mtfkAeAEAhq8qvo0kC6bTxhmLNICtToRixNISRpV/ThF8I10phV+crP+TuD8qrosyfuAM4H39uFzpKGrqtXAaoCZmZlqORxp4vmlefLYj0qaNj2dIphkf+AlwAea9QDHAx9tdvGUAY0iJw+QpN7Zl0rSHHq9ButdwO8BP2rWnwI8VFWPNOsb6dwwcztJViVZl2Tdli1begxDWpCtkwfAGE8eIEktsy+VpDksusBKcgqwuapumt08x65zng7gvVs0DEkuBT4DHJJkY5IzcfIASVoQ+1JJmr9ersE6FnhpM9vVE+hcg/UuYM8kuzajWPsD9/ceprQ4VXVGl01OHjAAi5mS22trpNFnXypJ87foEayqemtV7V9Vy4HTgX+sqlcB1wGnNbt5yoAkSZKkqTGI+2CdDbw5yd10rsm6cACfIUmSJEkjpx/TtFNV1wPXN8v3AEf1430nndMRS5IkSZOlLwWWJGn8+COPpEmX5ADgg8DP0pn1enVVXdBuVJp0FliSJEmaVI8Ab6mqm5M8CbgpydqquqPtwDS5BnENliRJktS6qtpUVTc3yw8DG+hyj1apXxzBkiRJ0sRLshw4Erhxjm2rgFUAy5YtG2pc87HQ26As5rYpC+Vp5t05giVJkqSJluSJwOXAm6rq29tur6rVVTVTVTN
LliwZfoCaKBZYkiRJmlhJdqNTXF1SVVe0HY8mnwWWJEmSJlKS0Lkn64aqOq/teDQdLLAkSZI0qY4FXg0cn2R98zi57aA02ZzkQpIkSROpqj4NpO04NF0cwZIkSZKkPrHAkiRJkqQ+scCSJEmSpD7xGixJalGSi4BTgM1VdVjTtjfwIWA5cC/wiqp6cFgxDeMGlZIkTSpHsCSpXRcDJ23Tdg5wbVUdBFzbrEuSpDHgCJamVpJ7gYeBR4FHqmqm3Yg0jarqhiTLt2leARzXLK8BrgfOHlpQ0gLYl0rSY1lgadq9oKq+2XYQ0jb2rapNAFW1Kck+3XZMsgpYBbBs2bIhhSdtx75UkhqLPkUwyQFJrkuyIcntSd7YtO+dZG2Su5rnvfoXriRptqpaXVUzVTWzZMmStsORJGnq9XIN1iPAW6rqmcDRwOuSHIrXDmh8FHBNkpuaUYDtJFmVZF2SdVu2bBlyeJpiDyRZCtA8b245HmlHdtiX2o9KmjaLLrCqalNV3dwsPwxsAPajc+3Amma3NcCpvQYpDcixVfVs4MV0fiB4/rY7ODqgllwFrGyWVwJXthiLtDM77EvtRyVNm75cg9VcoH0kcCPzvHbA6wa66zZF8lknHjzkSCZbVd3fPG9O8jHgKOCGdqPStElyKZ0JLZ6aZCPwduBc4MNJzgS+Bry8vQilHbMvlaTH6rnASvJE4HLgTVX17STzel1VrQZWA8zMzFSvcUgLkWQP4HFV9XCz/CLgv7cclqZQVZ3RZdMJQw1EWgT7UknaXk8FVpLd6BRXl1TVFU3zA0mWNqNXXjugUbUv8LHmB4Fdgb+tqqvbDUmSxo59qSRtY9EFVjq96YXAhqo6b9amrdcOnIvXDmhEVdU9wOFtxyFJ48y+VJK218sI1rHAq4Fbk6xv2t6G1w5IkiRJC9btOvxxstC5BCZx7oFFF1hV9Wmg2wVXXjswRJOYmJIkSdI46uU+WJIkSZKkWfoyTbuGYxKGjSVJkqRJ5giWJEmSJPWJBZYkSZIk9YkFliRJkiT1iQWWJEmSJPWJBZYkSZIk9YmzCEqSRt5i7vfnPQIlSW1wBEuSJEmS+sQCS5IkSZL6xAJLkiRJkvrEAkuSJEmS+sQCS5IkSZL6ZOJmEew2a9Q0Wsy/hbNrSRoni+nnnF1QkjRIE1dgSaPOHwF645djSZI0yjxFUJIkSZL6ZGAjWElOAi4AdgE+UFXnDuqzpMUwRzXqzNHhcnR0ccxTjTpzVMM2kBGsJLsAfwm8GDgUOCPJoYP4LGkxzFGNOnNU48A81agzR9WGQY1gHQXcXVX3ACS5DFgB3DGgz9OATeAvu+aoRp05qnFgnmrUmaMaukEVWPsBX5+1vhF47uwdkqwCVjWr30ny5Tne56nANwcS4Whr7bjfPOD9d6DbMV9dVSf172N+bKc5CubpDvTtmPuVQ33MxR0ZZp6ao73ry3EPKbf6ZeT60nnmKPj3mhY7Omb70tHT2v/vW/5vuae+dFAFVuZoq8esVK0GVu/wTZJ1VTXTz8DGwTQedwvHvNMcBfO0m2k8Zhj6cZujPZrG4x7FvnQ+OQr+vabFKOYo2Jd2M43HDL0f96BmEdwIHDBrfX/g/gF9lrQY5qhGnTmqcWCeatSZoxq6QRVYnwcOSnJgkscDpwNXDeizpMUwRzXqzFGNA/NUo84c1dAN5BTBqnokyeuBT9KZEvOiqrp9EW+101MKJtQ0HvdQj7mPOQr+vabJ0I7bHO2LaTxu+9Lx4jEPmDnas2k8ZujxuFO13WmokiRJkqRFGNQpgpIkSZI0dSywJEmSJKlPRrbASnJSki8nuTvJOW3HMyhJ7k1ya5L1SdY1bXsnWZvkruZ5r7bj7FWSi5JsTnLbrLY5jzMdf9787W9J8uz2Iu9uWnIUpiNPJzFHYXrydBpyFCYzT6clR2E68tQcHW/TkKMw+DwdyQIryS7AXwIvBg4FzkhyaLtRDdQLquqIWfPtnwNcW1UHAdc26+P
uYmDbG7N1O84XAwc1j1XAe4cU47xNYY7C5OfpxUxQjsJU5umk5yhMWJ5OYY7C5OfpxZij427ScxQGnKcjWWABRwF3V9U9VfUD4DJgRcsxDdMKYE2zvAY4tcVY+qKqbgC+tU1zt+NcAXywOj4L7Jlk6XAinbdpz1GYsDydwBwF83SichQmMk+nPUdhwvLUHJ1IE5WjMPg8HdUCaz/g67PWNzZtk6iAa5LclGRV07ZvVW0CaJ73aS26wep2nOPw9x+HGPtpWvN0nHMUxifOfpjWHIXxztNxiLGfpjVPzdHxMa05Cn3M04HcB6sPMkfbpM4nf2xV3Z9kH2Btki+1HdAIGIe//zjE2E/m6WONy99/XOLsB3N0e+Pw9x+HGPvJPH2scfj7j0OM/WSObm/BOTCqI1gbgQNmre8P3N9SLANVVfc3z5uBj9EZin5g69Bj87y5vQgHqttxjsPffxxi7JspztNxzlEYnzh7NsU5CuOdp+MQY99McZ6ao2NiinMU+pino1pgfR44KMmBSR4PnA5c1XJMfZdkjyRP2roMvAi4jc6xrmx2Wwlc2U6EA9ftOK8CXtPM2nI08G9bh2xHyFTkKEx9no5zjsKU5OmU5yiMd55ORY7C1OepOToGpjxHoZ95WlUj+QBOBu4EvgL8ftvxDOgYnwZ8sXncvvU4gafQmb3kruZ577Zj7cOxXgpsAn5I55eAM7sdJ52h2L9s/va3AjNtx9/lmCY+R5vjnIo8ncQcbWKd+Dydlhxtjmni8nQacrQ5zqnIU3N0fB/TkqPNMQ00T9O8UJIkSZLUo1E9RVCSJEmSxo4FliRJkiT1iQWWJEmSJPWJBZYkSZIk9YkFliRJkiT1iQVWC5K8LEkleUbbsUjdmKcadeaoRp05qnFgnvafBVY7zgA+TedmddKoMk816sxRjTpzVOPAPO0zC6whS/JE4Fg6NzQ7vWl7XJL3JLk9yceTfCLJac225yT5VJKbknwyydIWw9eUME816sxRjTpzVOPAPB0MC6zhOxW4uqruBL6V5NnAfwKWA/8B+C/AMQBJdgPeDZxWVc8BLgLe0UbQmjrmqUadOapRZ45qHJinA7Br2wFMoTOAdzXLlzXruwEfqar2rX05AAABPklEQVQfAd9Icl2z/RDgMGBtEoBdgE3DDVdTyjzVqDNHNerMUY0D83QALLCGKMlTgOOBw5IUncQs4GPdXgLcXlXHDClEyTzVyDNHNerMUY0D83RwPEVwuE4DPlhVP19Vy6vqAOCrwDeBX2nOed0XOK7Z/8vAkiQ/HppN8qw2AtdUMU816sxRjTpzVOPAPB0QC6zhOoPtfxW4HPg5YCNwG/BXwI3Av1XVD+gk/zuTfBFYDzxveOFqSpmnGnXmqEadOapxYJ4OSKqq7RhEZxaXqvpOM1z7OeDYqvpG23FJs5mnGnXmqEadOapxYJ72xmuwRsfHk+wJPB74I5NYI8o81agzRzXqzFGNA/O0B45gSZIkSVKfeA2WJEmSJPWJBZYkSZIk9YkFliRJkiT1iQWWJEmSJPWJBZYkSZIk9cn/B2h/ie+85Tu9AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visualize the correlation between Title and Age\n", + "grid = sns.FacetGrid(df, col='Title', size=3, aspect=0.8, sharey=False)\n", + "grid.map(plt.hist, 'Age', alpha=.5, bins=range(0,105,5))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "_cell_guid": "770f5c78-948f-4261-ae6f-dda0a05da8e6", + "_execution_state": "idle", + "_uuid": "9b312ecc51104501196e3f6806abc92d29e9eb46" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
Title
Master5.482642
Miss21.795236
Mr32.252151
Mrs36.930636
Others45.074074
\n", + "
" + ], + "text/plain": [ + " Age\n", + "Title \n", + "Master 5.482642\n", + "Miss 21.795236\n", + "Mr 32.252151\n", + "Mrs 36.930636\n", + "Others 45.074074" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the mean Age for each Title\n", + "df[['Title', 'Age']].groupby(['Title']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "_cell_guid": "b81954da-2e36-410f-9505-c6b7ae26803f", + "_execution_state": "idle", + "_uuid": "ba25e608acce46fba9b0c623f8b62b91f29151a7" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
Title
Master4.161554
Miss12.192794
Mr12.422089
Mrs12.872625
Others11.303253
\n", + "
" + ], + "text/plain": [ + " Age\n", + "Title \n", + "Master 4.161554\n", + "Miss 12.192794\n", + "Mr 12.422089\n", + "Mrs 12.872625\n", + "Others 11.303253" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the standard deviation of Age for each Title\n", + "df[['Title', 'Age']].groupby(['Title']).std()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "_cell_guid": "38208976-cd0d-4756-aebc-5d980d7357a5", + "_execution_state": "idle", + "_uuid": "3d9ea40ed50748bfac7990788916d49eb3886174" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1gAAADQCAYAAAAalMCAAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X+0ZXV53/H3R8BIRIuYgUyEyZAWDWor6l0UJXEhBIvGFbBFK7UGG9JJV0yraBLRZi1tNWthfoimJtaJUsYuFRClUGKNsyYQwqqiAyI/JIoSlo4zYUaFCEmqAZ7+sffVy8y9c+/ce87ZZ+/zfq211zl7n73vefa9z+w5z/l+9/ebqkKSJEmStHaP6ToASZIkSRoKCyxJkiRJGhELLEmSJEkaEQssSZIkSRoRCyxJkiRJGhELLEmSJEkaEQssIMnDSW5ZsGwcw3u8Jsl7l3jt/476/fb6+Z9Kcn+Sa8b5Phq/IedqkhOSfCbJHUluTfKvx/VeGq+B5+lPJrmpPa87kvyHcb2Xxm/IubrgPZ6Y5JtLxaB+GHqu7nV+V4/zvSbh4K4DmBJ/X1UnHOhBSQ6qqofX+uZV9fy1/oxl/C7wo8CvjPl9NH5DztW/A36xqu5K8hPATUn+tKruH+N7ajyGnKe7gOdX1feSHAbcnuTqqto5xvfU+Aw5V+e9HfjzCbyPxmvoubqq85tWtmAtIcnGJH+R5OZ2eX67/ZQk1yb5CHBbu+3fJvlcW3W/P8lBS/zYY9rWpC8neeuC93pwwc++LskVSf4yyYeTZK3nUlXbgAfW+nM0nYaSq1X1laq6q32+E9gNrFvLz9T0GFCefr+qvteu/gj+Pzo4Q8nV9uc+FzgK+PRaf5amz5BydXCqauYX4GHglna5st32o8Dj2ufHAdvb56cAfwsc264fD/xv4JB2/Y9ovoXf+z1eQ/PN55OBQ4Hbgbn2tQcX/Oy/AY6m+U/7M8DPLPKzfmNBvAuXP9jPOZ4CXNP179rFXF0uV9vjTgTuBB7T9e/cxTxdZP9jgFtpWl1f2/Xv28VcXSxX259zXZuvrwHe2/Xv28Vc3c919SFgO/BZ4Kyuf99rXewi2FisWfIQ4L1JTqBJ6qcueO1zVfVX7fPTgOcCn28L+ENpvnlfzNaq+jZAkk8AP0OTTAt9rqp2tPvcAmwEbli4Q1X9Lk23P82ewedqkvXA/wTOrapHDuRYTY1B52lVfQP4Z2m6sv6vJFdU1b0rPV5TZci5+qvAJ6vqGzYwDMKQcxVgQ1XtTPJTwJ8lua2qvnYAx08VC6ylnQ/cCzyLpkL/fwte+9sFzwNsqao3Lzw4ycuA
+abVX24fa6/32Hsd4HsLnj/MIn+jJL8BvGqRY6+vqv+0yHYN22ByNckTgT8BfquqPrvIceqvweTpD96s+TBwB/CzwBVL7afeGUquPg/42SS/ChwGPDbJg1V1wSLHq5+GkqtUex9rVd2d5Drg2YAF1gD9I2BHVT2S5Fxgqb6q24CrklxUVbuTHAE8oaquBK6c3ynJM4HT29f/HjgL+KXVBGYLlvYyiFxN8tg2jg9V1cdW836aakPJ06OBb1fV3yd5EnAy8K7VvK+m1iBytap+8OE2yWtounpZXA3LIHK1vZb+XTWDB/0YzXX1d1bzvtPCm3OX9kfAuUk+S9Pk+reL7VRVXwJ+C/h0kluBrcD6JX7mDTRdn24BPl5Veze5jkWSvwA+BpyWZEeSfzGJ99XEDCVXXwG8AHhNfjhU62BGFNJg8vR44MYkX6QZme33quq2CbyvJmcouarhG0quHg9sb6+r1wIXtjH3VqoWa/mTJEmSJB0oW7AkSZIkaURWVGAlOXzBePd3JnlekiOSbE1yV/v4pHEHK0lDkeSgJF9Ick27fmySG9tr6mXtPWmSJKlnVtqC9R7gU1X10zQjldwJXABsq6rjaG6e88ZJSVq519FcS+e9E7iovabeB5zXSVSSJGlNli2w2mGTXwB8EH4wi/39wJnAlna3LTQjjUiSltGORPfzwAfa9QCn8sOhvr2mSpLUUytpwfopYA/wP9ruLB9I8njgqKraBdA+HrnYwUk2JdmeZPsznvGMohlP38VlsWUqnHHGGV3/Hlymf1mrdwO/CcxPpPxk4P6qeqhd3wE8ZbEDvaa6HOAyFbyuuiyzTAXz1GUFy4qspMA6GHgO8L6qejbNEJAr7g5YVZuraq6q5g499NCVHiZ15lvf+lbXIWjAkrwU2F1VNy3cvMiui17Ivaaqj7yuqg/MU43KSgqsHTSTmN3Yrl9BU3Ddm2Q9QPu4ezwhStKgnAz8QpJ7gEtpuga+Gzg8yfzk70cDO7sJT5IkrcWyBVZV/TXwjSRPazedBnwJuBo4t912LnDVWCKUpAGpqjdX1dFVtRF4JfBnVfUqmskVz25385oqSVJPHbz8LgD8R+DD7bDBdwP/jqY4uzzJecDXgZePJ0RJmglvAi5N8g7gC7QDC0mSpH5ZUYFVVbcAc4u8dNpow5Gk2VFV1wHXtc/vBk7sMh5JkrR2K50HS5IkSZK0jJV2EZR6oR044AHgYeChqppLcgRwGbARuAd4RVXd11WMkiRJGq6ZKbAu2vqVRbeff/pTJxyJJuCFVbVwrNULgG1VdWGSC9r1N3UT2vLMVQ2Vua1pYj5qqMzt7tlFULPgTGBL+3wLcFaHsUhSLyR5XJLPJflikjuS/Jd2+7FJbkxyV5LL2gGwJEktCywNTQGfTnJTkk3ttqOqahdA+3jk3gcl2ZRke5Lte/bsmWC4kjS1vgecWlXPAk4AzkhyEvBO4KKqOg64DzivwxglaepYYGloTq6q5wAvBl6b5AUrOaiqNlfVXFXNrVu3brwRSlIPVOPBdvWQdimaybGvaLfbK0CS9mKBpUGpqp3t427gSpphr+9Nsh6gfdzdXYSS1B9JDkpyC811cyvwNeD+qnqo3WUH8JQljrVngKSZZIGlwUjy+CRPmH8OvAi4HbgaOLfd7Vzgqm4ilKR+qaqHq+oE4GiaL6yOX2y3JY61Z4CkmTQzowhqJhwFXJkEmtz+SFV9KsnngcuTnAd8HXh5hzFKUu9U1f1JrgNOAg5PcnDbinU0sLPT4CRpylhgaTCq6m7gWYts/zZw2uQjkqT+SrIO+Ie2uDoU+DmaAS6uBc4GLsVeAZK0DwssSZK0mPXAliQH0dxScHlVXZPkS8ClSd4BfAH4YJdBSkPh/FXDYYElSZL2UVW3As9eZPvdNPdjSVMhyeOA64Efoflse0VVvTXJsTQtrUcANwOvrqrvdxepZoWDXEiSJKnPnLNNU8UCS5IkSb3lnG2aNhZYkiRJ6rXVztnm
fG0aBwssSZIk9dpq52xzvjaNw4oGuUhyD/AA8DDwUFXNJTkCuAzYCNwDvKKq7htPmJIkSdL+OWebpsGBtGC9sKpOqKq5dv0CYFt74+C2dl2SJEmamCTrkhzePp+fs+1OfjhnGzhnmyZoLV0Ez6S5YRC8cVCSJEndWA9cm+RW4PPA1qq6BngT8IYkXwWejHO2aUJWOg9WAZ9OUsD7q2ozcFRV7QKoql1JjlzswCSbgE0AGzZsGEHIkiRJUsM52zRtVlpgnVxVO9siamuSv1zpG7TF2GaAubm5fW4ulCQNx0Vbv9J1CJIkdWpFBVZV7Wwfdye5kubbgHuTrG9br9bTDIspaUyW+uB6/ulPnXAkkiRJWsqy92AleXySJ8w/B14E3A5cTXPDIHjjoCRJkiStqAXrKODKJPP7f6SqPpXk88DlSc4Dvg68fHxhSpIkSRoHe8mM1rIFVnuD4LMW2f5t4LRxBCVJkiRJfbSWYdolSZIkSQtYYEmSpH0kOSbJtUnuTHJHkte129+W5JtJbmmXl3QdqyRNk5UO0y5JkmbLQ8Abq+rmdrCrm5JsbV+7qKp+r8PYJGlq2YIlSROU5HFJPpfki22rwH9ptx+b5MYkdyW5LMlju45Vs62qdlXVze3zB4A7gad0G5UkTT8LLA1KkoOSfCHJNe26H1o1bb4HnFpVzwJOAM5IchLwTppWgeOA+4DzOoxRepQkG4FnAze2m34tya1JLk7ypCWO2ZRke5Lte/bsmVCkktQ9CywNzetovmWd54dWTZVqPNiuHtIuBZwKXNFu3wKc1UF40j6SHAZ8HHh9VX0XeB/wj2m+INgF/P5ix1XV5qqaq6q5devWTSxeSeqaBZYGI8nRwM8DH2jXgx9aNYXaltZbgN3AVuBrwP1V9VC7yw6W6Iplq4AmKckhNMXVh6vqEwBVdW9VPVxVjwB/DJzYZYySNG0ssDQk7wZ+E3ikXX8yfmjVFGo/nJ4AHE3z4fT4xXZb4lhbBTQR7ZdUHwTurKp3Ldi+fsFuLwNun3RskjTNLLA0CEleCuyuqpsWbl5kVz+0ampU1f3AdcBJwOFJ5kd2PRrY2VVcUutk4NXAqXsNyf47SW5LcivwQuD8TqPUTHM6AU0jh2nXUJwM/EJ7AX0c8ESaFq3DkxzctmL5oVWdS7IO+Iequj/JocDP0dwreC1wNnApcC5wVXdRSlBVN7D4F1WfnHQs0n44nYCmjgWWBqGq3gy8GSDJKcCvV9WrknwMP7RquqwHtiQ5iKYXweVVdU2SLwGXJnkH8AWarllT66KtX+k6BEmiqnbRDLZCVT2QxOkE1DkLLA3dm+jRh1YNX1XdSjPc9d7b78bBAiRp1faaTuBkmukEfhHYTtPKdd8ix2wCNgFs2LBhYrFq2LwHS4NTVddV1Uvb53dX1YlV9U+q6uVV9b2u45MkSaPldAKaJrZgSZIktZbq/nr+6U+dcCRaqaWmE1jw+h8D13QUnmaQLViSJEnqJacT0DSyBUuSJEl9NT+dwG3tBO4AbwHOSXICzfQs9wC/0k14mkUrLrDaEa+2A9+sqpcmOZZmZLYjgJuBV1fV98cTpiRJkvRoTiegaXQgXQRfB9y5YP2dNPMLHAfcB5w3ysAkSZIkqW9W1IKV5Gjg54HfBt7Q9nc9Ffg37S5bgLfRjNgiSZK0LAeUkDREK23Bejfwm8Aj7fqTgfur6qF2fQdLTOqWZFOS7Um279mzZ03BSpIkSdI0W7YFK8lLgd1VdVOSU+Y3L7JrLXZ8VW0GNgPMzc0tuo8kSZKklVuqBVjdW0kXwZOBX0jyEuBxwBNpWrQOT3Jw24p1NLBzfGFKkiRJ0vRbtsCqqjcDbwZoW7B+vapeleRjwNk0IwmeC1w1xjglSZI6s7/WAu8Zk7TQWiYafhPNgBdfpbkn64OjCUmSJEmS+umAJhququuA69rndwMnjj4kafjsNy1p2iU5BvgQ8OM0g1xtrqr3JDkCuAzYSDOB6yuq6r6u
4pSkabOWFixJkjRcDwFvrKrjgZOA1yZ5OnABsK2dB3Nbuy5JallgSZKkfVTVrqq6uX3+AHAnzZQsZ9LMf0n7eFY3EUrSdDqgLoKSJGn2JNkIPBu4ETiqqnZBU4QlOXKJYzYBmwA2bNgwmUCnjBMpS7PJFixJkrSkJIcBHwdeX1XfXelxVbW5quaqam7dunXjC1CSpowFliRJWlSSQ2iKqw9X1SfazfcmWd++vh7Y3VV8EjQDsiS5NsmdSe5I8rp2+xFJtia5q318UtexajZYYEmSpH0kCc0ULHdW1bsWvHQ1zfyX4DyYmg4OyKKpYoElSZIWczLwauDUJLe0y0uAC4HTk9wFnN6uS51xQBZNGwe50GAkeRxwPfAjNLl9RVW9NcmxwKXAEcDNwKur6vvdRSpJ06+qbgCyxMunTTKWaefchtNjNQOySKNmgaUh+R5walU92N43cEOS/wO8Abioqi5N8t+B84D3dRmoJEkarb0HZGl6uS57zERHu1xNMW4B3z92EdRgVOPBdvWQdingVOCKdrtdBCRJGpjVDsjiaJcaBwssDUqSg5LcQnMR3Qp8Dbi/qh5qd9lB0y977+M2JdmeZPuePXsmF7AkSVoTB2TRtLGLoAalqh4GTkhyOHAlcPxiuy1y3GZgM8Dc3Nw+r0t9sr/uJAc6waldUyT1wPyALLe1X7ICvIVmAJbLk5wHfB14eUfxacZYYGmQqur+JNfRDNd6eJKD21aso4GdnQYnSZJGxgFZxmepL9kO9Mu6WWMXQQ1GknVtyxVJDgV+jmao1muBs9vd7CIgSZKksZn5Fiwr80FZD2xJchDNlweXV9U1Sb4EXJrkHcAXaPppS5IkSSM38wWWhqOqbqWZ+2Lv7XcDJ04+IkmSJM2aZQssJ2+VpputsP2S5BjgQ8CPA48Am6vqPUmOAC4DNgL3AK+oqvu6ilPqktc1SX22knuw5idvfRZwAnBGkpOAd9JM3noccB/N5K2SpP17CHhjVR1PMwjLa5M8HbgA2NZeU7e165IkqWeWLbCcvFWSRqeqdlXVze3zB2gGYnkKcCbNtRS8pkqS1FsrGkVwtZO3tsc6gaskLSLJRpr7Bm8EjqqqXdAUYcCRSxzjNVWSpCm2okEuVjt5a3usE7hK0l6SHAZ8HHh9VX03WWoKl0fzmiodGCfLljRpBzQPVlXdD1zHgslb25ecvFWSVijJITTF1Yer6hPt5nuTrG9fX0/TY0CSJPXMsgWWk7dK0uikaar6IHBnVb1rwUtX01xLwWuqpkCSi5PsTnL7gm1vS/LNJLe0y0u6jFGSptFKugg6easkjc7JwKuB29p7WwHeAlwIXJ7kPODrwMs7im9qOFR35y4B3kszrcBCF1XV700+HEnqh2ULrL5N3mpfa0nTrKpuAJa64eq0ScYi7U9VXd8OxCJJOgAHdA+WJEmaeb+W5Na2C+GTltrJES8lzaoVjSIoSdK42POgV94HvJ1m5OC3A78P/NJiOzripSYlycXAS4HdVfXMdtvbgH8PzFf3b6mqT3YToWaNLViSJGlFqureqnq4qh4B/pgpvFVAM+kS4IxFtl9UVSe0i8WVJsYCS5Ikrcj8VAKtlwG3L7WvNClVdT3wna7jkOZZYEmSpH0k+SjwGeBpSXa0I1z+TpLbktwKvBA4v9Mgpf1b9n5B7xXUOHgPliRJ2kdVnbPIZqdkUV+s6H5B7xXUONiCJUmSpEHxfkF1yQJLkiRJg+L9guqSXQQlSZLUW+39gqcAP5ZkB/BW4JQkJ9B0EbwH+JXOApwSTokxORZYkiRJ6i3vF9S0sYugBiPJMUmuTXJnkjuSvK7dfkSSrUnuah8XHUlIkiRJWisLLA3JQ8Abq+p44CTgtUmeDlwAbKuq44Bt7bokSZI0chZYGoyq2lVVN7fPHwDuBJ4CnAlsaXfbApzVTYSSJEkaOgssDVKSjcCzgRuBo6pqFzRFGHDkIvs70aAkSZLWzEEuNDhJDgM+Dry+qr6b
ZNljnGhQkvqtTyOkLRXr+ac/dcKRSBoHW7A0KEkOoSmuPlxVn2g33zs/H0b7uLur+CRJkjRsyxZYjsymvkjTVPVB4M6qeteCl64Gzm2fnwtcNenYJEmSNBtW0kVwfmS2m5M8AbgpyVbgNTQjs12Y5AKakdneNL5QpWWdDLwauC3JLe22twAXApcnOQ/4OvDyjuKTJEnqPbu57t+yBVY7KMD8AAEPJFk4Mtsp7W5bgOuwwFKHquoGYKkbrk6bZCySJEmaTQc0yMX+RmZLss/IbO0xm4BNABs2bFhLrBNlZS5JkiTpQK14kIu9R2Zb6XFVtbmq5qpqbt26dauJUZIkTViSi5PsTnL7gm3efy1Jy1hRC9b+RmZrW68cmU2SpGG5BHgv8KEF2y7A+68lLcEeYI2VjCLoyGySJM2Yqroe+M5em8+kue+a9vGsiQYlLcLWVk2blbRgOTKbJEmCFd5/Df29B3ta2TKwX5dga6umyEpGEXRkNkmSdECqajOwGWBubq46DkcDVlXXtwOxLeRo1+rMige5kCRJM+/e9r5rvP9aU+5Rra3Akq2t0qhZYEmSpJXy/msNSpJNSbYn2b5nz56uw9FAWGBJkqR9JPko8BngaUl2tPdcXwicnuQu4PR2XZpGK2ptdTohjcMBTTQsSVq7JBcDLwV2V9Uz221HAJcBG4F7gFdU1X2jfm9vlNdKVdU5S7zk/dfqg/nW1guxtVUTZguWJE3eJcAZe22bH/HqOGBbuy5JWoatrZo2tmBJA2VLxfRyxCtJGh1bWzVtbMGSpOmwohGvvCFbkqTpZguWJPWIcwtpli3VMj8UQz8/aVbYgiVJ08H5hSRJGgBbsA7Q/r5d8t4WSWvgiFeSNBC2Rs42W7AkacIc8UqSpOGyBUuSJswRryRJGi5bsCRJkiRpRCywNBhJLk6yO8ntC7YdkWRrkrvaxyd1GaMkSZKGzS6CGpJLgPcCH1qw7QJgW1VdmOSCdt3JW6W9eEO2JEmjYQuWBqOqrge+s9fmM4Et7fMtwFkTDUqSJEkzZdkCy25X6rmjqmoXQPt45GI7JdmUZHuS7Xv27JlogJIkSRqOlbRgXQKcsde2+W5XxwHb2nWpt6pqc1XNVdXcunXrug5HkiRJPbVsgWW3K/XcvUnWA7SPuzuOR5J6L8k9SW5LckuS7V3HI0nTZLWDXDyq21WSRbtdQdP1CtgEsGHDhlW+nbRqVwPn0kzaei5wVbfhdG9/gxmcf/pTJxiJpJ57YVV9q+sgpP1Jcg/wAPAw8FBVzXUbkWbB2Ae5sOuVJiXJR4HPAE9LsiPJeTSF1elJ7gJOb9clSdLseGFVnWBxpUlZbQvWvUnWt61XdrtaxlItBrYWjFZVnbPES6dNNBBJY7WaIeW93o5cAZ9OUsD7q2rz3jvYg0XSrFptC9Z8tyuw25UkSbPm5Kp6DvBi4LVJXrD3DvZg0ZSY/zLgprbofxRHEdY4rGSYdrtdSZKkH6iqne3jbuBK4MRuI5KWtN8vA/wiQOOwbBdBu12t3Gq6rUiS1CdJHg88pqoeaJ+/CPivHYclLWrhlwFJ5r8MuL7bqDR0Yx/kQpIkDcpRwA1Jvgh8DviTqvpUxzFJ+0jy+CRPmH9O82XA7d1GpVmw2kEuJEnSDKqqu4FndR2HtAJHAVcmgeYz70f8MkCTYIElSZKkwRnllwGOCK0DYRdBSZIkSRoRW7AkSYPnt8+SpEmxBUuSJEmSRsQCS5IkSZJGxAJLkiRJkkbEe7AkLetA71/xfhdJkjSrbMGSJEmSpBGxBUs6AENvmVnq/CRJ0r78f3NlVvN76vNnK1uwJEmSJGlELLAkSZIkaUTsIihp1ewaIUmS9Gi9LLCG8qFuEvfzDP2eIUmSZtmBfiY60NFf93eMNE59/gzbywJLkqRR6PN/4JKk6bSmAivJGcB7gIOAD1TVhSOJqjWUlqoDNavnPU7jztWl+Ld8tFkbRWg1uspV6UCYp+oLc1VdWPUgF0kO
Av4QeDHwdOCcJE8fVWDSqJir6gtzVX1gnqovzFV1ZS2jCJ4IfLWq7q6q7wOXAmeOJixppMxV9YW5qj4wT9UX5qo6sZYugk8BvrFgfQfwz/feKckmYFO7+mCSLy/ys34M+NYaYumrzs77DV286Q8tdd6fqqozxvB+y+bqCvMUloi949/nJJirjzbtueo1dY169m96f+c9jlz1//+169U1dYT/Hvp6TQVzdWpM6Pq8plxdS4GVRbbVPhuqNgOb9/uDku1VNbeGWHrJ857cWy6y7VG5upI8Bf9mXccxaX3NVf9es2Ua8xTM1f3xvCf3lots8///A+B5r85augjuAI5ZsH40sHMNP08aF3NVfWGuqg/MU/WFuapOrKXA+jxwXJJjkzwWeCVw9WjCkkbKXFVfmKvqA/NUfWGuqhOr7iJYVQ8l+TXgT2mGvry4qu5Y5Y9btml2oDzvCTBXR8LznoAR5qp/r9nS1zwF/2azxlztH897FVK1T7dpSZIkSdIqrKWLoCRJkiRpAQssSZIkSRqRzgusJGck+XKSrya5oOt4xinJPUluS3JLku3ttiOSbE1yV/v4pK7jXKskFyfZneT2BdsWPc80/qD9+9+a5DndRb4083R4eQrmat/NSq4OMU/BXDVX+5Gr5unw8hTGn6udFlhJDgL+EHgx8HTgnCRP7zKmCXhhVZ2wYGz9C4BtVXUcsK1d77tLgL0nYVvqPF8MHNcum4D3TSjGFTNPgWHmKZirQzALuXoJA8pTMFfbdXN1ynPVPAWGmacw5lztugXrROCrVXV3VX0fuBQ4s+OYJu1MYEv7fAtwVoexjERVXQ98Z6/NS53nmcCHqvFZ4PAk6ycT6YqZpwPMUzBXB2pwuTrAPAVzFczVPuSqeTrAPIXx52rXBdZTgG8sWN/RbhuqAj6d5KYkm9ptR1XVLoD28cjOohuvpc6zDznQhxhHaZbzFMzVPpnlXO1znkJ/4hwVc5Ve5mofYhylWc5TGGGurnoerBHJItuGPG78yVW1M8mRwNYkf9l1QFOgDznQhxhHyTxdXB/yoA8xjpK5uq++5EBf4hwVc3VffciBPsQ4Subp4g44D7puwdoBHLNg/WhgZ0exjF1V7WwfdwNX0jQ93zvfzNg+7u4uwrFa6jz7kAN9iHFkZjxPwVztjRnP1T7nKfQnzpEwV3ubq32IcWRmPE9hhLnadYH1eeC4JMcmeSzwSuDqjmMaiySPT/KE+efAi4Dbac733Ha3c4Gruolw7JY6z6uBX2xHaDkJ+Jv55tkpYp7OTp6CudoL5mqv8xTMVXO1H7lqns5OnsIoc7WqOl2AlwBfAb4G/Oeu4xnjef4U8MV2uWP+XIEn04xUclf7eETXsY7gXD8K7AL+gabqP2+p86Rpdv3D9u9/GzDXdfxLnJN5OrA8bc/LXO3pMku5OsQ8bWM1V83Vqc9V83R4edqe11hzNe2BkiRJkqQ16rqLoCRJkiQNhgWWJEmSJI2IBZYkSZIkjYgFliRJkiSNiAWWJEmSJI2IBdYYJXlZkkry013HIu2Puaq+MFfVB+ap+sJcHQ8LrPE6B7iBZmI6aZqZq+oLc1V9YJ6qL8zVMbDAGpMkhwEn00xc9sp222OS/FGSO5Jck+STSc5uX3tukj9PclOSP02yvsPwNUPMVfWFuao+ME/VF+bq+Fhgjc9ZwKeq6ivAd5I8B/iXwEbgnwK/DDwPIMkhwH8Dzq6q5wIXA7/dRdCaSeaq+sJcVR+Yp+oLc3VMDu46gAE7B3itcKNSAAABSElEQVR3+/zSdv0Q4GNV9Qjw10mubV9/GvBMYGsSgIOAXZMNVzPMXFVfmKvqA/NUfWGujokF1hgkeTJwKvDMJEWThAVcudQhwB1V9bwJhSgB5qr6w1xVH5in6gtzdbzsIjgeZwMfqqqfrKqNVXUM8FfAt4B/1fZvPQo4pd3/y8C6JD9ohk3yjC4C18wxV9UX5qr6wDxVX5irY2SBNR7nsO83AB8HfgLY
AdwOvB+4Efibqvo+TaK/M8kXgVuA508uXM0wc1V9Ya6qD8xT9YW5Okapqq5jmClJDquqB9um2c8BJ1fVX3cdl7Q3c1V9Ya6qD8xT9YW5unbegzV51yQ5HHgs8HYTVlPMXFVfmKvqA/NUfWGurpEtWJIkSZI0It6DJUmSJEkjYoElSZIkSSNigSVJkiRJI2KBJUmSJEkjYoElSZIkSSPy/wGWTk6BpNHf4gAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visualize the correlation between Fare-bin and Age\n", + "grid = sns.FacetGrid(df, col='Fare-bin', size=3, aspect=0.8, sharey=False)\n", + "grid.map(plt.hist, 'Age', alpha=.5, bins=range(0,105,5))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "_cell_guid": "d63fb4e1-0c8d-423d-bf93-5821b08fde51", + "_execution_state": "idle", + "_uuid": "d8472c4371963ccdc90498b657d3f7f26e9f1e99" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
Fare-bin
127.848315
227.898241
326.144009
430.728604
535.877913
\n", + "
" + ], + "text/plain": [ + " Age\n", + "Fare-bin \n", + "1 27.848315\n", + "2 27.898241\n", + "3 26.144009\n", + "4 30.728604\n", + "5 35.877913" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the mean Age for each Fare-bin\n", + "df[['Fare-bin', 'Age']].groupby(['Fare-bin']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "_cell_guid": "40a7c6d6-c3ae-46f0-baef-352ff8509783", + "_execution_state": "idle", + "_uuid": "3fb0839aa1bb049ea5998f458ad939272a736345" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
Fare-bin
110.812580
210.995650
313.523208
417.466022
515.150724
\n", + "
" + ], + "text/plain": [ + " Age\n", + "Fare-bin \n", + "1 10.812580\n", + "2 10.995650\n", + "3 13.523208\n", + "4 17.466022\n", + "5 15.150724" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the standard deviation of Age for each Fare-bin\n", + "df[['Fare-bin', 'Age']].groupby(['Fare-bin']).std()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "_cell_guid": "37c1feaf-efa8-467a-8fe3-90b8773bde01", + "_execution_state": "idle", + "_uuid": "cab042b668d7f35dd2b778162835bcb5a0bc1aed" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAqsAAAGoCAYAAABhZ6zCAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X20ZGV55/3vT140CgaVxiDdnWYmjQkyRvAMYphJCMg8DfrQZoIZSFQ0JD1JNBo0T8Q4S4yZrIVJRhJfQtIKA6gRCGrsYYjaQYg6KyANIgIt0EEDHdBuRFGikbRezx+12zkc6vR5rdq7qr6ftWqd2rvu2vva61x9+qq77n3fqSokSZKkLnpc2wFIkiRJs7FYlSRJUmdZrEqSJKmzLFYlSZLUWRarkiRJ6iyLVUmSJHWWxeoCJXlTktuS3JLk5iTPa/a/N8nhzfOHZ3nvMUmub963NclbBhjn45NclmRbc841gzqXumWEcvSnk9yUZFeSUwd1HnXPCOXo65Lc3sR5dZIfHdS51D0jlKe/luQLzbk+szu2cRLnWZ2/JM8H3g4cV1XfTXIgsG9V3Tej3cNVtV+f998B/EJVfT7JXsAzq+r2AcX6G8Czq+rXkpwG/FxV/ZdBnEvdMWI5ugZ4MvDbwKaqumIQ51G3jFiO/ixwfVV9O8mvNzH7d3QCjFiePrmqvtk8PwX4japaN4hztcWe1YU5GHigqr4LUFUP7E7cJNcmmdrdMMn/aHqNrk6yotl9EHB/897v7U7cJG9J8r4kn0xyV5JfXYZY1wMXN8+vAE5IkmU4rrptZHK0qr5cVbcA31/qsTRSRilHr6mqbzeb1wErl3pMjYxRytNvTtt8EjB2vZAWqwvzCWBVkjuT/FmSn5ml3ZOAm6rqKODvgHOa/ecBdyT5SJL/muQJ097zbOCFwPOBNyd5xsyDJvl0080/8/GCPjEcAtwLUFW7gIeApy3imjVaRilHNZlGNUfPBP5mAdep0TZSeZrkVUn+AfhD4DWLuuIOs1hdgKp6GHgusAHYCVyW5BV9mn4fuKx5/n7gPzTvfyswRe8fwS8CH5v2no9W1Xeq6gHgGuDoPuf/j1X1nD6Pv+0TQ79e1LH7tKVHG7Ec1QQaxRxN8tLmnH+0sKvVqBq1PK2qd1fVvwXeAPy3hV9xt+3ddgCjpqq+B1wLXJvkC8AZwEVzvW3a+/8BOD/Je4CdSZ42s80s2yT5NLB/n+P/dp8E3g6sArYn2Rv4YeDBOeLUGBihHNWEGqUcbXqy3gT8zO6vhDUZRilPp7kUOH+OGEeOPasLkOSZSdZO2/Uc4B/7NH0csPvu5l8EPtO
8/4XTxo2uBb4HfKPZXp/kCU0yHwfcMPOgC/yktYnePyyaWD5Z3k039kYsRzWBRilHkxwJ/AVwSlXtWPDFamSNWJ5Oj/OFwF3zvtARYc/qwuwHvDPJAcAuYBu9rwhm+mfgWUlupDdWdPfdoy8Dzkvy7eb9v1RV32vy+bPA/wZWA79fM+44XIQLgPcl2UavR/W0JR5Po2FkcjTJvwc+AjwF+H+T/F5VPWspx9RIGJkcpfe1/37AXzXHv6eqTlniMTUaRilPX918A/CvwNf5vx1VY8OpqzogvfnXHq6qP247Fqkfc1RdZ45qFJini+MwAEmSJHWWPauSJEnqLHtWJUmS1FkWq5IkSeqsThSr69atK3rzjPnwUXSUeepjxqNzzFEfMx6dY476mPGYl04Uqw888EDbIUhzMk/Vdeaous4c1WJ0oliVJEmS+rFYlSRJUmdZrEqSJKmzLFYlSdKiJdkryeeSXNl2LBpPFquSJGkpXgtsbTsIja85i9UkFybZkeTWPq/9dpJKcmCznSTvSLItyS1JjhpE0JIkqX1JVgIvBN7bdiwaX3vPo81FwLuAS6bvTLIKOBG4Z9ruk4C1zeN5wPnNz046b/OdffefdeJhQ45EWjjzV11hLk60PwF+B9h/tgZJNgAbAFavXt23jTmkPZmzZ7WqPgU82Oel8+gl6PRJXdcDl1TPdcABSQ5elkglSVJnJHkRsKOqbtxTu6raWFVTVTW1YsWKIUWncbKoMatJTgH+qao+P+OlQ4B7p21vb/b1O8aGJFuSbNm5c+diwpAkSe05FjglyZeBS4Hjk7y/3ZA0jhZcrCZ5IvAm4M39Xu6zr+9yWn7SkiRpdFXVG6tqZVWtAU4DPllVL205LI2h+YxZnenfAocCn08CsBK4KcnR9HpSV01ruxK4b6lBSpIkaTItuGe1qr5QVQdV1Zrm09R24Kiq+gqwCXh5MyvAMcBDVXX/8oYsSZK6pKquraoXtR2HxtN8pq76IPD3wDOTbE9y5h6aXwXcDWwD3gP8xrJEKUmSpIk05zCAqjp9jtfXTHtewKuWHpYkSZLkClaaADOXAkxyaJLrk9yV5LIk+7YdoyRJ6s9iVZNg5lKAbwPOq6q1wNeBPQ1tkQaq3yqBSf4oyReblQA/kuSANmOUpDZZrGqszVwKML0pLI4HrmiaXAy8uJ3oJKC3SuC6Gfs2A0dU1bOBO4E3DjsoSeoKi1WNu91LAX6/2X4a8I2q2tVsz7pwBbh4hQav3yqBVfWJaTl6Hb1pACVpIlmsamzNshTgvBeuABevUCf8MvA3s73oBypJ485iVePsMUsB0utpPSDJ7pkwXLhCnZXkTcAu4AOztfEDlaRxZ7GqsTXLUoC/BFwDnNo0OwP4aEshSrNKcgbwIuCXmmkBJWkiWaxqEr0BeF2SbfTGsF7QcjzSoyRZRy9PT6mqb7cdjyS1ac5FAaRxUFXXAtc2z+8Gjm4zHmm3ZpXA44ADk2wHzqF39//jgc29CSy4rqp+rbUgJalFFquS1KJZVgm0t1+SGg4DkCRJUmdZrEqSJKmzLFYlSZLUWXMWqwtdtzrJG5NsS3JHkv9nUIFLkiRp/M2nZ/Ui5rludZLD6c1n+azmPX+WZK9li1aSJEkTZc5idYHrVq8HLq2q71bVl4BtOEWQJEmSFmk5xqxOX7f6EODeaa9tb/Y9hutZS5IkaS5LKlb7rFudPs36LhPoetaSJEmay6IXBZi2bvUJ09at3g6smtZsJXDf4sOTJEnSJFtUz+oe1q3eBJyW5PFJDgXWAp9depiSJEmaRHP2rC5k3eqqui3J5cDt9IYHvKqqvjeo4CVJkjTe5ixWF7pudVX9AfAHSwlKkiRJAlewkiRJUodZrEqSJKmzFj0bwDg7b/OdffefdeJhQ45E0rhLciG9mVV2VNURzb6nApcBa4AvA79QVV9vK0ZJapM9q5LUrot47JLWZwNXV9Va4OpmW5ImksWqJLWo35LW9Jauvrh5fjH
w4qEGJUkdYrEqSd3z9Kq6H6D5eVDL8Uh9JXlCks8m+XyS25L8XtsxafxYrErSCEuyIcmWJFt27tzZdjiaPN8Fjq+qnwSeA6xLckzLMWnMWKxKUvd8NcnBAM3PHbM1rKqNVTVVVVMrVqwYWoASQPU83Gzu0zxqD2+RFsxiVZK6ZxNwRvP8DOCjLcYi7VGSvZLcTO9D1eaqur7tmDRenLpKklo0y5LW5wKXJzkTuAd4SXsRSnvWLKv+nCQHAB9JckRV3br79SQbgA0Aq1evXtCx25pK0iksu8ViVRoz/pEdLbMsaQ1wwlADkZaoqr6R5Fp6U7HdOm3/RmAjwNTUlEMEtGAOA5AkSYuSZEXTo0qSHwJeAHyx3ag0buYsVpNcmGRHkuld+k9NsjnJXc3PpzT7k+QdSbYluSXJUYMMXpIktepg4JoktwA30BuzemXLMWnMzGcYwEXAu4BLpu3bvbrKuUnObrbfAJwErG0ezwPOb35KrUjyBOBTwOPp5fsVVXVOkkOBS4GnAjcBL6uqR9qLdPAcHiBpuVXVLcCRbceh8TZnz+oCV1dZD1zSTGVxHXDA7ulXpJbMNgfg24DzmuUsvw6c2WKMkiRpFosdszrb6iqHAPdOa7e92Se1Yg9zAB4PXNHsdzlLSZI6arlvsEqffX3v/HPVFQ3LzDkAgX8AvlFVu5omfqiSJKmjFluszra6ynZg1bR2K4H7+h3AVVc0LFX1vap6Dr18PBr4iX7N+r3XD1WSJLVrscXqbKurbAJe3swKcAzw0O7hAlLbquobwLXAMfTGU+++wdAPVZIkddR8pq76IPD3wDOTbG9WVDkXODHJXcCJzTbAVcDdwDbgPcBvDCRqaZ5mmQNwK3ANcGrTzOUsJUnqqDmnrlrI6ipVVcCrlhpUVzn1z0g6GLg4yV70PpxdXlVXJrkduDTJfwc+B1zQZpCSJKk/l1vVWJttDsCqupve+FVJktRhLrcqSZKkzrJYlSRJUmdZrEqSJKmzLFYlSZLUWRarktRRSc5KcluSW5N8MMkT2o5JkobN2QCkjpttyjSNtySHAK8BDq+q7yS5HDgNuKjVwCRpyOxZlaTu2hv4oWa1tScyy0prkjTOLFYlqYOq6p+APwbuAe6nt3z1J2a2S7IhyZYkW3bu3DnsMCVp4CxWJamDkjwFWA8cCjwDeFKSl85sV1Ubq2qqqqZWrFgx7DAlaeAcsyp1RNfGprq8cOteAHypqnYCJPkw8FPA+1uNSpKGzJ5VSeqme4BjkjwxSYATgK0txyRJQ2exKkkdVFXXA1cANwFfoPf3emOrQUlSC5ZUrPabAzDJoUmuT3JXksuS7LtcwUrSJKmqc6rqx6vqiKp6WVV9t+2YJGnYFl2sTpsDcKqqjgD2ojcH4NuA86pqLfB14MzlCFSSJEmTZ6k3WO2eA/Bf6c0BeD9wPPCLzesXA28Bzl/ieZakazeuSJIkaX4W3bPabw5A4EbgG1W1q2m2HTik3/udG1CSJElzWXTP6ow5AL8B/BVwUp+m1e/9VbWR5maBqampvm2kUeAUT5IkDc5SbrD6wRyAVfWvwO45AA9olgYEWInLA0qSJGmRllKs9psD8HbgGuDUps0ZwEeXFqIkSZIm1VLGrM42B+AbgNcl2QY8DbhgGeKUJEnSBFrSbABVdQ5wzozddwNHL+W4kiRJEriClSRJkjrMYlWSJC1KklVJrkmytVnR8rVtx6Txs9RFASRJ0uTaBby+qm5Ksj9wY5LNVXV724FpfNizKkmSFqWq7q+qm5rn3wK2MstiQNJi2bMqSZKWLMka4Ejg+hn7NwAbAFavXj30uMBl10edPauSJGlJkuwHfAj4rar65vTXqmpjVU1V1dSKFSvaCVAjzWJVY222wf9Jnppkc5K7mp9PaTtWSRpFSfahV6h+oKo+3HY8Gj8Wqxp3uwf//wRwDPCqJIcDZwNXV9Va4OpmW+qUJAckuSLJF5sPXM9vOyZpumYFywuArVX19rb
j0XhyzOoymG0szFknHjbkSDRTVd0P3N88/1aS3YP/1wPHNc0uBq6lt/qa1CV/Cnysqk5Nsi/wxLYDkmY4FngZ8IUkNzf7freqrmoxJo0Zi1VNjBmD/5/eFLJU1f1JDmoxNOkxkjwZ+GngFQBV9QjwSJsxSTNV1WeAtB2HxpvFqibCzMH/vW+u5vW+1u9iHTTvku2sfwPsBP5nkp8EbgReW1X/PL3RJOSopMnmmFWNvVkG/381ycHN6wcDO/q917tY1aK9gaOA86vqSOCf6TO22hyVNO4sVjXW9jD4fxNwRvP8DOCjw45NmsN2YHtV7Z6z8gp6xaskTZQlFav97lR1SiB1zO7B/8cnubl5nAycC5yY5C7gxGZb6oyq+gpwb5JnNrtOAFzCUtLEWeqY1X53qv4uvSmBzk1yNr2vrbzLWq2YY/D/CcOMRVqE3wQ+0Px9vRt4ZcvxSNLQLbpYne1O1SROCSRJy6Cqbgam2o5Dktq0lGEA0+9U/VyS9yZ5EjOmBAL6TgmUZEOSLUm27Ny5cwlhSJIkaVwtpVid152qs/EOVkmSJM1lKcXqbHeqzmtKIEmSJGkuiy5W93CnqlMCSZIkaVksdTaAfneqPg64PMmZwD3AS5Z4DkkdMtuKV2edeNiQI5EkTYIlFat7uFPVKYEkSZK0ZEvtWZUkTYjZetUlaZBcblWSJEmdZbEqSZKkzrJYlSRJUmdZrEqSJKmzLFYlSZLUWRarkiRJ6iyLVUmSJHWW86wOkCv9SJIkLY09q5LUYUn2SvK5JFe2HYsktcFiVZK67bXA1raDkKS2WKxKUkclWQm8EHhv27FIUlscsypJ3fUnwO8A+8/WIMkGYAPA6tWr+7Zx/LykUbbkntWZ46mSHJrk+iR3Jbksyb5LD1OSJkuSFwE7qurGPbWrqo1VNVVVUytWrBhSdJI0PMsxDGDmeKq3AedV1Vrg68CZy3AOSZo0xwKnJPkycClwfJL3txuSJA3fkorVmeOpkgQ4HriiaXIx8OKlnEOSJlFVvbGqVlbVGuA04JNV9dKWw5KkoVvqmNWZ46meBnyjqnY129uBQ/q9cT7jrMaV48c0Scx3SdJSLLpndZbxVOnTtPq933FWkjQ/VXVtVb2o7TikmZJcmGRHklvbjkXjaynDAB4znopeT+sBSXb32K4E7ltShJIkqasuAta1HYTG26KL1VnGU/0ScA1watPsDOCjS45SkiR1TlV9Cniw7Tg03gaxKMAbgNcl2UZvDOsFAziHNC/9vqJK8tQkm5vp1TYneUqbMUqSpNkty6IAVXUtcG3z/G7g6OU4rrQMLgLeBVwybd/ZwNVVdW6Ss5vtN7QQmySNvUHcUD3bjZsaTy63qrE2y1dU6+lNqwZOryZJA+UN1Voqi1VNoqdX1f0Azc+DZmuYZEOSLUm27Ny5c2gBSpKkHotVaQ/sEZCk2SX5IPD3wDOTbE/iqpVadssyZlUaMV9NcnBV3Z/kYGBH2wFJ0iiqqtPbjkHjz55VTaJN9KZVA6dXkySp0yxWNdZm+YrqXODEJHcBJzbbkiSpgxwGoLG2h6+oThhqIJIkaVHsWZUkSVJnWaxKkiSpsxwGIA3IbCusnHXiYUOORJKk0WXPqiR1UJJVSa5JsjXJbUle23ZMktQGe1Y7xJ44SdPsAl5fVTcl2R+4Mcnmqrq97cAkaZgWXawmWQVcAvwI8H1gY1X9aZKnApcBa4AvA79QVV9feqjSeJjtQ4k0XbMU8O5lgb+VZCtwCGCxKmmiLKVnte+nfuAVwNVVdW6Ss4GzgTcsPVRJmkxJ1gBHAtf3eW0DsAFg9erVCzruoD84Lde3RQs9zkKvy2+vpG5bdLG6h0/964HjmmYXA9disbokDg+QJleS/YAPAb9VVd+c+XpVbQQ2AkxNTdWQw5OkgVuWG6xmfOp/elPI7i5oD5rlPRuSbEmyZefOncsRhiSNlST70CtUP1BVH247Hklqw5KL1bk+9c+mqjZW1VRVTa1
YsWKpYUjSWEkS4AJga1W9ve14JKktSypWZ/nU/9UkBzevHwzsWFqIkjSRjgVeBhyf5ObmcXLbQUnSsC1lNoDZPvVvAs4Azm1+fnRJEUrSBKqqzwBpOw5JattSZgPY/an/C0lubvb9Lr0i9fIkZwL3AC9ZWoiSJEmaVEuZDWBPn/pPWOxxJUmSpN1cwWqEOaWVJEkad8sydZUkSZI0CPasSloWLiMrSRoEe1YlSZLUWZ3uWXVMpiSNH3vhJS2EPauSJEnqrE73rM7GT+WSJEmTwZ5VSZIkdZbFqiRJkjrLYlWSJEmdZbEqSZKkzhrJG6y0OE4FJkmSRo3F6hhytgRJkjQuBlasJlkH/CmwF/Deqjp3UOeSFsMcbddyfaga528GzFGNAvNUgzaQMatJ9gLeDZwEHA6cnuTwQZxLWgxzVF1njmoUmKcahkH1rB4NbKuquwGSXAqsB24f0Pk0AGM+xtUcVdeZoxoF5qkGblDF6iHAvdO2twPPm94gyQZgQ7P5cJI7+hznQOCBgUTYTa1c7+sG3H4PZrvej1XVuuU7TV9z5iiYp7Po1PUuYz7Opq08NUfnsIff/YKudwg5NGid/ls6Tjk6hP//xtWScnRQxWr67KtHbVRtBDbu8SDJlqqaWs7AuszrHe7p++yrx+wwTx/D6x3eqfvsM0fnwesd7un77PP/+zl4vQszqHlWtwOrpm2vBO4b0LmkxTBH1XXmqEaBeaqBG1SxegOwNsmhSfYFTgM2Dehc0mKYo+o6c1SjwDzVwA1kGEBV7UryauDj9KayuLCqblvEofb4tcEY8nqHZBlzFPy9jbtWrtccXRKvd0j8/37RvN4FSNVjhkBJkiRJnTCoYQCSJEnSklmsSpIkqbMsViVJUmuSrEtyR5JtSc5uO55BSPLlJF9IcnOSLc2+pybZnOSu5udT2o5zsZJcmGRHklun7et7fel5R/P7viXJUXMd32JVkiS1YsKWa/3ZqnrOtPlGzwaurqq1wNXN9qi6CJg5uf9s13cSsLZ5bADOn+vgFquSJKktP1iutaoeAXYv1zoJ1gMXN88vBl7cYixLUlWfAh6csXu261sPXFI91wEHJDl4T8e3WJUkSW3pt1zrIS3FMkgFfCLJjc3yswBPr6r7AZqfB7UW3WDMdn0L/p0ParlVSZKkucxrWeExcGxV3ZfkIGBzki+2HVCLFvw7t2dVkiS1ZSKWa62q+5qfO4CP0Bv+8NXdX383P3e0F+FAzHZ9C/6dW6xKkqS2jP1yrUmelGT/3c+B/wTcSu86z2ianQF8tJ0IB2a269sEvLyZFeAY4KHdwwVm4zAASZLUimVeVrirng58JAn06q6/rKqPJbkBuDzJmcA9wEtajHFJknwQOA44MMl24BzgXPpf31XAycA24NvAK+c8vsutSpIkqascBiBJkqTOsliVJElSZ1msSpIkqbMsViVJktRZFquSJEnqLItVSZKkZZbk55JUkh9vO5ZRZ7EqSZK0/E4HPkNvoQMtgcWqJEnSMkqyH3AscCZNsZrkcUn+LMltSa5MclWSU5vXnpvk75LcmOTju5cpVY/FqiRJ0vJ6MfCxqroTeDDJUcB/BtYA/w74FeD5AEn2Ad4JnFpVzwUuBP6gjaC7yuVWJUmSltfpwJ80zy9ttvcB/qqqvg98Jck1zevPBI4ANjdLsu4F3D/ccLvNYlWSJGmZJHkacDxwRJKiV3wW8JHZ3gLcVlXPH1KII8dhAJIkScvnVOCSqvrRqlpTVauALwEPAD/fjF19OnBc0/4OYEWSHwwLSPKsNgLvKovVBUrypmZw9C1Jbk7yvGb/e5Mc3jx/eJb3HpPk+uZ9W5O8ZQjxntpMnTE16HOpG0YlR5O8IsnO5lw3J/mVQZ1L3TNCebo6yTVJPtfEevKgzqWxcTqP7UX9EPAMYDtwK/AXwPXAQ1X1CL0C921JPg/cDPzU8MLtvlRV2zGMjOZTz9uB46rqu0kOBPatqvtmtHu4qvbr8/4
7gF+oqs8n2Qt4ZlXdPsB49wf+N7Av8Oqq2jKoc6kbRilHk7wCmKqqVw/i+OquEcvTjcDnqur8poi+qqrWDOJcGn9J9quqh5uhAp8Fjq2qr7QdV9fZs7owBwMPVNV3Aarqgd1/XJNcO733Msn/SHJTkquTrGh2H0QzaLqqvrf7j2uStyR5X5JPJrkrya8uU7y/D/wh8C/LdDx136jlqCbTKOVpAU9unv8wcN8e2kpzuTLJzcCngd+3UJ0fi9WF+QSwKsmd6c2V9jOztHsScFNVHQX8HXBOs/884I4kH0nyX5M8Ydp7ng28kN5UFm9O8oyZB03y6WlfmU5/vKBP2yOBVVV15eIvVyNoZHK08fPNV6tXJFm1mAvWSBqlPH0L8NIk24GrgN9czAVLAFV1XFU9p6oOr6qL2o5nVFisLkBVPQw8F9gA7AQua77KnOn7wGXN8/cD/6F5/1uBKXp/qH8R+Ni093y0qr5TVQ8A1wBH9zn/f2ySfObjb6e3S/I4en/MX7/4q9UoGpUcbfwvYE1VPRv4W+DihV+xRtGI5enpwEVVtRI4GXhf8zdW0pA4ddUCVdX3gGuBa5N8ATgDuGiut017/z8A5yd5D7CzGbfyqDazbJPk08D+fY7/2zP+yO5Pb862a9Obs+1HgE1JTnHc6vgbkRylqr42bfM9wNvmiFFjZFTylN4KROuac/5904t7ILBjjlglLRM/HS5AkmcmWTtt13OAf+zT9HH07uyD3qf+zzTvf2Ga6hFYC3wP+EazvT7JE5o/uMcBN8w86Hx7A6rqoao6sJkyYw1wHWChOgFGJUebc01fTvAUYOu8L1QjbZTyFLgHOKE5708AT6DXGyxpSOxZXZj9gHcmOQDYBWyj9zXWTP8MPCvJjcBDwH9p9r8MOC/Jt5v3/1JVfa/5m/tZenfur6Y36NpB/FqMUcrR1yQ5pTnPg8Arlng8jY5RytPXA+9Jcha9XtpXlNPoSEPl1FUdkN4cgQ9X1R+3HYvUjzmqUWCeSuPJYQCSJEnqLHtWJUmS1Fn2rEqSJKmzLFYlSZLUWZ0oVtetW1f07rL04aPoKPPUx4xH55ijPmY8pLHQiWL1gQceaDsEaU7mqbrOHJU0jjpRrEqSJEn9WKxKkiSpsyxWJUmS1FkDK1aTnJXktiS3JvlgkicM6lzSdM264J9N8vkmB3+vT5vHJ7ksybYk1ydZM/xINamSXJhkR5JbZ3k9Sd7R5OctSY4adoyS1BUDKVaTHAK8BpiqqiOAvYDTBnEuqY/vAsdX1U8CzwHWJTlmRpszga9X1Y8B5wFvG3KMmmwXAev28PpJwNrmsQE4fwgxSVInDXIYwN7ADyXZG3gicN8AzyX9QPU83Gzu0zxmTuOyHri4eX4FcEKSDClETbiq+hTw4B6arAcuaXL5OuCAJAcPJzpJ6pa9B3HQqvqnJH8M3AN8B/hEVX1iepskG+j1GLB69eoFHf+8zXf23X/WiYctJlyNoSR7ATcCPwa8u6qun9HkEOBegKraleQh4GnAAzOOs+g8nY35q3n4QX42tjf77p/Z0ByVNO4GNQzgKfR6Bg4FngE8KclLp7epqo1VNVVVUytWrBhEGJpgVfW9qnoOsBI4OskRM5r060V9zCTa5qlaMq/8BHNU0vgb1DCAFwBfqqqdVfWvwIeBnxrQuaRZVdU3gGt57PjA7cAqgGaoyg+z569lpWH6QX42VuJQKkkTalDF6j3AMUme2IwDPAHYOqBzSY+SZEWSA5rnP0Tvw9N2/aIpAAATiUlEQVQXZzTbBJzRPD8V+GRVuTyhumIT8PJmVoBjgIeq6jFDACRpEgxqzOr1Sa4AbgJ2AZ8DNg7iXFIfBwMXN+NWHwdcXlVXJnkrsKWqNgEXAO9Lso1ej6qzVWhoknwQOA44MMl24Bx6NwJSVX8OXAWcDGwDvg28sp1IJal9AylWAarqHHp/gKWhqqpbgCP77H/ztOf/ArxkmHFJu1XV6XO8XsCrhhSOJHW
aK1hJkiSpsyxWJUmS1FkWq5IkSeosi1VJkiR1lsWqJEmSOstiVZIkSZ1lsSpJkqTOsliVJElSZ1msSpIkqbMsViVJktRZFquSJEnqLItVSZIkdZbFqiRJkjpr77YD2JPzNt/ZdgiSJElqkT2rkiRJ6iyLVUmSJHWWxarGSpJVSa5JsjXJbUle26fNcUkeSnJz83hzG7FKkqS5dXrMqrQIu4DXV9VNSfYHbkyyuapun9Hu01X1ohbikyRJC2DPqsZKVd1fVTc1z78FbAUOaTcqSZK0WBarGltJ1gBHAtf3efn5ST6f5G+SPGsPx9iQZEuSLTt37hxQpJIkaTYWqxpLSfYDPgT8VlV9c8bLNwE/WlU/CbwT+OvZjlNVG6tqqqqmVqxYMbiAJUlSXxarGjtJ9qFXqH6gqj488/Wq+mZVPdw8vwrYJ8mBQw5TEy7JuiR3JNmW5Ow+r69ubhb8XJJbkpzcRpyS1DaLVY2VJAEuALZW1dtnafMjTTuSHE3v38HXhhelJl2SvYB3AycBhwOnJzl8RrP/BlxeVUcCpwF/NtwoJakbnA1A4+ZY4GXAF5Lc3Oz7XWA1QFX9OXAq8OtJdgHfAU6rqmojWE2so4FtVXU3QJJLgfXA9FkrCnhy8/yHgfuGGqEkdYTFqsZKVX0GyBxt3gW8azgRSX0dAtw7bXs78LwZbd4CfCLJbwJPAl4wnNAkqVscBiBJw9fvA9XM3v3TgYuqaiVwMvC+JI/5m+2MFZLGncWqJA3fdmDVtO2VPPZr/jOBywGq6u+BJwCPuRHQGSskjTuLVUkavhuAtUkOTbIvvRuoNs1ocw9wAkCSn6BXrNp1KmniWKxK0pBV1S7g1cDH6a2ydnlV3ZbkrUlOaZq9HvjVJJ8HPgi8whsBJU2igd1gleQA4L3AEfTGYv1y81WWJE28Zo7fq2bse/O057fTm91CkibaIGcD+FPgY1V1avM11xMHeC5JkiSNoYEUq0meDPw08AqAqnoEeGQQ55IkSdL4GtSY1X9D70aA/9ksFfjeJE+a3sDpViRJkjSXQRWrewNHAec3SwX+M/Cota+dbkWSJElzGVSxuh3YXlXXN9tX0CteJUmSpHkbSLFaVV8B7k3yzGbXCTx6zWtJkiRpToOcDeA3gQ80MwHcDbxygOeSJEnSGBpYsVpVNwNTgzq+JEmSxp8rWEmSJKmzLFYlSZLUWRarkiRJ6iyLVUmSJHWWxarGSpJVSa5JsjXJbUle26dNkrwjybYktyRxDmBJkjpqkFNXSW3YBby+qm5Ksj9wY5LNVTV9nt+TgLXN43nA+c1PSZLUMfasaqxU1f1VdVPz/FvAVuCQGc3WA5dUz3XAAUkOHnKokiRpHixWNbaSrAGOBK6f8dIhwL3Ttrfz2IJ29zE2JNmSZMvOnTsHEaYkSdoDi1WNpST7AR8Cfquqvjnz5T5vqX7HqaqNVTVVVVMrVqxY7jAlSdIcLFY1dpLsQ69Q/UBVfbhPk+3AqmnbK4H7hhGbJElaGItVjZUkAS4AtlbV22dptgl4eTMrwDHAQ1V1/9CClCRJ8+ZsABo3xwIvA76Q5OZm3+8CqwGq6s+Bq4CTgW3At4FXthCnJEmaB4tVjZWq+gz9x6ROb1PAq4YTkSRJWgqHAUiSJKmzLFYlqQVJ1iW5o1lJ7exZ2vxCktub1dj+ctgxSlIXOAxAkoYsyV7Au4ET6c1OcUOSTdNXWkuyFngjcGxVfT3JQe1EK0ntsmdVkobvaGBbVd1dVY8Al9JbWW26XwXeXVVfB6iqHUOOUZI6wWJVkoZvPquoHQYcluT/JLkuybp+B3KVNUnjzmJVkoZvPquo7Q2sBY4DTgfem+SAx7zJVdYkjTmLVUkavvmsorYd+GhV/WtVfQm4g17xKkkTxWJVkobvBmBtkkOT7AucRm9lten+GvhZgCQH0hsWcPdQo5SkDrBYlaQhq6pdwKuBjwNbgcur6rYkb01yStPs48DXktwOXAP8f1X1tXY
ilqT2OHWVJLWgqq6it/Tv9H1vnva8gNc1D0maWPasSpIkqbMsViVJktRZFquSJEnqLItVSZIkdZbFqiRJkjrLYlWSJEmdZbEqSZKkzhpYsZpkrySfS3LloM4h9ZPkwiQ7ktw6y+vHJXkoyc3N48392kmSpPYNclGA19JbmeXJAzyH1M9FwLuAS/bQ5tNV9aLhhCNJkhZrID2rSVYCLwTeO4jjS3tSVZ8CHmw7DkmStHSD6ln9E+B3gP1na5BkA7ABYPXq1cty0vM239l3/1knHrYsx9dYeX6SzwP3Ab9dVbf1azSIPJUkSfO37D2rSV4E7KiqG/fUrqo2VtVUVU2tWLFiucOQ9uQm4Eer6ieBdwJ/PVtD81SSpHYNYhjAscApSb4MXAocn+T9AziPtChV9c2qerh5fhWwT5IDWw5LkiT1sezFalW9sapWVtUa4DTgk1X10uU+j7RYSX4kSZrnR9P7d/C1dqOSJEn9DHI2AKkVST4IHAccmGQ7cA6wD0BV/TlwKvDrSXYB3wFOq6pqKVxJkrQHAy1Wq+pa4NpBnkOaqapOn+P1d9Gb2kqSJHWcK1hJkiSpsyxWJUmS1FkWq5IkSeosi1VJkiR1lsWqJEmSOstiVZIkSZ1lsSpJLUiyLskdSbYlOXsP7U5NUkmmhhmfJHWFxaokDVmSvYB3AycBhwOnJzm8T7v9gdcA1w83QknqDotVSRq+o4FtVXV3VT0CXAqs79Pu94E/BP5lmMFJUpdYrErS8B0C3Dtte3uz7weSHAmsqqor93SgJBuSbEmyZefOncsfqSS1zGJVkoYvffbVD15MHgecB7x+rgNV1caqmqqqqRUrVixjiJLUDRarkjR824FV07ZXAvdN294fOAK4NsmXgWOATd5kJWkSWaxK0vDdAKxNcmiSfYHTgE27X6yqh6rqwKpaU1VrgOuAU6pqSzvhSlJ7LFYlaciqahfwauDjwFbg8qq6Lclbk5zSbnSS1C17tx2AJE2iqroKuGrGvjfP0va4YcQkSV1kz6okSZI6y2JVYyfJhUl2JLl1lteT5B3NykG3JDlq2DFKkqT5sVjVOLoIWLeH108C1jaPDcD5Q4hJkiQtgsWqxk5VfQp4cA9N1gOXVM91wAFJDh5OdJIkaSG8wUqTaLbVg+6f2TDJBnq9r6xevbrvwc7bfOeyBDXbcc468bBlOb4kSaPInlVNoj2uHvSona4OJElSqyxWNYnmWj1IkiR1hMWqJtEm4OXNrADHAA9V1WOGAEiSpPY5ZlVjJ8kHgeOAA5NsB84B9gGoqj+nNxH7ycA24NvAK9uJVJIkzcViVWOnqk6f4/UCXjWkcCRJ0hI4DECSJEmdZbEqSZKkzrJYlSRJUmdZrEqSJKmzBlKsJlmV5JokW5PcluS1gziPJEmSxtugZgPYBby+qm5Ksj9wY5LNVXX7gM4nSZKkMTSQntWqur+qbmqefwvYSm/tdUmSJGneBj7PapI1wJHA9TP2bwA2AKxevXqgMZy3+c6++8868bCBnleSJElLM9AbrJLsB3wI+K2q+ub016pqY1VNVdXUihUrBhmGJEmSRtTAitUk+9ArVD9QVR8e1HkkSZI0vgY1G0CAC4CtVfX2QZxDkiRJ429QPavHAi8Djk9yc/M4eUDnkiRJ0pgayA1WVfUZIIM4tiSNgyTrgD8F9gLeW1Xnznj9dcCv0JsKcCfwy1X1j0MPVJJa5gpWkjRkSfYC3g2cBBwOnJ7k8BnNPgdMVdWzgSuAPxxulJLUDRarkjR8RwPbquruqnoEuBRYP71BVV1TVd9uNq8DVg45RknqBItVSRq+Q4B7p21vZ88Lp5wJ/E2/F5JsSLIlyZadO3cuY4iS1A0Wq5I0fP3G9FffhslLgSngj/q97pzVksadxarGTpJ1Se5Isi3J2X1ef0WSndNmqviVNuLURNsOrJq2vRK4b2ajJC8A3gScUlXfHVJsktQpA19uVRqmaTeunEivILghyaaqun1G08u
q6tVDD1DquQFYm+RQ4J+A04BfnN4gyZHAXwDrqmrH8EOUpG6wZ1XjZs4bV6S2VdUu4NXAx4GtwOVVdVuStyY5pWn2R8B+wF813wBsailcSWqVPasaN/1uXHlen3Y/n+SngTuBs6rq3j5tSLIB2ACwevXqZQ5Vk6yqrgKumrHvzdOev2DoQUlSB9mzqnEznxtX/hewppm/8m+Bi2c7mDevSJLULotVjZs5b1ypqq9Nu1nlPcBzhxSbJElaIItVjZsf3LiSZF96N648aqxfkoOnbZ5Cb8ygJEnqIMesaqxU1a4ku29c2Qu4cPeNK8CWqtoEvKa5iWUX8CDwitYCliRJe2SxqrEzjxtX3gi8cdhxSZKkhXMYgCRJkjrLntVlcN7mO/vuP+vEw4YciSRJ0nixZ1WSJEmdZbEqSZKkzrJYlSRJUmdZrEqSJKmzLFYlSZLUWRarkiRJ6iyLVUmSJHWWxaokSZI6y2JVkiRJnWWxKkmSpM6yWJUkSVJnWaxKkiSpsyxWJUmS1FkWq5IkSeosi1VJkiR11sCK1STrktyRZFuSswd1HmmmuXIvyeOTXNa8fn2SNcOPUpPOPJWk+RlIsZpkL+DdwEnA4cDpSQ4fxLmk6eaZe2cCX6+qHwPOA9423Cg16cxTSZq/QfWsHg1sq6q7q+oR4FJg/YDOJU03n9xbD1zcPL8COCFJhhijZJ5K0jztPaDjHgLcO217O/C86Q2SbAA2NJsPJ7mjz3EOBB4YSITA6wZ14MUff6DX20GzXe/HqmrdIo85Z+5Nb1NVu5I8BDytXyyTkKeLYJ72dCJPh5mjHczF2ZijPUvJUakzBlWs9vv0X4/aqNoIbNzjQZItVTW1nIF1mde7PIfts68W0aa30zx9DK93eQ7bZ9+i8tQcfSyvVxovgxoGsB1YNW17JXDfgM4lTTef3PtBmyR7Az8MPDiU6KQe81SS5mlQxeoNwNokhybZFzgN2DSgc0nTzSf3NgFnNM9PBT5ZVX17VqUBMU8laZ4GMgygGV/1auDjwF7AhVV12yIOtcevtsaQ17tEs+VekrcCW6pqE3AB8L4k2+j1VJ22xNP6extv45Cn/s7G26RdryZM/KAuSZKkrnIFK0mSJHWWxaokSZI6q7PF6iQs15rky0m+kOTmJFuafU9NsjnJXc3Pp7Qd52IluTDJjiS3TtvX9/rS847m931LkqPai3x+zFFztL3I58ccHf0chfHPU2kunSxW57kU4bj42ap6zrQ58s4Grq6qtcDVzfaougiYOSH1bNd3ErC2eWwAzh9SjItijpqjmKNdMs45CmOcp9J8dLJYZbKXa52+xOLFwItbjGVJqupTPHZeyNmubz1wSfVcBxyQ5ODhRLoo5miPOdpd5mjPSOcojH2eSnPqarHabynCQ1qKZZAK+ESSG5slEwGeXlX3AzQ/D2otusGY7fpG7Xc+avEuljlqjnbdJOYojE+eSnMa1HKrSzXv5TBH3LFVdV+Sg4DNSb7YdkAtGrXf+ajFu1jm6P81ar/zUYt3sczRR5uU37smSFd7Vidiudaquq/5uQP4CL2v7b66+yub5ueO9iIciNmub9R+56MW76KYo+Zo101ojsL45Kk0p64Wq2O/XGuSJyXZf/dz4D8Bt/LoJRbPAD7aToQDM9v1bQJe3tzJegzw0O6vuDrKHO0xR7vLHO0ZxxyF8clTaU6dHAawjMu1dtnTgY8kgd7v4S+r6mNJbgAuT3ImcA/wkhZjXJIkHwSOAw5Msh04BziX/td3FXAysA34NvDKoQe8AOaoOYo52gVjn6Mw3nkqzYfLrUqSJKmzujoMQJIkSbJYlSRJUndZrEqSJKmzLFYlSZLUWRarkiRJ6iyL1SFL8nNJKsmPtx2L1I85qq4zR6XJYrE6fKcDn6E3QbfUReaous4clSaIxeoQJdkPOBY4k+aPbJLHJfmzJLcluTLJVUlObV57bpK/S3Jjko/vXlpPGhRzVF1njkqTx2J1uF4MfKy
q7gQeTHIU8J+BNcC/A34FeD5Akn2AdwKnVtVzgQuBP2gjaE0Uc1RdZ45KE6aTy62OsdOBP2meX9ps7wP8VVV9H/hKkmua158JHAFsbpYS3AtwfWcNmjmqrjNHpQljsTokSZ4GHA8ckaTo/dEs4COzvQW4raqeP6QQNeHMUXWdOSpNJocBDM+pwCVV9aNVtaaqVgFfAh4Afr4Zc/V04Lim/R3AiiQ/+DorybPaCFwTwxxV15mj0gSyWB2e03nsp/8PAc8AtgO3An8BXA88VFWP0PvD/LYknwduBn5qeOFqApmj6jpzVJpAqaq2Y5h4Sfarqoebr7g+CxxbVV9pOy5pN3NUXWeOSuPLMavdcGWSA4B9gd/3D6w6yBxV15mj0piyZ1WSJEmd5ZhVSZIkdZbFqiRJkjrLYlWSJEmdZbEqSZKkzrJYlSRJUmf9/8SJjUAUDBqFAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visualize the correlation between SibSp and Age\n", + "grid = sns.FacetGrid(df, col='SibSp', col_wrap=4, size=3.0, aspect=0.8, sharey=False)\n", + "grid.map(plt.hist, 'Age', alpha=.5, bins=range(0,105,5))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "_cell_guid": "ceab6b91-e6bd-4c0f-b7c2-31847b1ee61b", + "_execution_state": "idle", + "_uuid": "612aec4215b2e4c16806aa768ddd1f565fdfe78b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
SibSp
030.921766
131.058071
223.569444
316.312500
48.772727
510.166667
814.500000
\n", + "
" + ], + "text/plain": [ + " Age\n", + "SibSp \n", + "0 30.921766\n", + "1 31.058071\n", + "2 23.569444\n", + "3 16.312500\n", + "4 8.772727\n", + "5 10.166667\n", + "8 14.500000" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the mean Age for each SibSp\n", + "df[['SibSp', 'Age']].groupby(['SibSp']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "_cell_guid": "eb7a277e-0e3c-4507-ba41-2c9d223f293a", + "_execution_state": "idle", + "_uuid": "d40475272e30ea20b5eeff47d49a5664a2a8303f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
SibSp
013.059751
115.974482
214.910640
311.824938
48.005545
55.192944
8NaN
\n", + "
" + ], + "text/plain": [ + " Age\n", + "SibSp \n", + "0 13.059751\n", + "1 15.974482\n", + "2 14.910640\n", + "3 11.824938\n", + "4 8.005545\n", + "5 5.192944\n", + "8 NaN" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the standard deviation of Age for each SibSp\n", + "df[['SibSp', 'Age']].groupby(['SibSp']).std()" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "_cell_guid": "ab9d9000-de9f-4ca0-b817-c53184d74c9a", + "_execution_state": "idle", + "_uuid": "b2201e9af0c379e0b1bcc0e7a23e08043c241645" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAqkAAAGoCAYAAABlknz/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3X+8XXV95/vXu4DWqiOgURGI0DHaglNRT1Ev0178AQauY7TFMXSuxlYntQOPqrWPEe2tWmwfD+1UaSsWGiUX6FjQqmimjWJKddSpIgERCYhESoeYXAiEogyoE/zcP/Y6sDnZOzknZ/9Ye5/X8/HYj73Xd33X2p/F+bDy2Wt911qpKiRJkqQ2+alxByBJkiTNZZEqSZKk1rFIlSRJUutYpEqSJKl1LFIlSZLUOhapkiRJah2L1D6SPJDk2iTXJ/mbJD8zgHW+Lsm5g4hvznrfnmRrkpuSvHTQ61f7TUq+Jnl8ki8kuXcY/y9oMkxQvp6U5Ook32reXzTI9WsyTFC+Ht/EeW2SbyZ55SDXPw4Wqf3dX1XHVdUzgR8Db5zvgkkOGF5Ye3zXMcBq4FhgJfAXo/x+tcZE5CvwQ+D3gd8d4XeqfSYlX+8E/l1V/RtgDfBXI/xutcek5Ov1wExVHUenHvjLJAeO8PsHziJ1fr4MPA0gyaebX9Rbkqyd7dAcGTo7yZXAC5L8YpJ/bH7NfD3JY5uuT0nyuSQ3J/njAcS2Cri0qn5UVf8EbAWOH8B6Nblam69V9b+q6it0ilUJ2p2v36iq7c3kFuCnkzxysevVRGtzvt5XVbubyZ8GJv5pTRNdYY9C8yvkFOBzTdNvVNWuJI8Crkryyaq6C3g0cH1VvTPJI4BvA6+uqquS/Cvg/mb544BnAz8Cbkrywaq6bc53ngO8sEc4l1bVe+e0HQ58rWt6W9OmJWgC8lV60ITl668C36iqH+3n5mrCTUK+JnkesB54KvCarqJ1Ilmk9veoJNc2n78MXNB8/u2ucR5HAiuAu4AHgE827c8AdlTVVQBV9X2AJABXVNU9zfQNdBLpYUlZVW9ZQJzp0Tbxv560YJOSrxJMWL4mORZ4H3DyQpfVVJiYfK2qK4Fjk/w8cFGSz1bVxJ65skjt7/5mXMeDkpwIvAR4QVXdl+SLdA6pA/ywqh6Y7Ur/QrH7V/gD9PgbLPCX0zY6/3PMOgLYjpaaSclXCSYoX5McAVwGvLaqvtt3izTNJiZfZ1XVjUn+F/BMYHO/fm1nkbowjwPubhLy54Dn9+n3bTpjTX6xObz/WB46vL9PC/zltAH46yQfAJ5C55fc1xewv
KZXG/NV6qd1+ZrkYODvgLdX1f+Y73JaEtqYr0cDt1XV7iRPpXMU99b5Lt9GFqkL8zngjUmuA27i4WNBH1RVP07yauCDzViV++n84hq4qtqS5OPADcBu4IyuX3Ba2lqXrwBJbgX+FfCIJK8ATq6qG4b1fZoYbczXM+lcJPP7SX6/aTu5qu4Y0vdpcrQxX/8tcFaS/w38BPhPVXXnkL5rJFLl8EVJkiS1i7egkiRJUutYpErSCCU5Mp2nbt3Y3F/xTU37oUk2NfdM3JTkkD7Lr2n63JxkzWijl6TR8XS/JI1QksOAw6rqmuYiiquBVwCvA3ZV1XuTnAUcUlVvm7PsoXSu1J2hc8Xw1cBzq+ruUW6DJI2CR1IlaYSqakdVXdN8/gFwI50HcKwCLmq6XUSncJ3rpcCmqtrVFKab6Dz+UJKmTiuK1JUrVxadowK+pvc1NczXJfEaiSRH0XnizJXAk6pqB0Dz/sQeixzOw2/23fcJc0nWJtmcZPOxxx477v+evqYgX0fFfezUv+atFUXqnXdO9B0StMSYrxqEJI+h81SaN88+hWY+i/Vo67nTr6p1VTVTVTOPetSj9jdMaeTcx2pWK4pUSVpKkhxEp0D9aFV9qmm+vRmvOjtutde9OH3CnKQlwyJVkkYonYd2XwDcWFUf6Jq1AVjTfF4DfKbH4pcDJyc5pLn6/+SmTZKmjkWqJI3WCcBrgBclubZ5nQq8Fzgpyc3ASc00SWaSfASgqnYB7wGual5nN22SNHV8LKokjVBVfYXeY0sBXtyj/2bgDV3T64H1w4lOmr8kRwIXA0+m8xjOdVX1Z3P6BPgz4FTgPuB1s3e3kPbFIlWSJO2P3cBbu+/5m2RTVd3Q1ecUYEXzeh5wXvMu7ZOn+yVJ0oLt5Z6/3VYBF1fH14CDZy8QlPZlSR5JPWfTd/Zoe8tJTx9DJJom5pUmSa98BXNW+2fOPX+79bu37445y68F1gIsX758WGHuYZT/H/j/3MJ5JFWSJO23fdzzd1739u2+r++yZcuGEaYmkEWqJEnaL33u+dvNe/tqv+2zSE2yPskdSa7vant3ku/NuX3K7Ly3J9ma5KYkLx1W4JIkaXz2cs/fbhuA16bj+cA9s4//lfZlPmNSLwTOpXObiW7nVNWfdDckOQZYDRwLPAX4+yRPr6oHBhCrJElqj9l7/n4rybVN2zuA5QBVdT6wkc7tp7bSuQXVr48hTk2ofRapVfWlZkD0fKwCLq2qHwH/lGQrcDzw1f2OUJIktc4+7vk726eAM0YTkabNYsaknpnkumY4wCFNW7+r+PaQZG2SzUk279y5cxFhSJIkadrsb5F6HvCvgePo3Ebi/U37vK7iA6/k0+gl+ekkX0/yzSRbkvxB0350kiuT3JzkY0keMe5YJUla6varSK2q26vqgar6CfBhOqf0wav41G4/Al5UVc+i8wNrZTOQ/310xlivAO4GXj/GGCVJEvtZpM55WsQrgdkr/zcAq5M8MsnRdB6D9vXFhSgNRvPEk3ubyYOaVwEvAj7RtF8EvGIM4UmSpC77vHAqySXAicATkmwD3gWcmOQ4Ov/A3wr8JkBVbUnyceAGOs/0PcMr+9UmSQ4ArgaeBnwI+C7wL1W1u+nScxz1uJ6GoumTZD3wMuCOqnpm0/Yx4BlNl4Pp5ORxPZa9FfgB8ACwu6pmRhK0JI3BfK7uP71H8wV76f9HwB8tJihpWJofTcclORi4DPj5Xt16LLcOWAcwMzPTc5y1NE8XMue2flX16tnPSd4P3LOX5V9YVXcOLTpJagmfOKUlqar+Bfgi8Hzg4CSzP9gcR62hqqovAbt6zWtujv7vgUtGGpQktZBFqpaMJMuaI6gkeRTwEuBG4AvAaU23NcBnxhOhxC8Bt1fVzX3mF/D5JFc3Q1D68jZ/kibdfJ44JU2Lw4CLmnGpPwV8vKr+NskNwKVJ/hD4BnsZziIN2ens/SjqCVW1PckTgU1Jvt0cmd2DQ1QkTTqLVC0ZV
XUd8Owe7bfw0G3UpLFohpz8CvDcfn2qanvzfkeSy+jkbc8iVZImnaf7JakdXgJ8u6q29ZqZ5NFJHjv7GTiZh27/J0lTxyJVkkaoua3fV4FnJNmWZPbhEauZc6o/yVOSbGwmnwR8Jck36dx/+u+q6nOjiluSRs3T/ZI0Qn1u60dVva5H23bg1ObzLcCzhhqcJLWIRWrjnE3f6dn+lpOePuJIJEmS5Ol+SZIktY5FqiRJklrHIlWSJEmtY5EqSZKk1rFIlSRJUutYpEqSJKl1LFIlSZLUOhapkiRJah2LVEmSJLXOPovUJOuT3JHk+q62/5Lk20muS3JZkoOb9qOS3J/k2uZ1/jCDlyRJ0nSaz5HUC4GVc9o2Ac+sql8AvgO8vWved6vquOb1xsGEKUmSpKVkn0VqVX0J2DWn7fNVtbuZ/BpwxBBik6Sp0+fs1LuTfK/rLNSpfZZdmeSmJFuTnDW6qCVp9AYxJvU3gM92TR+d5BtJ/nuSX+q3UJK1STYn2bxz584BhCH1l+TIJF9IcmOSLUne1LTPqziQBuhC9jw7BXBO11mojXNnJjkA+BBwCnAMcHqSY4YaqSSN0YGLWTjJ7wG7gY82TTuA5VV1V5LnAp9OcmxVfX/uslW1DlgHMDMzU4uJQ5qH3cBbq+qaJI8Frk6yqZl3TlX9yRhj0xJSVV9KctR+LHo8sLWqbgFIcimwCrhhcNFJUnvs95HUJGuAlwH/oaoKoKp+VFV3NZ+vBr4LPH0QgUqLUVU7quqa5vMPgBuBw8cblfQwZzYXo65PckiP+YcDt3VNb2MvOezZKkmTbr+K1CQrgbcBL6+q+7ralzWnpEjys8AK4JZBBCoNSnMU69nAlU3TvooD/8HXsJ0H/GvgODpnpN7fo096tPU9C1VV66pqpqpmli1bNpgoJWmE5nMLqkuArwLPSLItyeuBc4HHApvm3Grql4HrknwT+ATwxqra1XPF0hgkeQzwSeDNzTCU+RQH/oOvoaqq26vqgar6CfBhOqf259oGHNk1fQSwfRTxSdI47HNMalWd3qP5gj59P0mnAJBaJ8lBdPLzo1X1KegUB13zPwz87ZjC0xKW5LCq2tFMvhK4vke3q4AVSY4GvgesBn5tRCFKe0iyns6wvzuq6pk95p8IfAb4p6bpU1V19ugi1KRb1IVT0qRIEjo/rm6sqg90tc+nOJAGpjk7dSLwhCTbgHcBJyY5js7p+1uB32z6PgX4SFWdWlW7k5wJXA4cAKyvqi1j2ARp1oV0zqxevJc+X66ql40mHE0bi1QtFScArwG+leTapu0ddG7js0dxIA3LAs9ObQdO7ZreCOxxeyppHBZxpwppXixStSRU1VfofeGJ/+BL0vC8oLlOZTvwu/2O/idZC6wFWL58+QjDU5tZpO7DOZu+s0fbW07yrlqSJO3DNcBTq+re5kEpn6Zz1589eO909WKRKkl6kD/MNSjdD/Kpqo1J/iLJE6rqznHGpckxiMeiSpIkPUySJzcXrZLkeDo1x13jjUqTxCOpkiRpwfrcqeIggKo6HzgN+K0ku4H7gdWzT6iU5sMiVZIkLVifO1V0zz+Xzi2qpP3i6X5JkiS1jkWqJEmSWsciVZIkSa1jkSpJkqTWsUiVJElS61ikSpIkqXUsUiVphJKsT3JHkuu72v5Lkm8nuS7JZUkO7rPsrUm+leTaJJtHF7UkjZ5FqiSN1oXAyjltm4BnVtUvAN8B3r6X5V9YVcdV1cyQ4pOkVphXkdrnl/+hSTYlubl5P6RpT5I/T7K1OSrwnGEFL0mTpqq+BOya0/b5qtrdTH4NOGLkgUlSy8z3SOqF7PnL/yzgiqpaAVzRTAOcAqxoXmuB8xYfpiQtGb8BfLbPvAI+n+TqJGv3tpIka5NsTrJ5586dAw9SkoZtXkVqr1/+wCrgoubzRcArutovro6vAQcnOWwQwUrSNEvye8Bu4KN9upxQVc+hczDgjCS/3G9dVbWuqmaqambZsmVDiFaSh
msxY1KfVFU7AJr3JzbthwO3dfXb1rRJY5XkyCRfSHJjki1J3tS09xy6Io1SkjXAy4D/UFXVq09VbW/e7wAuA44fXYSSNFrDuHAqPdr22OF6KkpjsBt4a1X9PPB8OkeijqH/0BVpJJKsBN4GvLyq7uvT59FJHjv7GTgZuL5XX0maBgcuYtnbkxxWVTua0/l3NO3bgCO7+h0BbJ+7cFWtA9YBzMzM9DxqIA1Sc8R/9uj/D5LcSOco/yrgxKbbRcAX6RQM0sAluYROvj0hyTbgXXSu5n8ksCkJwNeq6o1JngJ8pKpOBZ4EXNbMPxD466r63ChiPmfTd/Zoe8tJTx/FV0tawhZTpG4A1gDvbd4/09V+ZpJLgecB98wOC5DaIslRwLOBK5kzdCXJE3v0X0vnQkCWL18+ukA1darq9B7NF/Tpux04tfl8C/CsIYYmSa0y31tQXQJ8FXhGkm1JXk+nOD0pyc3ASc00wEbgFmAr8GHgPw08amkRkjwG+CTw5qr6/nyW8SIUSZJGa15HUvv88gd4cY++BZyxmKCkYUlyEJ0C9aNV9ammud/QFUmSNCY+cUpLRjqD+S4AbqyqD3TNmh26Ag8fuiJJksZkMWNSpUlzAvAa4FtJrm3a3kFnqMrHm2Es/xN41ZjikyRJDYtULRlV9RV63yINegxdkSRJ4+PpfkmSJLWORaokSZJaxyJVkiRJrWORKkmSpNaZ6gunej3KT5IkSe3nkVRJkiS1jkWqJEmSWsciVZJGLMn6JHckub6r7dAkm5Lc3Lwf0mfZNU2fm5Os6dVHkqaBRaokjd6FwMo5bWcBV1TVCuCKZvphkhwKvAt4HnA88K5+xawkTbqpvnBKmiT9LvR7y0lPH3EkGraq+lKSo+Y0rwJObD5fBHwReNucPi8FNlXVLoAkm+gUu5cMKVRJGhuLVElqhydV1Q6AqtqR5Ik9+hwO3NY1va1p20OStcBagOXLlw841MHcPcUfYJL2xtP9kjQ50qOtenWsqnVVNVNVM8uWLRtyWFqKeo2tnjM/Sf48ydYk1yV5zqhj1GSzSJWkdrg9yWEAzfsdPfpsA47smj4C2D6C2KReLmTPsdXdTgFWNK+1wHkjiElTxCJVktphAzB7tf4a4DM9+lwOnJzkkOaCqZObNmnkqupLwK69dFkFXFwdXwMOnv0hJs3Hfo9JTfIM4GNdTT8LvBM4GPiPwM6m/R1VtXG/I5Qm2CAuhuq1DsfyTbYkl9C5SOoJSbbRuWL/vcDHk7we+J/Aq5q+M8Abq+oNVbUryXuAq5pVnT17EZXUQv3GUO+Y23HYY6hhOE+hHPU+fqn9e7DfRWpV3QQcB5DkAOB7wGXArwPnVNWfDCRCSZoyVXV6n1kv7tF3M/CGrun1wPohhSYN0oLGUAPrAGZmZnr20dIzqNP9Lwa+W1X/PKD1SQPX5wbq707yvSTXNq9TxxmjJE0Rx1BrUQZ1C6rVPPw+fWcmeS2wGXhrVd09d4FRHNofFu9nObEuBM4FLp7T7pF/SRq8DXTqgUvpPIDintnbrEnzsegiNckjgJcDb2+azgPeQ+eQ/nuA9wO/MXc5D+1r1PrcQF1Si3gQYHL0GVt9EEBVnQ9sBE4FtgL30RkOKM3bII6kngJcU1W3A8y+AyT5MPC3A/gOaZim+si/JA3DXsZWz84v4IwRhaMpNIgxqafTdap/zu0lXgn0vMmv1BLnAf+azkWAO+gc+d+DN0aXJGm0FnUkNcnPACcBv9nV/MdJjqNzuv/WOfOkVvHIvyRJ7bSoIrWq7gMeP6ftNYuKSBqhJId1DeT3yL8kSS0xqKv7pdbrM8j/RI/8S5LUPhapWjL6DPK/YOSBSJKkfRrUzfwlSZKkgbFIlSRJUutYpEqSJKl1LFIlqQWSPCPJtV2v7yd585w+Jya5p6vPO8cVryQNmxdOSVILVNVNdB4qQZIDgO8Bl/Xo+uWqetkoY5OkcfBIqiS1z4uB71bVP487EEkaF4tUSWqf1
XQ9bnqOFyT5ZpLPJjm23wqSrE2yOcnmnTt3DidKSRoii1RJapEkjwBeDvxNj9nXAE+tqmcBHwQ+3W89VbWuqmaqambZsmXDCVaShsgiVZLa5RTgmqq6fe6Mqvp+Vd3bfN4IHJTkCaMOUJJGwSJVktrldPqc6k/y5CRpPh9PZx9+1whjk6SR8ep+SWqJJD8DnAT8ZlfbGwGq6nzgNOC3kuwG7gdWV1WNI1ZJGjaLVElqiaq6D3j8nLbzuz6fC5w76rgkaRw83S9JkqTWsUiVJElS6yz6dH+SW4EfAA8Au6tqJsmhwMeAo4BbgX9fVXcv9rskdZyz6TtDWe9bTnr6vL+vX19JkgZhUEdSX1hVx1XVTDN9FnBFVa0ArmimJUmSpHkZ1un+VcBFzeeLgFcM6XukeUuyPskdSa7vajs0yaYkNzfvh4wzRkmS1DGIq/sL+HySAv6yqtYBT6qqHQBVtSPJEwfwPa3nKdHWu5DOldEXd7XNHvV/b5Kzmum3jSE2SZLUZRBF6glVtb0pRDcl+fZ8FkqyFlgLsHz58gGE0U4Wru1RVV9KctSc5lXAic3ni4AvYpEqSdLYLfp0f1Vtb97vAC4DjgduT3IYQPN+R4/lfK602uBhR/2Bnkf9k6xNsjnJ5p07d440QEmSlqJFFalJHp3ksbOfgZOB64ENwJqm2xrgM4v5Hmnc/FElSdJoLfZ0/5OAy5pHSR8I/HVVfS7JVcDHk7we+J/Aqxb5PdKw3J7ksGbsdM+j/pIkafQWVaRW1S3As3q03wW8eDHrlkZk9qj/e/GovyRJreETp7RkJLkE+CrwjCTbmiP97wVOSnIzcFIzLUmSxmwQV/dLE6GqTu8zy6P+jWE9yUrz1+spfnPmB/gz4FTgPuB1VXXNqOOUpGGzSJWk9nlhVd3ZZ94pwIrm9TzgvOZdkqaKp/slabKsAi6ujq8BB8/e8k8apSQrk9yUZGvzMJS581+XZGeSa5vXG8YRpyaXR1IlqV16PcWv2+HAbV3T25q2Hd2dJuGBKQ4vmVxJDgA+RGcs/zbgqiQbquqGOV0/VlVnjjxATQWPpEpSu5xQVc+hc1r/jCS/PGd+eixTezR4b18N1/HA1qq6pap+DFxK5yi/NDAWqZLUIn2e4tdtG3Bk1/QRwPbRRCc9qN8R/bl+Ncl1ST6R5Mge8wGf6qfeLFIlqSX28hS/bhuA16bj+cA9s4/2lUZoPkf0/xtwVFX9AvD3wEX9VuaRf/XimFRJao9+T/F7I0BVnQ9spHP7qa10bkH162OKVUvbPo/oNw/2mfVh4H0jiEtTxCJVklpiL0/xO7/rcwFnjDIuqYergBVJjga+B6wGfq27w+wjp5vJlwM3jjZETTqLVEmStCBVtTvJmcDlwAHA+qrakuRsYHNVbQB+O8nLgd3ALuB1YwtYE8kiVZIkLVhVbaQz/KS77Z1dn98OvH3UcWl6WKRKkiZSv/usvuWkp484EknD4NX9kiRJah2PpI6Bv/4lSZL2ziOpkiRJah2LVEmSJLXOfhepSY5M8oUkNybZkuRNTfu7k3wvybXN69TBhStJkqSlYDFjUncDb62qa5rH+F2dZFMz75yq+pPFhyeNRpJbgR8ADwC7q2pmvBFJkrS07XeR2jxFYkfz+QdJbgQOH1Rg0hi8sKruHHcQkiRpQGNSkxwFPBu4smk6M8l1SdYnOaTPMmuTbE6yeefOnYMIQ5IkSVNi0begSvIY4JPAm6vq+0nOA94DVPP+fuA35i5XVeuAdQAzMzO12DikRSrg80kK+MsmPx+UZC2wFmD58uWL/rJ+tyGbJN5KTZI0TIs6kprkIDoF6ker6lMAVXV7VT1QVT8BPgwcv/gwpaE7oaqeA5wCnJHkl7tnVtW6qpqpqplly5aNJ0JNtX4Xo87pc2KSe7ouTH1nr3VJ0jTY7yOpSQJcANxYVR/oaj+sGa8K8Erg+sWFKA1fVW1v3u9IchmdH1dfGm9UWmJ6XoxaVTfM6fflq
nrZGOKTpJFazOn+E4DXAN9Kcm3T9g7g9CTH0Tl9eivwm4uKcAnpdfrUU6fDl+TRwE81FwA+GjgZOHvMYWmJ2cvFqHOLVElaEhZzdf9XgPSYtXH/w9FcjvsbiScBl3VODnAg8NdV9bnxhqSlrMfFqN1ekOSbwHbgd6tqS591DHQctSSN2qIvnJImXVXdAjxr3HFIsOfFqHNmXwM8tarubR6U8mlgRa/1eHGqpEnnY1ElqSV6XYzaraq+X1X3Np83AgclecKIw5SkkbBIlaQW6Hcx6pw+T276keR4Ovvwu0YXpSSNjqf7Jakd+l2Muhygqs4HTgN+K8lu4H5gdVUtiVP503BvYUkLY5EqSS2wl4tRu/ucC5w7mogkabwsUiUNlLdSkyQNgmNSJUmS1DoWqZIkSWodi1RJkiS1ztSMSfXKT0mSpOnhkVRJkiS1ztQcSZUkqZ+FnG3zbhRSO3gkVZIkSa1jkSpJkqTWsUiVJElS6zgmdYr0G3Pl+CqNm+MBJUkLNbQiNclK4M+AA4CPVNV7h/VdS5G33Bos81VtsK88TPJI4GLgucBdwKur6tZRxynNMmc1TEM53Z/kAOBDwCnAMcDpSY4ZxndJi2W+qg3mmYevB+6uqqcB5wDvG22U0kPMWQ3bsMakHg9srapbqurHwKXAqiF9l7RY5qvaYD55uAq4qPn8CeDFSTLCGKVu5qyGalin+w8Hbuua3gY8bxAr9jT3wjkecJ+Glq/SAswnDx/sU1W7k9wDPB64cyQRSg9nzmqohlWk9vqVVA/rkKwF1jaT9ya5qccyT2A6E7m12/U7i19Fv237XFWtXPzqh8J83beRbdsAcnAh2pSv+8zDefbpdNx3zk5tvv7OIrdtxDm4EG3KVxhgzrYtXxebAwtcfo9tW8jy05yvwypStwFHdk0fAWzv7lBV64B1e1tJks1VNTP48MZrWrcLJnbbzNd9mNZta9l27TMPu/psS3Ig8DhgV6+V7StnW7btAzWt29bC7RpYzpqv07dtg9iuYY1JvQpYkeToJI8AVgMbhvRd0mKZr2qD+eThBmBN8/k04B+qqueRVGkEzFkN1VCOpDbjTs4ELqdzW4r1VbVlGN8lLZb5qjbol4dJzgY2V9UG4ALgr5JspXM0avX4ItZSZ85q2IZ2n9Sq2ghsXORq9np6dYJN63bBhG6b+bpP07ptrdquXnlYVe/s+vxD4FUD+rpWbfuATeu2tW67Rpizrdv2AZrWbVv0dsWj7pIkSWqbYY1JlSRJkvabRaokSZJaxyJVkiRJrWORKkmSpNaxSJUkSVLrWKRKkiSpdSxSJUmS1DoWqZIkSWodi1RJkiS1jkWqJEmSWsciVZIkSa1jkdpHkgeSXJvk+iR/k+RnBrDO1yU5dxDx9Vj38iT3JvndYaxf7TYp+ZrkqCT3N7Fem+T8Qa5fk2FS8rVZ7y8k+WqSLUm+leSnB/0dardJydckj0jy/zZ5+s0kJw5y/eNgkdrf/VV1XFU9E/gx8Mb5LpjkgOGF1dc5wGfH8L1qh0nK1+82sR5XVfOOU1NlIvI1yYHAfwXeWFXHAicC/3tU36/WmIh8Bf4jQFX9G+Ak4P1JJrrOm+jgR+jLwNMAknw6ydXNr+q1sx2ao5hnJ7kSeEGSX0zyj82vma8neWzT9SlJPpfk5iR/PIjgkrwCuAXYMoj1aeK1Ol+lOdqcrycD11XVNwGq6q6qemAA69XkanO+HgNcAVBVdwD/AswMYL1jc+C4A2i75pf0KcDnmqbfqKpdSR4FXJXkk1Wf3yi0AAAZBElEQVR1F/Bo4PqqemeSRwDfBl5dVVcl+VfA/c3yxwHPBn4E3JTkg1V125zvPAd4YY9wLq2q987p+2jgbXR+NXmqf4lre742jk7yDeD7wP9TVV9e3FZrUk1Avj4dqCSXA8uaPv5YW6ImIF+/CaxKcilwJPDc5v3ri9z0sbFI7e9RSa5tPn8ZuKD5/NtJXtl8PhJYAdwFPAB8sml/B
rCjqq4CqKrvAyQBuKKq7mmmbwCeCjwsKavqLQuI8w+Ac6rq3mb9WpomJV93AMur6q4kzwU+neTY2e/UkjEp+Xog8G+BXwTuA65IcnVVXbGAdWjyTUq+rgd+HtgM/DPwj8DuBSzfOhap/d1fVcd1NzSDkF8CvKCq7kvyRWB2EP0Pu04DBag+6/1R1+cH6PE3WOAvp+cBpzWnCg4GfpLkh1U1lAu01FoTka9V9aPZdVbV1Um+S+do1eb+m6YpNBH5CmwD/ntV3dksuxF4Ds0pVS0ZE5GvVbUbeLCoTfKPwM19vnsiWKQuzOOAu5uE/Dng+X36fZvOWJNfbA7vP5aHDu/v00J+OVXVL81+TvJu4F4LVDVal69JlgG7quqBJD9L58jDLfNdXlOtdfkKXA7853Su5v4x8H/SuUhVal2+NnmaqvpfSU4CdlfVDfNdvo0sUhfmc8Abk1wH3AR8rVenqvpxklcDH2zGqtxP5xeXNEptzNdfBs5OspvOkYM3VtWuIX2XJkvr8rWq7k7yAeAqOkfDNlbV3w3juzRxWpevwBOBy5P8BPge8Johfc/IpKrfUWhJkiRpPLwFlSRJklrHIlWSJEmtY5EqSZKk1rFIlSRJUutYpEqSJKl1WlGkrly5sujc3sPX9L6mhvm6JF5TxZyd+tdUMV+n/jVvrShS77zzznGHIM2b+apJY85qkpivmtWKIlWSJEnqZpEqSZKk1llQkZrkyCRfSHJjki1J3tSjT5L8eZKtSa5L8pzBhSsNT5IDknwjyd+OOxYtPe5fJenhFnokdTfw1qr6eeD5wBlJjpnT5xRgRfNaC5y36Cil0XgTcOO4g9CS5f5VkrosqEitqh1VdU3z+Qd0/kE/fE63VcDF1fE14OAkhw0kWmlIkhwB/F/AR8Ydi5Ym96+S9HAH7u+CSY4Cng1cOWfW4cBtXdPbmrYdc5ZfS+dIAMuXL9/fMKbeOZu+07P9LSc9fcSRTL0/Bf4z8NheMyc5X82hybPY/WuzjonN2V565bE5LE23/bpwKsljgE8Cb66q78+d3WORPe6LVVXrqmqmqmaWLVu2P2FIA5HkZcAdVXV1vz7mq0ZlEPtXMGclTb4FF6lJDqKzA/1oVX2qR5dtwJFd00cA2/cvPGkkTgBenuRW4FLgRUn+63hD0lLk/lWSHrLQq/sDXADcWFUf6NNtA/Da5irU5wP3VNUep6Kktqiqt1fVEVV1FLAa+Ieq+r/HHJaWGPevkvRwCx2TegLwGuBbSa5t2t4BLAeoqvOBjcCpwFbgPuDXBxOqJE0196+S1GVBRWpVfYXeY6K6+xRwxmKCksalqr4IfHHMYWgJcv8qSQ/nE6ckSZLUOhapkiRJah2LVEmSJLWORaokSZJaxyJVkiRJrWORKkmSpNaxSJUkSX0lWZnkpiRbk5zVY/4jk3ysmX9lkqPmzF+e5N4kvzuqmDUdLFIlSVJPSQ4APgScAhwDnJ7kmDndXg/cXVVPA84B3jdn/jnAZ4cdq6aPRaokSerneGBrVd1SVT8GLgVWzemzCrio+fwJ4MXNY35J8grgFmDLiOLVFLFIlSRJ/RwO3NY1va1p69mnqnYD9wCPT/Jo4G3AH+zrS5KsTbI5yeadO3cOJHBNPotUSZLUT69H9dY8+/wBcE5V3buvL6mqdVU1U1Uzy5Yt248wNY0OHHcAkiSptbYBR3ZNHwFs79NnW5IDgccBu4DnAacl+WPgYOAnSX5YVecOP2xNA4tUSZLUz1XAiiRHA98DVgO/NqfPBmAN8FXgNOAfqqqAX5rtkOTdwL0WqFoIi1RJktRTVe1OciZwOXAAsL6qtiQ5G9hcVRuAC4C/SrKVzhHU1eOLWNPEIlWSJPVVVRuBjXPa3tn1+YfAq/axjncPJThNNS+ckiRJUutYpEqSJKl1LFIlSZLUOhapkiRJah2LVEmSJLWORaokSZJaxyJVkiRJrWORKkmSpNaxSJUkSVLrWKRKkiSpdSxSJUmS1DoWq
ZIkSWodi1RJkiS1jkWqJEmSWsciVZIkSa1jkSpJkqTWWVCRmmR9kjuSXN9n/olJ7klybfN652DClIYnyU8n+XqSbybZkuQPxh2Tlib3sZL0kAMX2P9C4Fzg4r30+XJVvWy/I5JG70fAi6rq3iQHAV9J8tmq+tq4A9OScyHuYyUJWOCR1Kr6ErBrSLFIY1Ed9zaTBzWvGmNIWqLcx0rSQ4YxJvUFzWnTzyY5tl+nJGuTbE6yeefOnUMIQ5q/JAckuRa4A9hUVVfOmW++qi3cx0paEgZdpF4DPLWqngV8EPh0v45Vta6qZqpqZtmyZQMOQ1qYqnqgqo4DjgCOT/LMOfPNV7WB+1hJS8ZAi9Sq+v7sadOq2ggclOQJg/wOaZiq6l+ALwIrxxyKtAf3sZKWkoEWqUmenCTN5+Ob9d81yO+QBi3JsiQHN58fBbwE+PZ4o5L25D5W45BkZZKbkmxNclaP+Y9M8rFm/pVJjmraT0pydZJvNe8vGnXsmmwLuro/ySXAicATkmwD3kXnIhOq6nzgNOC3kuwG7gdWV5UXoKjtDgMuSnIAnX/0P15VfzvmmLQEuY9V2zT7xQ8BJwHbgKuSbKiqG7q6vR64u6qelmQ18D7g1cCdwL+rqu3NEKrLgcNHuwWaZAsqUqvq9H3MP5fO7VOkiVFV1wHPHncckvtYtdDxwNaqugUgyaXAKqC7SF0FvLv5/Ang3CSpqm909dkC/HSSR1bVj4YftqaBT5ySJEn9HA7c1jW9jT2Phj7Yp6p2A/cAj5/T51eBb/QrUL0bhXqxSJUkSf2kR9vcISZ77dPcKu19wG/2+xLvRqFeLFIlSVI/24Aju6aPALb365PkQOBxNA+lSHIEcBnw2qr67tCj1VSxSJUkSf1cBaxIcnSSRwCrgQ1z+mwA1jSfTwP+oaqquWvK3wFvr6r/MbKINTUsUiVJUk/NGNMz6VyZfyOdu59sSXJ2kpc33S4AHp9kK/A7wOxtqs4Engb8fpJrm9cTR7wJmmALurpfkiQtLc2DIzbOaXtn1+cfAq/qsdwfAn849AA1tTySKkmSpNaxSJUkSVLrWKRKkiSpdSxSJUmS1DoWqZIkSWodi1RJkiS1jkWqJEmSWsciVZIkSa1jkSpJkqTWsUiVJElS61ikSpIkqXUsUiVJktQ6FqmSJElqHYtUSZIktY5FqiRJklrHIlWSJEmtY5EqSZKk1rFIlSRJUutYpEqSJKl1LFIlSZLUOhapkiRJah2LVEmS1FeSlUluSrI1yVk95j8yycea+VcmOapr3tub9puSvHSUcWvyWaRKkqSekhwAfAg4BTgGOD3JMXO6vR64u6qeBpwDvK9Z9hhgNXAssBL4i2Z90rxYpEqSpH6OB7ZW1S1V9WPgUmDVnD6rgIuaz58AXpwkTfulVfWjqvonYGuzPmleLFIlSVI/hwO3dU1va9p69qmq3cA9wOPnuSwASdYm2Zxk886dOwcUuibdgovUJOuT3JHk+j7zk+TPmzEo1yV5zuLDlIYnyZFJvpDkxiRbkrxp3DFpaXL/qhZKj7aaZ5/5LNtprFpXVTNVNbNs2bIFhqhptT9HUi+kM7akn1OAFc1rLXDefnyHNEq7gbdW1c8DzwfO6DHmShqFC3H/qnbZBhzZNX0EsL1fnyQHAo8Dds1zWamvBRepVfUlOsnXzyrg4ur4GnBwksP2N0Bp2KpqR1Vd03z+AXAjfU5JScPk/lUtdBWwIsnRSR5B50KoDXP6bADWNJ9PA/6hqqppX91c/X80nR9XXx9R3JoCBw5hnf3GoOzo7pRkLZ0jASxfvnwIYSzMOZu+07P9LSc9fdHrWMw6NVrNrVOeDVw5p71V+ToIvfLV3Gy9ee1fYTpzVqNXVbuTnAlcDhwArK+qLUnOBjZX1QbgAuCvkmyl8yNrdbPsliQfB26gc8bqjKp6YCwbook0jCJ1XmNQqmodsA5gZmam5xgVaZSSPAb4JPDmqvp+9zzzVS2xoDF+mLMag
KraCGyc0/bOrs8/BF7VZ9k/Av5oqAFqag3j6n7HoGjiJDmIToH60ar61Ljjkfpw/yppyRhGkboBeG1zFerzgXuqao9TUVJbNPfzuwC4sao+MO54pL1w/yppyVjw6f4klwAnAk9Isg14F3AQQFWdT+eUwKl0btp7H/DrgwpWGpITgNcA30pybdP2juYUlzQy7l8l6SELLlKr6vR9zC/gjP2OSBqxqvoKvcf6SSPl/lWSHuITpyRJktQ6FqmSJElqHYtUSZIktY5FqiRJklrHIlWSJEmtY5EqSZKk1rFIlSRJUutYpEqSJKl1LFIlSZLUOhapkiRJah2LVEmSJLWORaokSZJaxyJVkiRJrWORKkmSpNaxSJUkSVLrWKRKkiSpdSxSJUmS1DoWqZIkaQ9JDk2yKcnNzfshffqtafrcnGRN0/YzSf4uybeTbEny3tFGr2lgkSpJkno5C7iiqlYAVzTTD5PkUOBdwPOA44F3dRWzf1JVPwc8GzghySmjCVvTwiJVkiT1sgq4qPl8EfCKHn1eCmyqql1VdTewCVhZVfdV1RcAqurHwDXAESOIWVPEIlWSJPXypKraAdC8P7FHn8OB27qmtzVtD0pyMPDv6ByNlebtwHEHIEmSxiPJ3wNP7jHr9+a7ih5t1bX+A4FLgD+vqlv2EsdaYC3A8uXL5/nVmnYWqZIkLVFV9ZJ+85LcnuSwqtqR5DDgjh7dtgEndk0fAXyxa3odcHNV/ek+4ljX9GVmZqb21ldLh6f7JUlSLxuANc3nNcBnevS5HDg5ySHNBVMnN20k+UPgccCbRxCrppBFqiRJ6uW9wElJbgZOaqZJMpPkIwBVtQt4D3BV8zq7qnYlOYLOkIFjgGuSXJvkDePYCE0uT/dLkqQ9VNVdwIt7tG8G3tA1vR5YP6fPNnqPV5XmzSOpkiRJah2LVEmSJLWORaokSZJaxyJVkiRJrWORKkmSpNZZcJGaZGWSm5JsTXJWj/mvS7Kzud2Et5xQ6yVZn+SOJNePOxYtbe5fJekhC7oFVZIDgA/RuV/aNuCqJBuq6oY5XT9WVWcOKEZp2C4EzgUuHnMcWsLcv0rSwy30SOrxwNaquqWqfgxcCqwafFjS6FTVl4Bd445DS577V0nqstAi9XDgtq7pbU3bXL+a5Lokn0hyZK8VJVmbZHOSzTt37lxgGNJoma8agYHtX8GclTT5Flqk9np6RM2Z/m/AUVX1C8DfAxf1WlFVrauqmaqaWbZs2QLDkEbLfNUIDGz/CuaspMm30CJ1G9D9y/0IYHt3h6q6q6p+1Ex+GHju/ocnSUuG+1dJ6rLQIvUqYEWSo5M8AlgNbOjukOSwrsmXAzcuLkRJWhLcv0pSlwVd3V9Vu5OcCVwOHACsr6otSc4GNlfVBuC3k7wc2E3nYpTXDThmaaCSXAKcCDwhyTbgXVV1wXij0lLj/lWSHm5BRSpAVW0ENs5pe2fX57cDb198aNJoVNXp445BAvevktTNJ05JkiSpdSxSJUmS1DoWqZIkSWodi1RJkiS1jkWqJEmSWsciVZIkSa1jkSpJkqTWsUiVJElS61ikSpKkPSQ5NMmmJDc374f06bem6XNzkjU95m9Icv3wI9a0sUiVJEm9nAVcUVUrgCua6YdJcijwLuB5wPHAu7qL2SS/Atw7mnA1bSxSJUlSL6uAi5rPFwGv6NHnpcCmqtpVVXcDm4CVAEkeA/wO8IcjiFVTyCJVkiT18qSq2gHQvD+xR5/Dgdu6prc1bQDvAd4P3LevL0qyNsnmJJt37ty5uKg1NQ4cdwCSJGk8kvw98OQes35vvqvo0VZJjgOeVlVvSXLUvlZSVeuAdQAzMzM1z+/WlLNIlSRpiaqql/Sbl+T2JIdV1Y4khwF39Oi2DTixa/oI4IvAC4DnJrmVTq3xxCRfrKoTkebJ0/2SJKmXDcDs1fprgM/06HM5cHKSQ5oLpk4GLq+q86rqKVV1FPBvge9YoGqhLFIlSVIv7wVOSnIzc
FIzTZKZJB8BqKpddMaeXtW8zm7apEXzdL8kSdpDVd0FvLhH+2bgDV3T64H1e1nPrcAzhxCippxHUiVJktQ6FqmSJElqHYtUSZIktY5FqiRJklrHIlWSJEmtY5EqSZKk1rFIlSRJUutYpEqSJKl1LFIlSZLUOhapkiRJah2LVEmSJLWORaokSZJaxyJVkiRJrWORKkmSpNZZcJGaZGWSm5JsTXJWj/mPTPKxZv6VSY4aRKDSMO0rr6VRcP8qSQ9ZUJGa5ADgQ8ApwDHA6UmOmdPt9cDdVfU04BzgfYMIVBqWeea1NFTuXyXp4RZ6JPV4YGtV3VJVPwYuBVbN6bMKuKj5/AngxUmyuDCloZpPXkvD5v5VkrocuMD+hwO3dU1vA57Xr09V7U5yD/B44M7uTknWAmubyXuT3NTj+54wd7lR+53hrHPR2zWMuAak37Z9rqpWjjqYedpnXk9Kvi7WXvJq4retjzbl68D2rzCvnJ34v6n5+qA2718X7Oqrr74zyT/PaZ7WvylM77YtOl8XWqT2+sVe+9GHqloHrNvrlyWbq2pm/uFNhmndLpjYbdtnzi7lfIXp3baWbdfA9q+w75xt2bYP1LRu27Ru11xVtWxu2zRv+7Ru2yC2a6Gn+7cBR3ZNHwFs79cnyYHA44Bd+xugNALzyWtp2Ny/SlKXhRapVwErkhyd5BHAamDDnD4bgDXN59OAf6iqnr/0pZaYT15Lw+b+VZK6LOh0fzMG6kzgcuAAYH1VbUlyNrC5qjYAFwB/lWQrnV/4qxcR315Pr06wad0umMBt65fX+7Gqidv2BZjWbWvNdrl/Hahp3bZp3a75mOZtn9ZtW/R2xR/hkiRJahufOCVJkqTWsUiVJElS67SySJ22R1QmuTXJt5Jcm2Rz03Zokk1Jbm7eDxl3nPORZH2SO5Jc39XWc1vS8efN3/G6JM8ZX+TDY762l/na2zTlrPlqvk4S83Vh+dq6IjXT+4jKF1bVcV33DDsLuKKqVgBXNNOT4EJg7k14+23LKcCK5rUWOG9EMY6M+dp6F2K+PsyU5qz5ar5OEvN1nvnauiKVpfOIyu7HG14EvGKMscxbVX2JPe/L2G9bVgEXV8fXgIOTHDaaSEfGfG0x87WnpZCz5uv0MF9bahT52sYitdejAQ8fUyyDUsDnk1ydzqMKAZ5UVTsAmvcnji26xeu3LdP4t5xrGrfRfO2Yhr9lL9O2neZrx6T/HfuZtu00Xzvm9Xdc6GNRR2Hej/2bICdU1fYkTwQ2Jfn2uAMakWn8W841jdtovj5k0v+WvUzbdpqvD5nkv2M/07ad5utD9vl3bOOR1Kl7RGVVbW/e7wAuo3P64vbZQ93N+x3ji3DR+m3L1P0te5i6bTRfHzTxf8s+pmo7zdcHTfTfcS+majvN1wfN6+/YxiJ1qh5RmeTRSR47+xk4Gbiehz/ecA3wmfFEOBD9tmUD8Nrmqr7nA/fMngaYIubr5FnK+QpTlLPmq/k6SczX/cjXqmrdCzgV+A7wXeD3xh3PIrflZ4FvNq8ts9sDPJ7OlW83N++HjjvWeW7PJcAO4H/T+WX0+n7bQufw/oeav+O3gJlxxz+k/ybma0tf5mvf/y5TkbPmq/k6SS/zdeH56mNRJUmS1DptPN0vSZKkJc4iVZIkSa1jkSpJkqTWsUiVJElS61ikSpIkqXUsUockySuTVJKfG3cs0r6Yr5ok5qsmifm6/yxSh+d04Ct0bjwstZ35qklivmqSmK/7ySJ1CJI8BjiBzo1tVzdtP5XkL5JsSfK3STYmOa2Z99wk/z3J1Ukun32kmDQK5qsmifmqSWK+Lo5F6nC8AvhcVX0H2JXkOcCvAEcB/wZ4A/ACgCQHAR8ETquq5wLrgT8aR9BassxXTRLzVZPEfF2EA8cdwJQ6HfjT5vOlzfRBwN9U1U+A/y/JF5r5zwCeCWxKAnAAnceMSaNivmqSmK+aJObrIlikDliSxwMvAp6ZpOgkWQGX9VsE2FJVL
xhRiNKDzFdNEvNVk8R8XTxP9w/eacDFVfXUqjqqqo4E/gm4E/jVZizKk4ATm/43AcuSPHi4P8mx4whcS5L5qklivmqSmK+LZJE6eKez56+kTwJPAbYB1wN/CVwJ3FNVP6aTyO9L8k3gWuD/+P/btWMbhIEYgKJ2Q5WCfRiPlZiCSTIAvWkoKSJFSmzlvQlc/MK683HjcnF6ZRK9Moled8qqOnuGy8jMpao+vy+Ad0Q8qmo9ey74R69Molcm0es2blKP9crMe0TcIuIpSJrTK5PolUn0uoGXVAAA2nGTCgBAO5ZUAADasaQCANCOJRUAgHYsqQAAtPMF30U0961/H98AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visualize the correlation between Parch and Age\n", + "grid = sns.FacetGrid(df, col='Parch', col_wrap=4, size=3.0, aspect=0.8, sharey=False)\n", + "grid.map(plt.hist, 'Age', alpha=.5, bins=range(0,105,5))\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "_cell_guid": "38a4399d-3e36-44ab-8269-a10d6b6aecc8", + "_execution_state": "idle", + "_uuid": "42db5280471f9bd3acefe047daccfefcd3bf1a00" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
Parch
031.969401
124.965625
218.975979
338.875000
447.600000
539.333333
641.500000
9NaN
\n", + "
" + ], + "text/plain": [ + " Age\n", + "Parch \n", + "0 31.969401\n", + "1 24.965625\n", + "2 18.975979\n", + "3 38.875000\n", + "4 47.600000\n", + "5 39.333333\n", + "6 41.500000\n", + "9 NaN" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the mean Age for each Parch\n", + "df[['Parch', 'Age']].groupby(['Parch']).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "_cell_guid": "4e914c54-23d4-46dc-9a7e-fc9e0e038254", + "_execution_state": "idle", + "_uuid": "79cb7ffd9ea2ac77fe8bae5d7264986be56604ec" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age
Parch
012.422617
118.410885
214.564066
316.295815
414.432602
51.032796
62.121320
9NaN
\n", + "
" + ], + "text/plain": [ + " Age\n", + "Parch \n", + "0 12.422617\n", + "1 18.410885\n", + "2 14.564066\n", + "3 16.295815\n", + "4 14.432602\n", + "5 1.032796\n", + "6 2.121320\n", + "9 NaN" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# inspect the standard deviation of Age for each Parch\n", + "df[['Parch', 'Age']].groupby(['Parch']).std() " + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "_cell_guid": "020c9de0-ff1c-4eed-be20-cbff19cac018", + "_execution_state": "idle", + "_uuid": "9334adf52175fe53abf8138378f4a83efaf77ffb" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\HP-PC\\Anaconda3\\lib\\site-packages\\pandas\\core\\indexing.py:190: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame\n", + "\n", + "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", + " self._setitem_with_indexer(indexer, value)\n" + ] + }, + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# notice that instead of using Title, we should use its corresponding dummy variables \n", + "df_sub = df[['Age','Master','Miss','Mr','Mrs','Others','Fare-bin','SibSp']]\n", + "\n", + "X_train = df_sub.dropna().drop('Age', axis=1)\n", + "y_train = df['Age'].dropna()\n", + "X_test = df_sub.loc[np.isnan(df.Age)].drop('Age', axis=1)\n", + "\n", + "regressor = RandomForestRegressor(n_estimators = 300)\n", + "regressor.fit(X_train, y_train)\n", + "y_pred = np.round(regressor.predict(X_test),1)\n", + "df.Age.loc[df.Age.isnull()] = y_pred\n", + "\n", + "df.Age.isnull().sum(axis=0) # no more NAN now" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "_cell_guid": "8a4ce8e1-b212-4d6a-8d8b-ad56bf8acf72", + 
"_execution_state": "idle", + "_uuid": "5f9658c201cd82bf031d49803d4045fe847d81e8" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Age-binSurvived
010.675000
120.452381
230.402597
340.315789
450.427083
560.375000
670.125000
\n", + "
" + ], + "text/plain": [ + " Age-bin Survived\n", + "0 1 0.675000\n", + "1 2 0.452381\n", + "2 3 0.402597\n", + "3 4 0.315789\n", + "4 5 0.427083\n", + "5 6 0.375000\n", + "6 7 0.125000" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bins = [ 0, 4, 12, 18, 30, 50, 65, 100] # This is somewhat arbitrary\n", + "age_index = (1,2,3,4,5,6,7) #('baby','child','teenager','young','mid-age','over-50','senior')\n", + "df['Age-bin'] = pd.cut(df.Age, bins, labels=age_index).astype(int)\n", + "\n", + "df[['Age-bin', 'Survived']].groupby(['Age-bin'],as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "_cell_guid": "4f3f0831-83e7-44e4-a69e-5f5fe024e122", + "_execution_state": "idle", + "_uuid": "a7a0e7795ab628123de85df8cb6c2eac84185165" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TicketSurvived
010.630137
120.464481
230.239203
340.200000
450.000000
560.166667
670.111111
780.000000
891.000000
9A0.068966
10C0.340426
11F0.571429
12L0.250000
13P0.646154
14S0.323077
15W0.153846
\n", + "
" + ], + "text/plain": [ + " Ticket Survived\n", + "0 1 0.630137\n", + "1 2 0.464481\n", + "2 3 0.239203\n", + "3 4 0.200000\n", + "4 5 0.000000\n", + "5 6 0.166667\n", + "6 7 0.111111\n", + "7 8 0.000000\n", + "8 9 1.000000\n", + "9 A 0.068966\n", + "10 C 0.340426\n", + "11 F 0.571429\n", + "12 L 0.250000\n", + "13 P 0.646154\n", + "14 S 0.323077\n", + "15 W 0.153846" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['Ticket', 'Survived']].groupby(['Ticket'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "_cell_guid": "7b0bc205-5f92-4ba1-9a52-8c6b2bb91093", + "_execution_state": "idle", + "_uuid": "1c5a38e61a105b5ad40fc1f159725c30c65e71bc" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "3 429\n", + "2 278\n", + "1 210\n", + "P 98\n", + "S 98\n", + "C 77\n", + "A 42\n", + "W 19\n", + "7 13\n", + "F 13\n", + "4 11\n", + "6 9\n", + "L 5\n", + "5 3\n", + "9 2\n", + "8 2\n", + "Name: Ticket, dtype: int64" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Ticket'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": { + "_cell_guid": "58fe643e-abf9-4761-bba1-674d3304aba2", + "_execution_state": "idle", + "_uuid": "7f12fa7d7d6ee803835e5768b9da2a92e1468969" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
TicketSurvived
010.630137
120.464481
230.239203
340.166667
4C0.340426
5P0.646154
6S0.323077
\n", + "
" + ], + "text/plain": [ + " Ticket Survived\n", + "0 1 0.630137\n", + "1 2 0.464481\n", + "2 3 0.239203\n", + "3 4 0.166667\n", + "4 C 0.340426\n", + "5 P 0.646154\n", + "6 S 0.323077" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Ticket'] = df['Ticket'].replace(['A','W','F','L','5','6','7','8','9'], '4')\n", + "\n", + "# check the correlation again\n", + "df[['Ticket', 'Survived']].groupby(['Ticket'], as_index=False).mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "_cell_guid": "76238516-9a91-4238-8fc7-abdd64af5435", + "_execution_state": "idle", + "_uuid": "ac5457d8c3d71e35495d2aeaca2008cabc1b0bae" + }, + "outputs": [], + "source": [ + "# dummy encoding\n", + "df = pd.get_dummies(df,columns=['Ticket'])" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AgeFareParchPassengerIdPclassSexSibSpSurvivedTitleMaster...FamilyFare-binAge-binTicket_1Ticket_2Ticket_3Ticket_4Ticket_CTicket_PTicket_S
022.07.2500013010.0Mr0...2140001000
138.071.2833021111.0Mrs0...2550000010
226.07.9250033101.0Miss0...1240000001
335.053.1000041111.0Mrs0...2551000000
435.08.0500053000.0Mr0...1250010000
528.88.4583063000.0Mr0...1240010000
654.051.8625071000.0Mr0...1561000000
72.021.0750183030.0Master1...0310010000
827.011.1333293101.0Mrs0...3340010000
914.030.07080102111.0Mrs0...2430100000
104.016.70001113111.0Miss0...3310000010
1158.026.55000121101.0Miss0...1461000000
1220.08.05000133000.0Mr0...1240001000
1339.031.27505143010.0Mr0...0450010000
1414.07.85420153100.0Miss0...1130010000
1555.016.00000162101.0Mrs0...1360100000
162.029.12501173040.0Master1...0410010000
1732.413.00000182001.0Mr0...1350100000
1831.018.00000193110.0Mrs0...2350010000
1934.27.22500203101.0Mrs0...1150100000
2035.026.00000212000.0Mr0...1450100000
2134.013.00000222001.0Mr0...1350100000
2215.08.02920233101.0Miss0...1230010000
2328.035.50000241001.0Mr0...1441000000
248.021.07501253130.0Miss0...0320010000
2538.031.38755263111.0Mrs0...0450010000
2629.57.22500273000.0Mr0...1140100000
2719.0263.00002281030.0Mr0...0541000000
2824.57.87920293101.0Miss0...1240010000
2928.87.89580303000.0Mr0...1240010000
..................................................................
127921.07.750001280300NaNMr0...1140010000
12806.021.075011281303NaNMaster1...0320010000
128123.093.500001282100NaNMr0...1541000000
128251.039.400011283110NaNMrs0...2460000010
128313.020.250021284300NaNMaster1...3330000100
128447.010.500001285200NaNMr0...1250000100
128529.022.025011286303NaNMr0...0440010000
128618.060.000001287111NaNMrs0...2531000000
128724.07.250001288300NaNMr0...1140010000
128848.079.200011289111NaNMrs0...3551000000
128922.07.775001290300NaNMr0...1140010000
129031.07.733301291300NaNMr0...1150100000
129130.0164.866701292110NaNMiss0...1540010000
129238.021.000001293201NaNMr0...2350100000
129322.059.400011294110NaNMiss0...2541000000
129417.047.100001295100NaNMr0...1531000000
129543.027.720801296101NaNMr0...2451000000
129620.013.862501297200NaNMr0...1340000001
129723.010.500001298201NaNMr0...2240100000
129850.0211.500011299101NaNMr0...3551000000
129922.97.720801300310NaNMiss0...1140010000
13003.013.775011301311NaNMiss0...3310000001
130122.97.750001302310NaNMiss0...1140010000
130237.090.000001303111NaNMrs0...2551000000
130328.07.775001304310NaNMiss0...1140010000
130428.88.050001305300NaNMr0...1240001000
130539.0108.900001306110NaNOthers0...1550000010
130638.57.250001307300NaNMr0...1150000001
130728.88.050001308300NaNMr0...1240010000
13083.722.358311309301NaNMaster1...3410100000
\n", + "

1309 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " Age Fare Parch PassengerId Pclass Sex SibSp Survived \\\n", + "0 22.0 7.2500 0 1 3 0 1 0.0 \n", + "1 38.0 71.2833 0 2 1 1 1 1.0 \n", + "2 26.0 7.9250 0 3 3 1 0 1.0 \n", + "3 35.0 53.1000 0 4 1 1 1 1.0 \n", + "4 35.0 8.0500 0 5 3 0 0 0.0 \n", + "5 28.8 8.4583 0 6 3 0 0 0.0 \n", + "6 54.0 51.8625 0 7 1 0 0 0.0 \n", + "7 2.0 21.0750 1 8 3 0 3 0.0 \n", + "8 27.0 11.1333 2 9 3 1 0 1.0 \n", + "9 14.0 30.0708 0 10 2 1 1 1.0 \n", + "10 4.0 16.7000 1 11 3 1 1 1.0 \n", + "11 58.0 26.5500 0 12 1 1 0 1.0 \n", + "12 20.0 8.0500 0 13 3 0 0 0.0 \n", + "13 39.0 31.2750 5 14 3 0 1 0.0 \n", + "14 14.0 7.8542 0 15 3 1 0 0.0 \n", + "15 55.0 16.0000 0 16 2 1 0 1.0 \n", + "16 2.0 29.1250 1 17 3 0 4 0.0 \n", + "17 32.4 13.0000 0 18 2 0 0 1.0 \n", + "18 31.0 18.0000 0 19 3 1 1 0.0 \n", + "19 34.2 7.2250 0 20 3 1 0 1.0 \n", + "20 35.0 26.0000 0 21 2 0 0 0.0 \n", + "21 34.0 13.0000 0 22 2 0 0 1.0 \n", + "22 15.0 8.0292 0 23 3 1 0 1.0 \n", + "23 28.0 35.5000 0 24 1 0 0 1.0 \n", + "24 8.0 21.0750 1 25 3 1 3 0.0 \n", + "25 38.0 31.3875 5 26 3 1 1 1.0 \n", + "26 29.5 7.2250 0 27 3 0 0 0.0 \n", + "27 19.0 263.0000 2 28 1 0 3 0.0 \n", + "28 24.5 7.8792 0 29 3 1 0 1.0 \n", + "29 28.8 7.8958 0 30 3 0 0 0.0 \n", + "... ... ... ... ... ... ... ... ... 
\n", + "1279 21.0 7.7500 0 1280 3 0 0 NaN \n", + "1280 6.0 21.0750 1 1281 3 0 3 NaN \n", + "1281 23.0 93.5000 0 1282 1 0 0 NaN \n", + "1282 51.0 39.4000 1 1283 1 1 0 NaN \n", + "1283 13.0 20.2500 2 1284 3 0 0 NaN \n", + "1284 47.0 10.5000 0 1285 2 0 0 NaN \n", + "1285 29.0 22.0250 1 1286 3 0 3 NaN \n", + "1286 18.0 60.0000 0 1287 1 1 1 NaN \n", + "1287 24.0 7.2500 0 1288 3 0 0 NaN \n", + "1288 48.0 79.2000 1 1289 1 1 1 NaN \n", + "1289 22.0 7.7750 0 1290 3 0 0 NaN \n", + "1290 31.0 7.7333 0 1291 3 0 0 NaN \n", + "1291 30.0 164.8667 0 1292 1 1 0 NaN \n", + "1292 38.0 21.0000 0 1293 2 0 1 NaN \n", + "1293 22.0 59.4000 1 1294 1 1 0 NaN \n", + "1294 17.0 47.1000 0 1295 1 0 0 NaN \n", + "1295 43.0 27.7208 0 1296 1 0 1 NaN \n", + "1296 20.0 13.8625 0 1297 2 0 0 NaN \n", + "1297 23.0 10.5000 0 1298 2 0 1 NaN \n", + "1298 50.0 211.5000 1 1299 1 0 1 NaN \n", + "1299 22.9 7.7208 0 1300 3 1 0 NaN \n", + "1300 3.0 13.7750 1 1301 3 1 1 NaN \n", + "1301 22.9 7.7500 0 1302 3 1 0 NaN \n", + "1302 37.0 90.0000 0 1303 1 1 1 NaN \n", + "1303 28.0 7.7750 0 1304 3 1 0 NaN \n", + "1304 28.8 8.0500 0 1305 3 0 0 NaN \n", + "1305 39.0 108.9000 0 1306 1 1 0 NaN \n", + "1306 38.5 7.2500 0 1307 3 0 0 NaN \n", + "1307 28.8 8.0500 0 1308 3 0 0 NaN \n", + "1308 3.7 22.3583 1 1309 3 0 1 NaN \n", + "\n", + " Title Master ... Family Fare-bin Age-bin Ticket_1 Ticket_2 \\\n", + "0 Mr 0 ... 2 1 4 0 0 \n", + "1 Mrs 0 ... 2 5 5 0 0 \n", + "2 Miss 0 ... 1 2 4 0 0 \n", + "3 Mrs 0 ... 2 5 5 1 0 \n", + "4 Mr 0 ... 1 2 5 0 0 \n", + "5 Mr 0 ... 1 2 4 0 0 \n", + "6 Mr 0 ... 1 5 6 1 0 \n", + "7 Master 1 ... 0 3 1 0 0 \n", + "8 Mrs 0 ... 3 3 4 0 0 \n", + "9 Mrs 0 ... 2 4 3 0 1 \n", + "10 Miss 0 ... 3 3 1 0 0 \n", + "11 Miss 0 ... 1 4 6 1 0 \n", + "12 Mr 0 ... 1 2 4 0 0 \n", + "13 Mr 0 ... 0 4 5 0 0 \n", + "14 Miss 0 ... 1 1 3 0 0 \n", + "15 Mrs 0 ... 1 3 6 0 1 \n", + "16 Master 1 ... 0 4 1 0 0 \n", + "17 Mr 0 ... 1 3 5 0 1 \n", + "18 Mrs 0 ... 2 3 5 0 0 \n", + "19 Mrs 0 ... 1 1 5 0 1 \n", + "20 Mr 0 ... 
1 4 5 0 1 \n", + "21 Mr 0 ... 1 3 5 0 1 \n", + "22 Miss 0 ... 1 2 3 0 0 \n", + "23 Mr 0 ... 1 4 4 1 0 \n", + "24 Miss 0 ... 0 3 2 0 0 \n", + "25 Mrs 0 ... 0 4 5 0 0 \n", + "26 Mr 0 ... 1 1 4 0 1 \n", + "27 Mr 0 ... 0 5 4 1 0 \n", + "28 Miss 0 ... 1 2 4 0 0 \n", + "29 Mr 0 ... 1 2 4 0 0 \n", + "... ... ... ... ... ... ... ... ... \n", + "1279 Mr 0 ... 1 1 4 0 0 \n", + "1280 Master 1 ... 0 3 2 0 0 \n", + "1281 Mr 0 ... 1 5 4 1 0 \n", + "1282 Mrs 0 ... 2 4 6 0 0 \n", + "1283 Master 1 ... 3 3 3 0 0 \n", + "1284 Mr 0 ... 1 2 5 0 0 \n", + "1285 Mr 0 ... 0 4 4 0 0 \n", + "1286 Mrs 0 ... 2 5 3 1 0 \n", + "1287 Mr 0 ... 1 1 4 0 0 \n", + "1288 Mrs 0 ... 3 5 5 1 0 \n", + "1289 Mr 0 ... 1 1 4 0 0 \n", + "1290 Mr 0 ... 1 1 5 0 1 \n", + "1291 Miss 0 ... 1 5 4 0 0 \n", + "1292 Mr 0 ... 2 3 5 0 1 \n", + "1293 Miss 0 ... 2 5 4 1 0 \n", + "1294 Mr 0 ... 1 5 3 1 0 \n", + "1295 Mr 0 ... 2 4 5 1 0 \n", + "1296 Mr 0 ... 1 3 4 0 0 \n", + "1297 Mr 0 ... 2 2 4 0 1 \n", + "1298 Mr 0 ... 3 5 5 1 0 \n", + "1299 Miss 0 ... 1 1 4 0 0 \n", + "1300 Miss 0 ... 3 3 1 0 0 \n", + "1301 Miss 0 ... 1 1 4 0 0 \n", + "1302 Mrs 0 ... 2 5 5 1 0 \n", + "1303 Miss 0 ... 1 1 4 0 0 \n", + "1304 Mr 0 ... 1 2 4 0 0 \n", + "1305 Others 0 ... 1 5 5 0 0 \n", + "1306 Mr 0 ... 1 1 5 0 0 \n", + "1307 Mr 0 ... 1 2 4 0 0 \n", + "1308 Master 1 ... 
3 4 1 0 1 \n", + "\n", + " Ticket_3 Ticket_4 Ticket_C Ticket_P Ticket_S \n", + "0 0 1 0 0 0 \n", + "1 0 0 0 1 0 \n", + "2 0 0 0 0 1 \n", + "3 0 0 0 0 0 \n", + "4 1 0 0 0 0 \n", + "5 1 0 0 0 0 \n", + "6 0 0 0 0 0 \n", + "7 1 0 0 0 0 \n", + "8 1 0 0 0 0 \n", + "9 0 0 0 0 0 \n", + "10 0 0 0 1 0 \n", + "11 0 0 0 0 0 \n", + "12 0 1 0 0 0 \n", + "13 1 0 0 0 0 \n", + "14 1 0 0 0 0 \n", + "15 0 0 0 0 0 \n", + "16 1 0 0 0 0 \n", + "17 0 0 0 0 0 \n", + "18 1 0 0 0 0 \n", + "19 0 0 0 0 0 \n", + "20 0 0 0 0 0 \n", + "21 0 0 0 0 0 \n", + "22 1 0 0 0 0 \n", + "23 0 0 0 0 0 \n", + "24 1 0 0 0 0 \n", + "25 1 0 0 0 0 \n", + "26 0 0 0 0 0 \n", + "27 0 0 0 0 0 \n", + "28 1 0 0 0 0 \n", + "29 1 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "1279 1 0 0 0 0 \n", + "1280 1 0 0 0 0 \n", + "1281 0 0 0 0 0 \n", + "1282 0 0 0 1 0 \n", + "1283 0 0 1 0 0 \n", + "1284 0 0 1 0 0 \n", + "1285 1 0 0 0 0 \n", + "1286 0 0 0 0 0 \n", + "1287 1 0 0 0 0 \n", + "1288 0 0 0 0 0 \n", + "1289 1 0 0 0 0 \n", + "1290 0 0 0 0 0 \n", + "1291 1 0 0 0 0 \n", + "1292 0 0 0 0 0 \n", + "1293 0 0 0 0 0 \n", + "1294 0 0 0 0 0 \n", + "1295 0 0 0 0 0 \n", + "1296 0 0 0 0 1 \n", + "1297 0 0 0 0 0 \n", + "1298 0 0 0 0 0 \n", + "1299 1 0 0 0 0 \n", + "1300 0 0 0 0 1 \n", + "1301 1 0 0 0 0 \n", + "1302 0 0 0 0 0 \n", + "1303 1 0 0 0 0 \n", + "1304 0 1 0 0 0 \n", + "1305 0 0 0 1 0 \n", + "1306 0 0 0 0 1 \n", + "1307 1 0 0 0 0 \n", + "1308 0 0 0 0 0 \n", + "\n", + "[1309 rows x 24 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "_cell_guid": "e859e686-72db-4608-a89e-24a9e9b52e47", + "_execution_state": "idle", + "_uuid": "df4d6e82a6a6fac2c995d55f829085f9afee3267" + }, + "outputs": [], + "source": [ + "df = df.drop(labels=['SibSp','Parch','Age','Fare','Title'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + 
"data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PassengerIdPclassSexSurvivedMasterMissMrMrsOthersFamilyFare-binAge-binTicket_1Ticket_2Ticket_3Ticket_4Ticket_CTicket_PTicket_S
01300.0001002140001000
12111.0000102550000010
23311.0010001240000001
34111.0000102551000000
45300.0001001250010000
56300.0001001240010000
67100.0001001561000000
78300.0100000310010000
89311.0000103340010000
910211.0000102430100000
1011311.0010003310000010
1112111.0010001461000000
1213300.0001001240001000
1314300.0001000450010000
1415310.0010001130010000
1516211.0000101360100000
1617300.0100000410010000
1718201.0001001350100000
1819310.0000102350010000
1920311.0000101150100000
2021200.0001001450100000
2122201.0001001350100000
2223311.0010001230010000
2324101.0001001441000000
2425310.0010000320010000
2526311.0000100450010000
2627300.0001001140100000
2728100.0001000541000000
2829311.0010001240010000
2930300.0001001240010000
............................................................
1279128030NaN001001140010000
1280128130NaN100000320010000
1281128210NaN001001541000000
1282128311NaN000102460000010
1283128430NaN100003330000100
1284128520NaN001001250000100
1285128630NaN001000440010000
1286128711NaN000102531000000
1287128830NaN001001140010000
1288128911NaN000103551000000
1289129030NaN001001140010000
1290129130NaN001001150100000
1291129211NaN010001540010000
1292129320NaN001002350100000
1293129411NaN010002541000000
1294129510NaN001001531000000
1295129610NaN001002451000000
1296129720NaN001001340000001
1297129820NaN001002240100000
1298129910NaN001003551000000
1299130031NaN010001140010000
1300130131NaN010003310000001
1301130231NaN010001140010000
1302130311NaN000102551000000
1303130431NaN010001140010000
1304130530NaN001001240001000
1305130611NaN000011550000010
1306130730NaN001001150000001
1307130830NaN001001240010000
1308130930NaN100003410100000
\n", + "

1309 rows × 19 columns

\n", + "
" + ], + "text/plain": [ + " PassengerId Pclass Sex Survived Master Miss Mr Mrs Others \\\n", + "0 1 3 0 0.0 0 0 1 0 0 \n", + "1 2 1 1 1.0 0 0 0 1 0 \n", + "2 3 3 1 1.0 0 1 0 0 0 \n", + "3 4 1 1 1.0 0 0 0 1 0 \n", + "4 5 3 0 0.0 0 0 1 0 0 \n", + "5 6 3 0 0.0 0 0 1 0 0 \n", + "6 7 1 0 0.0 0 0 1 0 0 \n", + "7 8 3 0 0.0 1 0 0 0 0 \n", + "8 9 3 1 1.0 0 0 0 1 0 \n", + "9 10 2 1 1.0 0 0 0 1 0 \n", + "10 11 3 1 1.0 0 1 0 0 0 \n", + "11 12 1 1 1.0 0 1 0 0 0 \n", + "12 13 3 0 0.0 0 0 1 0 0 \n", + "13 14 3 0 0.0 0 0 1 0 0 \n", + "14 15 3 1 0.0 0 1 0 0 0 \n", + "15 16 2 1 1.0 0 0 0 1 0 \n", + "16 17 3 0 0.0 1 0 0 0 0 \n", + "17 18 2 0 1.0 0 0 1 0 0 \n", + "18 19 3 1 0.0 0 0 0 1 0 \n", + "19 20 3 1 1.0 0 0 0 1 0 \n", + "20 21 2 0 0.0 0 0 1 0 0 \n", + "21 22 2 0 1.0 0 0 1 0 0 \n", + "22 23 3 1 1.0 0 1 0 0 0 \n", + "23 24 1 0 1.0 0 0 1 0 0 \n", + "24 25 3 1 0.0 0 1 0 0 0 \n", + "25 26 3 1 1.0 0 0 0 1 0 \n", + "26 27 3 0 0.0 0 0 1 0 0 \n", + "27 28 1 0 0.0 0 0 1 0 0 \n", + "28 29 3 1 1.0 0 1 0 0 0 \n", + "29 30 3 0 0.0 0 0 1 0 0 \n", + "... ... ... ... ... ... ... .. ... ... 
\n", + "1279 1280 3 0 NaN 0 0 1 0 0 \n", + "1280 1281 3 0 NaN 1 0 0 0 0 \n", + "1281 1282 1 0 NaN 0 0 1 0 0 \n", + "1282 1283 1 1 NaN 0 0 0 1 0 \n", + "1283 1284 3 0 NaN 1 0 0 0 0 \n", + "1284 1285 2 0 NaN 0 0 1 0 0 \n", + "1285 1286 3 0 NaN 0 0 1 0 0 \n", + "1286 1287 1 1 NaN 0 0 0 1 0 \n", + "1287 1288 3 0 NaN 0 0 1 0 0 \n", + "1288 1289 1 1 NaN 0 0 0 1 0 \n", + "1289 1290 3 0 NaN 0 0 1 0 0 \n", + "1290 1291 3 0 NaN 0 0 1 0 0 \n", + "1291 1292 1 1 NaN 0 1 0 0 0 \n", + "1292 1293 2 0 NaN 0 0 1 0 0 \n", + "1293 1294 1 1 NaN 0 1 0 0 0 \n", + "1294 1295 1 0 NaN 0 0 1 0 0 \n", + "1295 1296 1 0 NaN 0 0 1 0 0 \n", + "1296 1297 2 0 NaN 0 0 1 0 0 \n", + "1297 1298 2 0 NaN 0 0 1 0 0 \n", + "1298 1299 1 0 NaN 0 0 1 0 0 \n", + "1299 1300 3 1 NaN 0 1 0 0 0 \n", + "1300 1301 3 1 NaN 0 1 0 0 0 \n", + "1301 1302 3 1 NaN 0 1 0 0 0 \n", + "1302 1303 1 1 NaN 0 0 0 1 0 \n", + "1303 1304 3 1 NaN 0 1 0 0 0 \n", + "1304 1305 3 0 NaN 0 0 1 0 0 \n", + "1305 1306 1 1 NaN 0 0 0 0 1 \n", + "1306 1307 3 0 NaN 0 0 1 0 0 \n", + "1307 1308 3 0 NaN 0 0 1 0 0 \n", + "1308 1309 3 0 NaN 1 0 0 0 0 \n", + "\n", + " Family Fare-bin Age-bin Ticket_1 Ticket_2 Ticket_3 Ticket_4 \\\n", + "0 2 1 4 0 0 0 1 \n", + "1 2 5 5 0 0 0 0 \n", + "2 1 2 4 0 0 0 0 \n", + "3 2 5 5 1 0 0 0 \n", + "4 1 2 5 0 0 1 0 \n", + "5 1 2 4 0 0 1 0 \n", + "6 1 5 6 1 0 0 0 \n", + "7 0 3 1 0 0 1 0 \n", + "8 3 3 4 0 0 1 0 \n", + "9 2 4 3 0 1 0 0 \n", + "10 3 3 1 0 0 0 0 \n", + "11 1 4 6 1 0 0 0 \n", + "12 1 2 4 0 0 0 1 \n", + "13 0 4 5 0 0 1 0 \n", + "14 1 1 3 0 0 1 0 \n", + "15 1 3 6 0 1 0 0 \n", + "16 0 4 1 0 0 1 0 \n", + "17 1 3 5 0 1 0 0 \n", + "18 2 3 5 0 0 1 0 \n", + "19 1 1 5 0 1 0 0 \n", + "20 1 4 5 0 1 0 0 \n", + "21 1 3 5 0 1 0 0 \n", + "22 1 2 3 0 0 1 0 \n", + "23 1 4 4 1 0 0 0 \n", + "24 0 3 2 0 0 1 0 \n", + "25 0 4 5 0 0 1 0 \n", + "26 1 1 4 0 1 0 0 \n", + "27 0 5 4 1 0 0 0 \n", + "28 1 2 4 0 0 1 0 \n", + "29 1 2 4 0 0 1 0 \n", + "... ... ... ... ... ... ... ... 
\n", + "1279 1 1 4 0 0 1 0 \n", + "1280 0 3 2 0 0 1 0 \n", + "1281 1 5 4 1 0 0 0 \n", + "1282 2 4 6 0 0 0 0 \n", + "1283 3 3 3 0 0 0 0 \n", + "1284 1 2 5 0 0 0 0 \n", + "1285 0 4 4 0 0 1 0 \n", + "1286 2 5 3 1 0 0 0 \n", + "1287 1 1 4 0 0 1 0 \n", + "1288 3 5 5 1 0 0 0 \n", + "1289 1 1 4 0 0 1 0 \n", + "1290 1 1 5 0 1 0 0 \n", + "1291 1 5 4 0 0 1 0 \n", + "1292 2 3 5 0 1 0 0 \n", + "1293 2 5 4 1 0 0 0 \n", + "1294 1 5 3 1 0 0 0 \n", + "1295 2 4 5 1 0 0 0 \n", + "1296 1 3 4 0 0 0 0 \n", + "1297 2 2 4 0 1 0 0 \n", + "1298 3 5 5 1 0 0 0 \n", + "1299 1 1 4 0 0 1 0 \n", + "1300 3 3 1 0 0 0 0 \n", + "1301 1 1 4 0 0 1 0 \n", + "1302 2 5 5 1 0 0 0 \n", + "1303 1 1 4 0 0 1 0 \n", + "1304 1 2 4 0 0 0 1 \n", + "1305 1 5 5 0 0 0 0 \n", + "1306 1 1 5 0 0 0 0 \n", + "1307 1 2 4 0 0 1 0 \n", + "1308 3 4 1 0 1 0 0 \n", + "\n", + " Ticket_C Ticket_P Ticket_S \n", + "0 0 0 0 \n", + "1 0 1 0 \n", + "2 0 0 1 \n", + "3 0 0 0 \n", + "4 0 0 0 \n", + "5 0 0 0 \n", + "6 0 0 0 \n", + "7 0 0 0 \n", + "8 0 0 0 \n", + "9 0 0 0 \n", + "10 0 1 0 \n", + "11 0 0 0 \n", + "12 0 0 0 \n", + "13 0 0 0 \n", + "14 0 0 0 \n", + "15 0 0 0 \n", + "16 0 0 0 \n", + "17 0 0 0 \n", + "18 0 0 0 \n", + "19 0 0 0 \n", + "20 0 0 0 \n", + "21 0 0 0 \n", + "22 0 0 0 \n", + "23 0 0 0 \n", + "24 0 0 0 \n", + "25 0 0 0 \n", + "26 0 0 0 \n", + "27 0 0 0 \n", + "28 0 0 0 \n", + "29 0 0 0 \n", + "... ... ... ... 
\n", + "1279 0 0 0 \n", + "1280 0 0 0 \n", + "1281 0 0 0 \n", + "1282 0 1 0 \n", + "1283 1 0 0 \n", + "1284 1 0 0 \n", + "1285 0 0 0 \n", + "1286 0 0 0 \n", + "1287 0 0 0 \n", + "1288 0 0 0 \n", + "1289 0 0 0 \n", + "1290 0 0 0 \n", + "1291 0 0 0 \n", + "1292 0 0 0 \n", + "1293 0 0 0 \n", + "1294 0 0 0 \n", + "1295 0 0 0 \n", + "1296 0 0 1 \n", + "1297 0 0 0 \n", + "1298 0 0 0 \n", + "1299 0 0 0 \n", + "1300 0 0 1 \n", + "1301 0 0 0 \n", + "1302 0 0 0 \n", + "1303 0 0 0 \n", + "1304 0 0 0 \n", + "1305 0 1 0 \n", + "1306 0 0 1 \n", + "1307 0 0 0 \n", + "1308 0 0 0 \n", + "\n", + "[1309 rows x 19 columns]" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "df1 = df[0:891]" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "X = df1[0:891].drop(['Survived','PassengerId'], axis=1).values\n", + "Y = df1[0:891]['Survived'].values" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state = 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "_cell_guid": "1b0b8eed-36fc-4d4c-8ef1-1dc96becdc39", + "_execution_state": "idle", + "_uuid": "1dfd64f73918b8cacdb76f40a0a969c46accaefc" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "dense_1 (Dense) (None, 16) 288 \n", + "_________________________________________________________________\n", + "dense_2 (Dense) (None, 12) 204 \n", + 
"_________________________________________________________________\n", + "dense_3 (Dense) (None, 1) 13 \n", + "=================================================================\n", + "Total params: 505\n", + "Trainable params: 505\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "# Initialising the NN\n", + "model = Sequential()\n", + "\n", + "# layers\n", + "model.add(Dense(16, kernel_initializer = 'uniform', activation = 'relu', input_dim = 17))\n", + "model.add(Dense(12, kernel_initializer = 'uniform', activation = 'relu'))\n", + "model.add(Dense(1, kernel_initializer = 'uniform', activation = 'sigmoid'))\n", + "\n", + "# summary\n", + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/200\n", + "623/623 [==============================] - 0s 715us/step - loss: 0.6916 - acc: 0.6340\n", + "Epoch 2/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.6862 - acc: 0.6244\n", + "Epoch 3/200\n", + "623/623 [==============================] - 0s 51us/step - loss: 0.6725 - acc: 0.6276\n", + "Epoch 4/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.6409 - acc: 0.6918\n", + "Epoch 5/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.5856 - acc: 0.7881\n", + "Epoch 6/200\n", + "623/623 [==============================] - 0s 51us/step - loss: 0.5237 - acc: 0.8058\n", + "Epoch 7/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.4724 - acc: 0.8202\n", + "Epoch 8/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.4390 - acc: 0.8331\n", + "Epoch 9/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.4260 - acc: 0.8331\n", + "Epoch 10/200\n", + "623/623 [==============================] - 
0s 39us/step - loss: 0.4171 - acc: 0.8315\n", + "Epoch 11/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.4105 - acc: 0.8363\n", + "Epoch 12/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.4068 - acc: 0.8347\n", + "Epoch 13/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.4064 - acc: 0.8331\n", + "Epoch 14/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.4027 - acc: 0.8379\n", + "Epoch 15/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.4018 - acc: 0.8347\n", + "Epoch 16/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.4053 - acc: 0.8283\n", + "Epoch 17/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.4000 - acc: 0.8266\n", + "Epoch 18/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3981 - acc: 0.8331\n", + "Epoch 19/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3968 - acc: 0.8411\n", + "Epoch 20/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3970 - acc: 0.8395\n", + "Epoch 21/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3962 - acc: 0.8427\n", + "Epoch 22/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3955 - acc: 0.8395\n", + "Epoch 23/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3946 - acc: 0.8443\n", + "Epoch 24/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3928 - acc: 0.8411\n", + "Epoch 25/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3938 - acc: 0.8523\n", + "Epoch 26/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3915 - acc: 0.8427\n", + "Epoch 27/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3960 - acc: 0.8347\n", + "Epoch 28/200\n", + "623/623 
[==============================] - 0s 39us/step - loss: 0.3946 - acc: 0.8411\n", + "Epoch 29/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3898 - acc: 0.8427\n", + "Epoch 30/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3926 - acc: 0.8427\n", + "Epoch 31/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3947 - acc: 0.8459\n", + "Epoch 32/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3879 - acc: 0.8555\n", + "Epoch 33/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3883 - acc: 0.8475\n", + "Epoch 34/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3874 - acc: 0.8459\n", + "Epoch 35/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3874 - acc: 0.8443\n", + "Epoch 36/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3901 - acc: 0.8491\n", + "Epoch 37/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3901 - acc: 0.8347\n", + "Epoch 38/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3870 - acc: 0.8491\n", + "Epoch 39/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3869 - acc: 0.8459\n", + "Epoch 40/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3864 - acc: 0.8491\n", + "Epoch 41/200\n", + "623/623 [==============================] - 0s 32us/step - loss: 0.3848 - acc: 0.8491\n", + "Epoch 42/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3858 - acc: 0.8459\n", + "Epoch 43/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3845 - acc: 0.8459\n", + "Epoch 44/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3848 - acc: 0.8395\n", + "Epoch 45/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3852 - acc: 0.8507\n", + "Epoch 
46/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3829 - acc: 0.8539\n", + "Epoch 47/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3843 - acc: 0.8507\n", + "Epoch 48/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3832 - acc: 0.8507\n", + "Epoch 49/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3821 - acc: 0.8491\n", + "Epoch 50/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3822 - acc: 0.8443\n", + "Epoch 51/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3829 - acc: 0.8411\n", + "Epoch 52/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3825 - acc: 0.8491\n", + "Epoch 53/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3807 - acc: 0.8523\n", + "Epoch 54/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3806 - acc: 0.8475\n", + "Epoch 55/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3847 - acc: 0.8379\n", + "Epoch 56/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3796 - acc: 0.8507\n", + "Epoch 57/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3829 - acc: 0.8475\n", + "Epoch 58/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3842 - acc: 0.8523\n", + "Epoch 59/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3809 - acc: 0.8491\n", + "Epoch 60/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3800 - acc: 0.8459\n", + "Epoch 61/200\n", + "623/623 [==============================] - 0s 32us/step - loss: 0.3798 - acc: 0.8507\n", + "Epoch 62/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3793 - acc: 0.8491\n", + "Epoch 63/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3792 - acc: 
0.8507\n", + "Epoch 64/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3786 - acc: 0.8523\n", + "Epoch 65/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3789 - acc: 0.8507\n", + "Epoch 66/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3785 - acc: 0.8507\n", + "Epoch 67/200\n", + "623/623 [==============================] - 0s 32us/step - loss: 0.3791 - acc: 0.8507\n", + "Epoch 68/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3783 - acc: 0.8523\n", + "Epoch 69/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3774 - acc: 0.8523\n", + "Epoch 70/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3776 - acc: 0.8507\n", + "Epoch 71/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3780 - acc: 0.8523\n", + "Epoch 72/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3779 - acc: 0.8491\n", + "Epoch 73/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3789 - acc: 0.8411\n", + "Epoch 74/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3784 - acc: 0.8539\n", + "Epoch 75/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3769 - acc: 0.8523\n", + "Epoch 76/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3773 - acc: 0.8523\n", + "Epoch 77/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3765 - acc: 0.8539\n", + "Epoch 78/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3765 - acc: 0.8459\n", + "Epoch 79/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3762 - acc: 0.8523\n", + "Epoch 80/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3758 - acc: 0.8555\n", + "Epoch 81/200\n", + "623/623 [==============================] - 0s 39us/step - 
loss: 0.3766 - acc: 0.8507\n", + "Epoch 82/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3765 - acc: 0.8523\n", + "Epoch 83/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3755 - acc: 0.8523\n", + "Epoch 84/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3786 - acc: 0.8443\n", + "Epoch 85/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3769 - acc: 0.8491\n", + "Epoch 86/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3769 - acc: 0.8539\n", + "Epoch 87/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3747 - acc: 0.8523\n", + "Epoch 88/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3759 - acc: 0.8523\n", + "Epoch 89/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3767 - acc: 0.8411\n", + "Epoch 90/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3750 - acc: 0.8523\n", + "Epoch 91/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3746 - acc: 0.8523\n", + "Epoch 92/200\n", + "623/623 [==============================] - 0s 32us/step - loss: 0.3755 - acc: 0.8539\n", + "Epoch 93/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3777 - acc: 0.8411\n", + "Epoch 94/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3765 - acc: 0.8491\n", + "Epoch 95/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3741 - acc: 0.8507\n", + "Epoch 96/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3744 - acc: 0.8539\n", + "Epoch 97/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3753 - acc: 0.8507\n", + "Epoch 98/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3739 - acc: 0.8507\n", + "Epoch 99/200\n", + "623/623 [==============================] 
- 0s 45us/step - loss: 0.3734 - acc: 0.8539\n", + "Epoch 100/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3737 - acc: 0.8539\n", + "Epoch 101/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3744 - acc: 0.8587\n", + "Epoch 102/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3731 - acc: 0.8587\n", + "Epoch 103/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3737 - acc: 0.8539\n", + "Epoch 104/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3727 - acc: 0.8555\n", + "Epoch 105/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3752 - acc: 0.8539\n", + "Epoch 106/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3723 - acc: 0.8507\n", + "Epoch 107/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3720 - acc: 0.8555\n", + "Epoch 108/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3735 - acc: 0.8587\n", + "Epoch 109/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3714 - acc: 0.8555\n", + "Epoch 110/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3724 - acc: 0.8555\n", + "Epoch 111/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3732 - acc: 0.8571\n", + "Epoch 112/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3717 - acc: 0.8507\n", + "Epoch 113/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3730 - acc: 0.8379\n", + "Epoch 114/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3704 - acc: 0.8507\n", + "Epoch 115/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3708 - acc: 0.8523\n", + "Epoch 116/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3716 - acc: 0.8571\n", + "Epoch 117/200\n", + 
"623/623 [==============================] - 0s 39us/step - loss: 0.3700 - acc: 0.8491\n", + "Epoch 118/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3696 - acc: 0.8523\n", + "Epoch 119/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3697 - acc: 0.8539\n", + "Epoch 120/200\n", + "623/623 [==============================] - 0s 32us/step - loss: 0.3697 - acc: 0.8571\n", + "Epoch 121/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3689 - acc: 0.8539\n", + "Epoch 122/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3697 - acc: 0.8491\n", + "Epoch 123/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3688 - acc: 0.8491\n", + "Epoch 124/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3701 - acc: 0.8507\n", + "Epoch 125/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3681 - acc: 0.8587\n", + "Epoch 126/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3690 - acc: 0.8539\n", + "Epoch 127/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3678 - acc: 0.8571\n", + "Epoch 128/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3670 - acc: 0.8523\n", + "Epoch 129/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3678 - acc: 0.8555\n", + "Epoch 130/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3681 - acc: 0.8539\n", + "Epoch 131/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3686 - acc: 0.8555\n", + "Epoch 132/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3672 - acc: 0.8523\n", + "Epoch 133/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3669 - acc: 0.8555\n", + "Epoch 134/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3700 - acc: 
0.8395\n", + "Epoch 135/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3681 - acc: 0.8539\n", + "Epoch 136/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3662 - acc: 0.8539\n", + "Epoch 137/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3676 - acc: 0.8523\n", + "Epoch 138/200\n", + "623/623 [==============================] - 0s 32us/step - loss: 0.3672 - acc: 0.8443\n", + "Epoch 139/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3650 - acc: 0.8555\n", + "Epoch 140/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3650 - acc: 0.8571\n", + "Epoch 141/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3651 - acc: 0.8555\n", + "Epoch 142/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3647 - acc: 0.8555\n", + "Epoch 143/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3677 - acc: 0.8571\n", + "Epoch 144/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3635 - acc: 0.8555\n", + "Epoch 145/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3631 - acc: 0.8587\n", + "Epoch 146/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3645 - acc: 0.8539\n", + "Epoch 147/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3625 - acc: 0.8555\n", + "Epoch 148/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3633 - acc: 0.8571\n", + "Epoch 149/200\n", + "623/623 [==============================] - 0s 32us/step - loss: 0.3628 - acc: 0.8491\n", + "Epoch 150/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3687 - acc: 0.8507\n", + "Epoch 151/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3627 - acc: 0.8491\n", + "Epoch 152/200\n", + "623/623 [==============================] - 
0s 32us/step - loss: 0.3609 - acc: 0.8571\n", + "Epoch 153/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3610 - acc: 0.8571\n", + "Epoch 154/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3622 - acc: 0.8539\n", + "Epoch 155/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3616 - acc: 0.8571\n", + "Epoch 156/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3625 - acc: 0.8571\n", + "Epoch 157/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3601 - acc: 0.8555\n", + "Epoch 158/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3595 - acc: 0.8571\n", + "Epoch 159/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3592 - acc: 0.8587\n", + "Epoch 160/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3591 - acc: 0.8587\n", + "Epoch 161/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3595 - acc: 0.8571\n", + "Epoch 162/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3590 - acc: 0.8539\n", + "Epoch 163/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3583 - acc: 0.8571\n", + "Epoch 164/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3591 - acc: 0.8555\n", + "Epoch 165/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3593 - acc: 0.8571\n", + "Epoch 166/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3596 - acc: 0.8507\n", + "Epoch 167/200\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "623/623 [==============================] - 0s 39us/step - loss: 0.3583 - acc: 0.8587\n", + "Epoch 168/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3577 - acc: 0.8555\n", + "Epoch 169/200\n", + "623/623 [==============================] - 0s 
39us/step - loss: 0.3575 - acc: 0.8604\n", + "Epoch 170/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3569 - acc: 0.8587\n", + "Epoch 171/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3571 - acc: 0.8587\n", + "Epoch 172/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3591 - acc: 0.8604\n", + "Epoch 173/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3560 - acc: 0.8539\n", + "Epoch 174/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3571 - acc: 0.8571\n", + "Epoch 175/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3558 - acc: 0.8571\n", + "Epoch 176/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3551 - acc: 0.8571\n", + "Epoch 177/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3596 - acc: 0.8475\n", + "Epoch 178/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3596 - acc: 0.8587\n", + "Epoch 179/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3555 - acc: 0.8555\n", + "Epoch 180/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3551 - acc: 0.8604\n", + "Epoch 181/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3536 - acc: 0.8636\n", + "Epoch 182/200\n", + "623/623 [==============================] - 0s 32us/step - loss: 0.3542 - acc: 0.8555\n", + "Epoch 183/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3546 - acc: 0.8604\n", + "Epoch 184/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3541 - acc: 0.8571\n", + "Epoch 185/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3532 - acc: 0.8555\n", + "Epoch 186/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3540 - acc: 0.8523\n", + "Epoch 187/200\n", + "623/623 
[==============================] - 0s 39us/step - loss: 0.3540 - acc: 0.8620\n", + "Epoch 188/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3530 - acc: 0.8571\n", + "Epoch 189/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3530 - acc: 0.8587\n", + "Epoch 190/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3531 - acc: 0.8571\n", + "Epoch 191/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3523 - acc: 0.8587\n", + "Epoch 192/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3521 - acc: 0.8604\n", + "Epoch 193/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3528 - acc: 0.8604\n", + "Epoch 194/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3525 - acc: 0.8555\n", + "Epoch 195/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3511 - acc: 0.8620\n", + "Epoch 196/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3515 - acc: 0.8555\n", + "Epoch 197/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3506 - acc: 0.8604\n", + "Epoch 198/200\n", + "623/623 [==============================] - 0s 45us/step - loss: 0.3514 - acc: 0.8604\n", + "Epoch 199/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3500 - acc: 0.8539\n", + "Epoch 200/200\n", + "623/623 [==============================] - 0s 39us/step - loss: 0.3526 - acc: 0.8571\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Compiling the NN\n", + "model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])\n", + "\n", + "# Train the NN\n", + "model.fit(X_train, y_train, batch_size = 32, epochs = 200)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "_cell_guid": 
"ae0b0267-706b-4673-a5e1-5821edc2a9e4", + "_execution_state": "idle", + "_uuid": "8f65307ece4155513982147fc7bbc497074af201" + }, + "source": [ + "We can now produce the prediction." + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": { + "_cell_guid": "4a651081-886a-4a9a-8324-a264483d0721", + "_execution_state": "idle", + "_uuid": "7a6c2f2885d7caf67993db99f6880d53e5880372" + }, + "outputs": [], + "source": [ + "y_pred = model.predict(X_test)\n", + "y_final = (y_pred > 0.5).astype(int).reshape(X_test.shape[0])\n", + "\n", + "#output = pd.DataFrame({'PassengerId': df_test['PassengerId'], 'Survived': y_final})\n", + "#output.to_csv('prediction.csv', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ActualPredicted
01.00
10.00
21.01
30.00
41.00
50.00
61.00
70.00
81.01
90.00
100.00
111.01
121.01
131.00
141.00
151.01
161.01
170.00
180.01
190.00
200.00
211.01
220.00
230.00
240.00
251.01
261.01
270.00
280.00
290.00
.........
2380.00
2391.01
2400.00
2410.00
2421.00
2431.00
2440.00
2450.00
2461.00
2470.01
2480.00
2491.01
2500.00
2510.00
2521.01
2530.00
2541.00
2550.00
2560.00
2571.00
2580.00
2591.01
2600.00
2610.00
2620.00
2631.01
2640.00
2650.00
2660.00
2670.01
\n", + "

268 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " Actual Predicted\n", + "0 1.0 0\n", + "1 0.0 0\n", + "2 1.0 1\n", + "3 0.0 0\n", + "4 1.0 0\n", + "5 0.0 0\n", + "6 1.0 0\n", + "7 0.0 0\n", + "8 1.0 1\n", + "9 0.0 0\n", + "10 0.0 0\n", + "11 1.0 1\n", + "12 1.0 1\n", + "13 1.0 0\n", + "14 1.0 0\n", + "15 1.0 1\n", + "16 1.0 1\n", + "17 0.0 0\n", + "18 0.0 1\n", + "19 0.0 0\n", + "20 0.0 0\n", + "21 1.0 1\n", + "22 0.0 0\n", + "23 0.0 0\n", + "24 0.0 0\n", + "25 1.0 1\n", + "26 1.0 1\n", + "27 0.0 0\n", + "28 0.0 0\n", + "29 0.0 0\n", + ".. ... ...\n", + "238 0.0 0\n", + "239 1.0 1\n", + "240 0.0 0\n", + "241 0.0 0\n", + "242 1.0 0\n", + "243 1.0 0\n", + "244 0.0 0\n", + "245 0.0 0\n", + "246 1.0 0\n", + "247 0.0 1\n", + "248 0.0 0\n", + "249 1.0 1\n", + "250 0.0 0\n", + "251 0.0 0\n", + "252 1.0 1\n", + "253 0.0 0\n", + "254 1.0 0\n", + "255 0.0 0\n", + "256 0.0 0\n", + "257 1.0 0\n", + "258 0.0 0\n", + "259 1.0 1\n", + "260 0.0 0\n", + "261 0.0 0\n", + "262 0.0 0\n", + "263 1.0 1\n", + "264 0.0 0\n", + "265 0.0 0\n", + "266 0.0 0\n", + "267 0.0 1\n", + "\n", + "[268 rows x 2 columns]" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df12345 = pd.DataFrame({'Actual': y_test.flatten(), 'Predicted': y_final.flatten()})\n", + "df12345" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8059701492537313\n" + ] + } + ], + "source": [ + "from sklearn.metrics import accuracy_score\n", + "Accuracy = accuracy_score(y_test,y_final)\n", + "print(\"Accuracy:\", Accuracy)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0.0 0.79 0.91 0.85 160\n", + " 1.0 0.83 0.65 0.73 108\n", + "\n", + " micro avg 0.81 0.81 0.81 268\n", + " macro avg 0.81 0.78 0.79 268\n", 
+ "weighted avg 0.81 0.81 0.80 268\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.metrics import classification_report\n", + "C_report = classification_report(y_test,y_final)\n", + "print(C_report)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[146 14]\n", + " [ 38 70]]\n" + ] + } + ], + "source": [ + "from sklearn.metrics import confusion_matrix\n", + "print(confusion_matrix(y_test,y_final))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [], + "source": [ + "cm = confusion_matrix(y_test,y_final)\n", + "cm_df = pd.DataFrame(cm,\n", + " index = ['survived','Not'], \n", + " columns = ['survived','Not'])" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVcAAAEWCAYAAADSNdTRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAH6tJREFUeJzt3XmcXfP9x/HXOwlCEoktiC2klpYSYmvV0lK1r5Ui1iKqttq1/FCl1RJKU0v8FLFHVS3VKn61r0lEUDuJbELEFllkZj6/P85J3EwmM3dm7nfm3jvvp8d5zD3fc+73+7kZ+eR7v+d7vkcRgZmZlVan9g7AzKwaObmamSXg5GpmloCTq5lZAk6uZmYJOLmamSXg5GpJSTpP0s3tHUdrSDpM0pPtHYdVFifXKiNpnKSpkroVlB0p6dF2DKvkJG0naWJ7x2G2KE6u1akLcGLqRiR1Sd1Ge+sIn9HScHKtThcDp0rq1dBBSetJekjSdElvSBpYcOxRSUcW7C/wlVhSSDpW0lvAW3nZ5ZImSPpc0ihJWxcT5Lzep6RTJH0oaYqkwwuOLyHpEknv573xqyUtmffK/wn0kTQj3/pImiVp+fy9Z0uqkbR0vn+BpD/mr3tKGi7pI0nj83M7FXzepyRdJmk6cF4DcV8s6UlJPYv5nNYxOblWp5HAo8Cp9Q/kiekh4FagN3AAcKWk9ZtR/17AFsC38v0XgP7Asnm9d0rqWmRdKwE9gVWAI4A/S1omP/Z7YJ287m/k55wTEV8COwOTI6J7vk3O49g2f+82wHhgq4L9x/LXf8rbXCs//xBgflLPP9u7ZH8+F84rlNRJ0rXAhsCOEfFZkZ/ROiAn1+p1DnC8pBXqle8GjIuI6yOiJiJGA3cBP25G3b+LiOkRMQsgIm6OiI/z+oYASwDrFlnXXOD8iJgbEQ8AM4B1JQk4Cjgpb+sL4LfA/o3U9Riwbf5VfkPginy/K7AZ8ISkzsBPgF9GxBcRMQ4YAhxcUM/kiPhT/nlm5WWLAbeR/QOye0TMLPLzWQfl8aQqFRGvSLofOBN4reDQGsAWkj4tKOsC3NSM6
icU7kg6BTgS6AMEsDSwfJF1fRwRNQX7M4HuwArAUsCoLM9mTQGdG6nrMeBSYBPgZbIe+nXAlsDbETFN0orA4mS92nnGk/WKG/x8uW8AGwGbR8RXxX0068jcc61u55L1/uonjsciolfB1j0ijsmPf0mW1OZZqYF65y+llo+vngEMBJaJiF7AZ2SJsDWmAbOA9Qvi7BkR3evHUOBpsh7z3mSf8b/A6sCufD0kMI2st7xGwftWByY19PkKvEY2dPBPScX2yq0Dc3KtYhHxNnAHcEJB8f3AOpIOlrRYvm0m6Zv58THAPpKWkvQNsnHQxvQAaoCPgC6SziHrubY29jrgWuAySb0BJK0i6Uf5KVOB5QovKuVf1UcBx/J1Mn0aOHrefkTUAiOACyX1kLQGcDLQ5FzciLgN+BXwsKR+rf2MVt2cXKvf+cD8Oa/52OWOZGOXk4EPyC4cLZGfchnwFVnyuhG4pYn6HyS7cv8m2dfr2TT8tbolzgDeBp6V9DnwMPlYbkS8TjYG+q6kTyX1yd/zGNn46PMF+z2AxwvqPZ6sh/4u8CTZRbi/FBNQRNxI9mf6f5L6tvSDWfWTF8s2Mys991zNzBJwcjUzS8DJ1cwsASdXM7MEyvYmgrnT3vWVtg5gyT5FLUNgVaLmq0ktnv/cnJyw2PJrtXaedau552pmlkDZ9lzNzBZQV9veETSLk6uZVYbamqbPKSNOrmZWEbI7oiuHk6uZVYY6J1czs9Jzz9XMLAFf0DIzS8A9VzOz0gvPFjAzS8AXtMzMEvCwgJlZAr6gZWaWgHuuZmYJ+IKWmVkCvqBlZlZ62VPRK4eTq5lVBo+5mpkl4GEBM7ME3HM1M0ugdm57R9AsTq5mVhk8LGBmloCHBczMEnDP1cwsASdXM7PSC1/QMjNLwGOuZmYJeFjAzCwB91zNzBJwz9XMLAH3XM3MEqiprMWyO7V3AGZmRYm64rcmSPqLpA8lvVJQdrGk1yWNlXS3pF55eV9JsySNyberiwnXydXMKkNdXfFb024AdqpX9hCwQURsCLwJ/LLg2DsR0T/fflZMA06uZlYZSthzjYjHgen1yv4dEfPGHp4FVm1NuE6uZlYZmtFzlTRY0siCbXAzW/sp8M+C/TUlvSjpMUlbF1OBL2iZWWVoxmyBiBgGDGtJM5LOAmqAW/KiKcDqEfGxpAHA3yWtHxGfN1aPk6uZVYY2mC0g6VBgN2D7iAiAiJgDzMlfj5L0DrAOMLKxupxczawyZLkuGUk7AWcA20bEzILyFYDpEVEraS1gbeDdpupzcjWzylDCO7Qk3QZsBywvaSJwLtnsgCWAhyQBPJvPDNgGOF9SDVAL/CwipjdYcQEnVzOrDCVMrhFxQAPF1y3i3LuAu5rbhpOrmVUG3/5qZpZAbW17R9AsTq5mVhm8KpaZWQJOrmZmCXjM1cys9KIu7TzXUnNyNbPK4GEBM7MEPFvAzCwB91zNzBKosOTq9Vzb0Nm/vZRtdt2fvQ5aeCHz62/9KxtstTOffPrZ/LLnR49l30OPZc9BR3PYsae1ZahWQtcOG8LkiS8x5sVHFjp28klHU/PVJJZbbpl2iKzCRBS/lQEn1za01y4/5OpLL1iofMrUj3jmhRdZecXe88s+/2IGFwwZytDfn8s9t1zDkAvOastQrYSGDx/BrrsNWqh81VX7sMP22zB+/MR2iKoClfYxL8k5ubahTft/m55L91io/A9XXMPJPz+CbCGezAMPPcoO227FyitlCXe5ZXq1VZhWYk88+RzTP/l0ofIhl5zHmb+6kCiTnlbZq4vitzKQZMxV0hfAIj9hRCydot1K9J8nnqX3Csuz3tprLVA+7v2J1NTWcthxpzNz5iwG7bcne+68QztFaaW2224/ZNKkKYwd+9/2DqVyeLYAREQPAEnnAx8ANwECBgELd91y+XNuBgNcOeQCjjykoVXBqses2bMZNvx2hl124ULHamvr+O/rb/G/V1zEnDlzGHT0yWy0/nr0X
b1Vz0yzMrDkkl351ZknsNMuB7Z3KBUlyuTrfrFSzxb4UURsUbB/laTngD80dHLhc2/mTnu3PPr2CU2YNIVJkz9g30N/DsDUj6ax30+P5/Zr/8iKvZenV6+lWWrJriy1ZFcG9N+AN95+z8m1CvTr15e+fVdn9MiHAFh11ZV54bkH+c5WuzJ16kftHF0ZK5Ov+8VKnVxrJQ0CbicbJjiAbCVvA9bptyaP/+P2+fs77nsod1x3Bcv06sn3t96S3156JTU1tcytmcvLr77BIT/Zux2jtVJ55ZXX6bPqRvP3337zWbb4zs58/PEn7RhVBaiwtQVSX9A6EBgITM23/fKyDum0cy9i0NEnMe79iWy/10Hcdd+Dizy3X9/V2WqLTdnn0GM44MhfsO/uP2Lttfq2XbBWMjff9GeefPxe1l2nH+PeHcnhh+3f3iFVpgq7oKVyvVLZEYYFDJbsU9Qj4K1K1Hw1SU2f1bAvz9m/6JzQ7fzbW9xOqSTtuUpaR9Ijkl7J9zeUdHbKNs2sSkVd8VsZSD0scC3ZExXnAkTEWMDficys+SpsWCD1Ba2lIuJ5aYEeek3iNs2sCnkq1oKmSepHfkOBpB8DUxK3aWbVqEx6pMVKnVyPJZu3up6kScB7ZDcSmJk1j5PrAsZHxA6SugGdIuKLxO2ZWbWqsNtfU1/Qek/SMGBLYEbitsysikVdFL2Vg9TJdV3gYbLhgfckDZX0vcRtmlk1qrDZAkmTa0TMiogREbEPsDGwNPBYyjbNrEp5PdcFSdpW0pXAaKAr2e2wZmbNU2E916QXtCS9B4wBRgCnRcSXKdszsypWJkmzWKlnC2wUEZ8nbsPMOoCoLY+v+8VK9SSC0yPiD8CFkhb65yYiTkjRrplVMfdcAXgt/zkyUf1m1sGUcoqVpL8AuwEfRsQGedmywB1AX2AcMDAiPlF2//7lwC7ATOCwiBjdVBupHvNyX/5ybES8mKINM+tgSttzvQEYCgwvKDsTeCQiLpJ0Zr5/BrAzsHa+bQFclf9sVOrZApdKel3SbyStn7gtM6tmdc3YmhARjwPT6xXvCdyYv74R2KugfHhkngV6SVq5qTZSz3P9PrAd8BEwTNLLXs/VzFoiauqK3iQNljSyYBtcRBMrRsQUgPxn77x8FWBCwXkT87JGpZ4tQER8AFwh6T/A6cA5wAWp2zWzKtOMyQKFDzstgYaeatDkGEXqJxF8U9J5+ZMIhgJPA358qZk1WxusLTB13tf9/OeHeflEYLWC81YFJjdVWeox1+uBT4AdI2LbiLgqIj5s6k1mZgsp4ZjrItwLHJq/PhS4p6D8EGW2BD6bN3zQmGTDApI6A+9ExOWp2jCzjqPEU7FuI7setLykicC5wEXACElHAO+TPa0a4AGyaVhvk03FOryYNpIl14iolbScpMUj4qtU7ZhZB1HCG7Qi4oBFHNq+gXODbGW/Zkm+WDbwlKR7gfnrCkTEpYnbNbMqExX29L3UyXVyvnUCeiRuy8yqWJk8MbtoSZNrRPw6Zf1m1oE4uX4tn9va0MItP0jZrplVn6rpuUpaurE3FrmU4KkFr7sC+wIVNnJiZuWgapIr8CpZr7Pw7oR5+wGs3lTlETGqXtFTkvyYFzNrtqht6Eap8rXI5BoRqy3qWLHyJbzm6QRsCqzU2nrNrOOppp7rfJL2B9aKiN9KWpVsgYP6vdKGjOLr3u5csjUSj2hhrGbWgUVdZfVcm7z9VdJQ4PvAwXnRTODqIus/A+gfEWsCN5HNdZ3ZgjjNrIOLuuK3clDM2gLfjYijgdkAETEdWLzI+s+OiM8lfQ/4IdkCtVe1JFAz69giVPRWDopJrnMldSKfUiVpOYqfcVab/9wVuDoi7qH4xGxmNl819lz/DNwFrCDp18CTwO+LrH+SpGuAgcADkpYosk0zswXU1arorRw0eUErIoZLGgXskBftFxGvFFn/QGAn4JKI+DRfI/G0loVqZh1ZpV3QKvYOrc5kV/uDZvQ8I2Im8
LeC/SlAk+sgmpnVV2nJtZjZAmcBtwF9yFbgvlXSL1MHZmZWKKL4rRwU03M9CBiQ90KRdCHZ/NXfpQzMzKxQpfVci0mu4+ud1wV4N004ZmYNK5cpVsVqbOGWy8jGWGcCr0p6MN/fkWzGgJlZm6ktk1kAxWqs5zpvRsCrwD8Kyp9NF46ZWcOqpucaEde1ZSBmZo2pujFXSf2AC4Fvka3JCkBErJMwLjOzBZTLLIBiFTNn9QbgerKVrXYGRgC3J4zJzGwhUaeit3JQTHJdKiIeBIiIdyLibLJVsszM2kxtXaeit3JQzFSsOZIEvCPpZ8AkoHfasMzMFlRpwwLFJNeTgO7ACWRjrz2Bn6YMysysvrpqmS0wT0Q8l7/8gq8XzDYza1NVMxVL0t008FjseSJinyQRmZk1oJqGBYa2WRQN2HXjn7dn89ZGbl9uu/YOwSpE1QwLRMQjbRmImVljymUWQLGKXc/VzKxdVdiogJOrmVWGqhkWqE/SEhExJ2UwZmaLUmmzBYp5EsHmkl4G3sr3N5L0p+SRmZkVqGvG1hRJ60oaU7B9LukXks6TNKmgfJeWxltMz/UKYDfg7wAR8ZIk3/5qZm0qKF3PNSLeAPoDSOpMdufp3cDhwGURcUlr2ygmuXaKiPHZHbDz1ba2YTOz5qhJNyywPfBOA3muVYqZ2zBB0uZASOos6RfAmyWLwMysCIGK3iQNljSyYBvcSNX7kz2EdZ7jJI2V9BdJy7Q03mKS6zHAycDqwFRgy7zMzKzNNGfMNSKGRcSmBduwhuqUtDiwB3BnXnQV0I9syGAKMKSl8RaztsCHZJndzKzdlHLMtcDOwOiImAow7yeApGuB+1tacTFPIriWBubvRkRj3Wwzs5IqZhZACxxAwZCApJUjYkq+uzdfP0uw2Yq5oPVwweuueYMTWtqgmVlL1Ja45yppKeCHwNEFxX+Q1J+sQzmu3rFmKWZY4I56Ad0EPNTSBs3MWqLUT2+JiJnAcvXKSrasaktuf10TWKNUAZiZFaMuzZhrMsWMuX7C12OunYDpwJkpgzIzq6+qFm7Jn521EdndCwB1EZW2ZK2ZVYNEF7SSaXSea55I746I2nxzYjWzdlEnFb2Vg2JuInhe0ibJIzEza0RtM7Zy0NgztLpERA3wPeAoSe8AXwIi69Q64ZpZmyn1bIHUGhtzfR7YBNirjWIxM1ukapotIICIeKeNYjEzW6RKu+DTWHJdQdLJizoYEZcmiMfMrEHVNCzQGegOFdYXN7OqVGlTsRpLrlMi4vw2i8TMrBG1FdbNa3LM1cysHFRTz3X7NovCzKwJVZNcI2J6WwZiZtaYCnuydotWxTIza3NV03M1Mysn5XJba7GcXM2sIlTTPFczs7LhYQEzswScXM3MEqimtQXMzMqGx1zNzBLwbAEzswTqKmxgwMnVzCqCL2iZmSVQWf1WJ1czqxDuuZqZJVCjyuq7OrmaWUWorNTq5GpmFcLDAmZmCXgqlplZApWVWp1czaxCeFjAzCyB2hL3XSWNA74gu7O2JiI2lbQscAfQFxgHDIyIT1pSf6fShGlmllZdM7Zm+H5E9I+ITfP9M4FHImJt4JF8v0WcXM2sIkQz/muFPYEb89c3Anu1tCInVzOrCM3puUoaLGlkwTa4gSoD+LekUQXHV4yIKQD5z94tjddjru1ksSUWY8hfL2GxxRejc+fOPPHAE9x06c3036o/R511JJ06iVlfzuaSUy5h8rgp7R2utUL3fiuz5dXHz9/vtkZvXr34r4y/8wm2vPp4llptBWZO+Ihnj76CuZ/NbMdIy1tzpmJFxDBgWBOnbRURkyX1Bh6S9Hpr4qvPybWdzJ0zl9N/cgazZ86mc5fOXPa3Ibzwn5Gc8NvjOPeIXzPh7QnsfshuHHjCgVxy8pD2DtdaYcY7U3j4h7/KdjqJ3V4cyuR/jmS94/bgwydf5Y2h97Hucbuz3nF78PKFt7dvsGWs1FOxImJy/vNDSXcDmwNTJa0cE
VMkrQx82NL6PSzQjmbPnA1Aly5d6NylC0QQAd26LwVAtx7d+Hjqx+0ZopXYiltvwIxxHzJz4jT6/GgTxo94AoDxI56gz04D2jm68lZDFL01RVI3ST3mvQZ2BF4B7gUOzU87FLinpfEm7blKWjMi3muqrKPq1KkTf37gT/Tp24d7b7yP18e8wWWnX8YFw3/DnNlzmPnFTE7c86T2DtNKaNU9t2TC358GYIkVejL7w08BmP3hpyyxfM/2DK3stfJCVX0rAndLgiwP3hoR/5L0AjBC0hHA+8B+LW0gdc/1rgbK/rqokwsHoSfOmJAwrPJQV1fHMTsdy4GbH8S6/del77prsM+R+3D2If/DoM0P5t8jHuLocxoah7dKpMU60+dHA5h433PtHUpFKuVUrIh4NyI2yrf1I+LCvPzjiNg+ItbOf05vabxJkquk9STtC/SUtE/BdhjQdVHvi4hhEbFpRGy6avfVUoRWlr78/EvGPjOWzbbbjLW+tSavj3kDgEfve4xvDfhmO0dnpbLSD/rz6cvjmDPtcwDmfPQZXXv3AqBr717MmfZZe4ZX9tpoKlbJpOq5rgvsBvQCdi/YNgGOStRmRem5bE+6Ld0NgMW7Ls7GW2/M+2+/T7ce3VhlzVUAGLD1Jrz/dvX34DuK1ff6Du/f/fT8/cn/Hs0aA7cGYI2BWzP5wdHtFVpFSHQTQTJJxlwj4h7gHknfiYhnUrRR6ZbtvSynXXYKnTp3plMn8dh9j/PcI8/zxzMu55xhZ1NXF8z4bAZDTr20vUO1Eui85OL03mYDRp1+3fyyN4bex5bXHE/fA7Zj1qRpPDP4inaMsPzVRnn0SIulSBiwpFWBPwFbkc2keBI4MSImNvXeHVfbqbL+JK1FBtcs194hWBv68ZRb1NL3HrjG3kXnhFvH393idkol9QWt68mmNvQBVgHuy8vMzJrFY64L6h0R10dETb7dAKyQuE0zq0KVNuaaOrl+JOkgSZ3z7SDAs+LNrNnqiKK3cpA6uf4UGAh8AEwBfpyXmZk1S6UNCyS9Qysi3gf2SNmGmXUMlTZbIElylXROI4cjIn6Tol0zq17l8nW/WKl6rl82UNYNOAJYDnByNbNmKZcLVcVKdRPB/DXy8pVnTgQOB24HvH6emTVbuYylFivZmGv+oK+TgUFkj0vYpKUP+jIz87AAIOliYB+ylcC/HREzUrRjZh1HyrtJU0jVcz0FmAOcDZyVr5kIILILWksnatfMqlSpH62dWqoxVz/hwMxKysMCZmYJeFjAzCwB91zNzBLwVCwzswR8+6uZWQIeFjAzS8DJ1cwsAc8WMDNLwD1XM7MEPFvAzCyB2qisRQedXM2sInjM1cwsAY+5mpkl4DFXM7ME6jwsYGZWeu65mpkl4NkCZmYJVNqwgJ8YYGYVIZrxX1MkrSbpP5Jek/SqpBPz8vMkTZI0Jt92aWm87rmaWUUocc+1BjglIkZL6gGMkvRQfuyyiLiktQ04uZpZRSjlBa2ImAJMyV9/Iek1YJWSNYCHBcysQtRGbdGbpMGSRhZsgxdVr6S+wMbAc3nRcZLGSvqLpGVaGq+Tq5lVhIhozjYsIjYt2IY1VKek7sBdwC8i4nPgKqAf0J+sZzukpfF6WMDMKkKpb3+VtBhZYr0lIv4GEBFTC45fC9zf0vqdXM2sIpRy4RZJAq4DXouISwvKV87HYwH2Bl5paRtOrmZWEUo8W2Ar4GDgZUlj8rJfAQdI6g8EMA44uqUNOLmaWUUo8WyBJwE1cOiBUrXh5GpmFcG3v5qZJeDFss3MEqi0tQWcXM2sIrjnamaWgB/zYmaWgHuuZmYJeLaAmVkCvqBlZpaAhwXMzBLwAwrNzBJwz9XMLIFKG3NVpf1rUO0kDV7Uwr5WPfx7rn5+EkH5WeTjKKyq+Pdc5ZxczcwScHI1M0vAybX8eByuY/Dvucr5gpaZWQLuuZqZJeDkamaWgJNrmZG0h6QzS1TXjFLUYy0jKSQNKdg/VdJ5T
bxnL0nfSh6cJefk2g4kLfLOuIi4NyIuast4LJk5wD6Slm/Ge/YCnFyrgJNrK0jqJukfkl6S9Iqkn0gaN+8vk6RNJT2avz5P0jBJ/waGS3pO0voFdT0qaYCkwyQNldQzr6tTfnwpSRMkLSapn6R/SRol6QlJ6+XnrCnpGUkvSPpN2/+JWD01ZLMCTqp/QNIakh6RNDb/ubqk7wJ7ABdLGiOpX1sHbKXj5No6OwGTI2KjiNgA+FcT5w8A9oyIA4HbgYEAklYG+kTEqHknRsRnwEvAtnnR7sCDETGX7C/s8RExADgVuDI/53LgqojYDPigFB/QWu3PwCBJPeuVDwWGR8SGwC3AFRHxNHAvcFpE9I+Id9o4VishJ9fWeRnYQdLvJW2dJ8TG3BsRs/LXI4D98tcDgTsbOP8O4Cf56/2BOyR1B74L3ClpDHANsHJ+zlbAbfnrm5r9aazkIuJzYDhwQr1D3wFuzV/fBHyvLeOy9LwqVitExJuSBgC7AL/Lv/LX8PU/Wl3rveXLgvdOkvSxpA3JEujRDTRxb17vsmS93v8DugGfRkT/RYXV4g9kqfwRGA1c38g5/r1VGfdcW0FSH2BmRNwMXAJsAowjS4QA+zZRxe3A6UDPiHi5/sGImAE8T/Z1//6IqM17Qu9J2i+PQZI2yt/yFFkPF2BQiz+YlVRETCf7pnJEQfHTLPi7ejJ//QXQo+2is1ScXFvn28Dz+dfzs4ALgF8Dl0t6Aqht4v1/JfsLNqKRc+4ADsp/zjMIOELSS8CrwJ55+YnAsZJeAOqP8Vn7GgIUzho4AThc0ljgYLLfHWT/4J4m6UVf0Kpsvv3VzCwB91zNzBJwcjUzS8DJ1cwsASdXM7MEnFzNzBJwcu2AJNXm966/IulOSUu1oq7tJN2fv250RS9JvST9vAVtnCfp1GLL651zg6QfN6OtvpJeaW6MZvU5uXZMs/J71zcAvgJ+VngwvzGh2f9vFLGiVy+g2cnVrBI5udoTwDfyHttrkq4ku1VzNUk75qtsjc57uN0BJO0k6XVJTwL7zKto3ope+esVJd2drxj2Ur7i00VAv7zXfHF+3mn5Kl5jJf26oK6zJL0h6WFg3aY+hKSj8npeknRXvd74DvnqYW9K2i0/v7Okiwvabuj2Y7MWc3LtwJStK7sz2QI0kCWx4RGxMdk6CGcDO0TEJsBI4GRJXYFryVbp2hpYaRHVXwE8FhEbkd0W/CpwJvBO3ms+TdKOwNrA5kB/YICkbfL1GvYHNiZL3psV8XH+FhGb5e29xoK3mvYlW11sV+Dq/DMcAXyWryC2GXCUpDWLaMesKF64pWNaMr9lF7Ke63VAH2B8RDybl29JtmjzU5IAFgeeAdYD3ouItwAk3QwMbqCNHwCHAERELfCZpGXqnbNjvr2Y73cnS7Y9gLsjYmbexr1FfKYNJF1ANvTQHXiw4NiIiKgD3pL0bv4ZdgQ2LBiP7Zm3/WYRbZk1ycm1Y5pVf1WtPIF+WVgEPBQRB9Q7rz+lW8FJwO8i4pp6bfyiBW3cAOwVES9JOgzYruBY/boib/v4iChMwkjq28x2zRrkYQFblGeBrSR9A+Y/CWEd4HVgzYJFRQ5YxPsfAY7J39tZ0tIsvOLTg8BPC8ZyV5HUG3gc2FvSkpJ6kA1BNKUHMEXSYiy8Ith+kjrlMa8FvJG3fUx+PpLWkdStiHbMiuKeqzUoIj7Ke4C3SVoiLz47X8N2MPAPSdPIlsrboIEqTgSGSTqCbHWwYyLiGUlP5VOd/pmPu34TeCbvOc8ADoqI0ZLuAMYA48mGLpryP8Bz+fkvs2ASfwN4DFgR+FlEzJb0v2RjsaOVNf4R2fOrzErCq2KZmSXgYQEzswScXM3MEnByNTNLwMnVzCwBJ1czswScXM3MEnByNTNL4P8BXrdhPX7mma4AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.8059701492537313\n" + ] + } + ], + "source": [ + "plt.figure(figsize=(5.5,4))\n", + "sns.heatmap(cm_df, annot=True, fmt=\"d\")\n", + "plt.title('Neural network'.format(accuracy_score(y_test, y_final)))\n", + "plt.ylabel('True label')\n", + "plt.xlabel('Predicted label')\n", + "plt.savefig('plot_NN.png', dpi=500, bbox_inches='tight')\n", + "plt.show()\n", + "print(\"Accuracy:\",accuracy_score(y_test, y_final))" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.5816522574447647" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import cohen_kappa_score\n", + "cohen_kappa_score(y_test, y_final)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}