diff --git a/.gitignore b/.gitignore
index 46fe01e..2807f81 100644
--- a/.gitignore
+++ b/.gitignore
@@ -103,3 +103,6 @@ venv.bak/
# mypy
.mypy_cache/
+
+# VS Code workspace settings
+.vscode/
\ No newline at end of file
diff --git a/chapters/1 Introduction/Example 1 Email Spam .ipynb b/chapters/1 Introduction/Example 1 Email Spam .ipynb
new file mode 100644
index 0000000..691911d
--- /dev/null
+++ b/chapters/1 Introduction/Example 1 Email Spam .ipynb
@@ -0,0 +1,415 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
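+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>test</th>\n",
+ "      <th>word_freq_make</th>\n",
+ "      <th>word_freq_address</th>\n",
+ "      <th>word_freq_all</th>\n",
+ "      <th>word_freq_3d</th>\n",
+ "      <th>word_freq_our</th>\n",
+ "      <th>word_freq_over</th>\n",
+ "      <th>word_freq_remove</th>\n",
+ "      <th>word_freq_internet</th>\n",
+ "      <th>word_freq_order</th>\n",
+ "      <th>...</th>\n",
+ "      <th>char_freq_;</th>\n",
+ "      <th>char_freq_(</th>\n",
+ "      <th>char_freq_[</th>\n",
+ "      <th>char_freq_!</th>\n",
+ "      <th>char_freq_$</th>\n",
+ "      <th>char_freq_#</th>\n",
+ "      <th>capital_run_length_average</th>\n",
+ "      <th>capital_run_length_longest</th>\n",
+ "      <th>capital_run_length_total</th>\n",
+ "      <th>spam</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>1</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.64</td>\n",
+ "      <td>0.64</td>\n",
+ "      <td>0.0</td>\n",
+ "      <td>0.32</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>...</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.000</td>\n",
+ "      <td>0.0</td>\n",
+ "      <td>0.778</td>\n",
+ "      <td>0.000</td>\n",
+ "      <td>0.000</td>\n",
+ "      <td>3.756</td>\n",
+ "      <td>61</td>\n",
+ "      <td>278</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>0</td>\n",
+ "      <td>0.21</td>\n",
+ "      <td>0.28</td>\n",
+ "      <td>0.50</td>\n",
+ "      <td>0.0</td>\n",
+ "      <td>0.14</td>\n",
+ "      <td>0.28</td>\n",
+ "      <td>0.21</td>\n",
+ "      <td>0.07</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>...</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.132</td>\n",
+ "      <td>0.0</td>\n",
+ "      <td>0.372</td>\n",
+ "      <td>0.180</td>\n",
+ "      <td>0.048</td>\n",
+ "      <td>5.114</td>\n",
+ "      <td>101</td>\n",
+ "      <td>1028</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>1</td>\n",
+ "      <td>0.06</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.71</td>\n",
+ "      <td>0.0</td>\n",
+ "      <td>1.23</td>\n",
+ "      <td>0.19</td>\n",
+ "      <td>0.19</td>\n",
+ "      <td>0.12</td>\n",
+ "      <td>0.64</td>\n",
+ "      <td>...</td>\n",
+ "      <td>0.01</td>\n",
+ "      <td>0.143</td>\n",
+ "      <td>0.0</td>\n",
+ "      <td>0.276</td>\n",
+ "      <td>0.184</td>\n",
+ "      <td>0.010</td>\n",
+ "      <td>9.821</td>\n",
+ "      <td>485</td>\n",
+ "      <td>2259</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>3</th>\n",
+ "      <td>0</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.0</td>\n",
+ "      <td>0.63</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.31</td>\n",
+ "      <td>0.63</td>\n",
+ "      <td>0.31</td>\n",
+ "      <td>...</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.137</td>\n",
+ "      <td>0.0</td>\n",
+ "      <td>0.137</td>\n",
+ "      <td>0.000</td>\n",
+ "      <td>0.000</td>\n",
+ "      <td>3.537</td>\n",
+ "      <td>40</td>\n",
+ "      <td>191</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>4</th>\n",
+ "      <td>0</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.0</td>\n",
+ "      <td>0.63</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.31</td>\n",
+ "      <td>0.63</td>\n",
+ "      <td>0.31</td>\n",
+ "      <td>...</td>\n",
+ "      <td>0.00</td>\n",
+ "      <td>0.135</td>\n",
+ "      <td>0.0</td>\n",
+ "      <td>0.135</td>\n",
+ "      <td>0.000</td>\n",
+ "      <td>0.000</td>\n",
+ "      <td>3.537</td>\n",
+ "      <td>40</td>\n",
+ "      <td>191</td>\n",
+ "      <td>1</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "<p>5 rows × 59 columns</p>\n",
+ "</div>"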
+ ],
+ "text/plain": [
+ " test word_freq_make word_freq_address word_freq_all word_freq_3d \\\n",
+ "0 1 0.00 0.64 0.64 0.0 \n",
+ "1 0 0.21 0.28 0.50 0.0 \n",
+ "2 1 0.06 0.00 0.71 0.0 \n",
+ "3 0 0.00 0.00 0.00 0.0 \n",
+ "4 0 0.00 0.00 0.00 0.0 \n",
+ "\n",
+ " word_freq_our word_freq_over word_freq_remove word_freq_internet \\\n",
+ "0 0.32 0.00 0.00 0.00 \n",
+ "1 0.14 0.28 0.21 0.07 \n",
+ "2 1.23 0.19 0.19 0.12 \n",
+ "3 0.63 0.00 0.31 0.63 \n",
+ "4 0.63 0.00 0.31 0.63 \n",
+ "\n",
+ " word_freq_order ... char_freq_; char_freq_( char_freq_[ char_freq_! \\\n",
+ "0 0.00 ... 0.00 0.000 0.0 0.778 \n",
+ "1 0.00 ... 0.00 0.132 0.0 0.372 \n",
+ "2 0.64 ... 0.01 0.143 0.0 0.276 \n",
+ "3 0.31 ... 0.00 0.137 0.0 0.137 \n",
+ "4 0.31 ... 0.00 0.135 0.0 0.135 \n",
+ "\n",
+ " char_freq_$ char_freq_# capital_run_length_average \\\n",
+ "0 0.000 0.000 3.756 \n",
+ "1 0.180 0.048 5.114 \n",
+ "2 0.184 0.010 9.821 \n",
+ "3 0.000 0.000 3.537 \n",
+ "4 0.000 0.000 3.537 \n",
+ "\n",
+ " capital_run_length_longest capital_run_length_total spam \n",
+ "0 61 278 1 \n",
+ "1 101 1028 1 \n",
+ "2 485 2259 1 \n",
+ "3 40 191 1 \n",
+ "4 40 191 1 \n",
+ "\n",
+ "[5 rows x 59 columns]"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load the spam data set and preview its first five rows.\n",
+ "df_spam = pd.read_csv(filepath_or_buffer=\"../../data/Spam.txt\")\n",
+ "df_spam.head(n=5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Index(['test', 'word_freq_make', 'word_freq_address', 'word_freq_all',\n",
+ " 'word_freq_3d', 'word_freq_our', 'word_freq_over', 'word_freq_remove',\n",
+ " 'word_freq_internet', 'word_freq_order', 'word_freq_mail',\n",
+ " 'word_freq_receive', 'word_freq_will', 'word_freq_people',\n",
+ " 'word_freq_report', 'word_freq_addresses', 'word_freq_free',\n",
+ " 'word_freq_business', 'word_freq_email', 'word_freq_you',\n",
+ " 'word_freq_credit', 'word_freq_your', 'word_freq_font', 'word_freq_000',\n",
+ " 'word_freq_money', 'word_freq_hp', 'word_freq_hpl', 'word_freq_george',\n",
+ " 'word_freq_650', 'word_freq_lab', 'word_freq_labs', 'word_freq_telnet',\n",
+ " 'word_freq_857', 'word_freq_data', 'word_freq_415', 'word_freq_85',\n",
+ " 'word_freq_technology', 'word_freq_1999', 'word_freq_parts',\n",
+ " 'word_freq_pm', 'word_freq_direct', 'word_freq_cs', 'word_freq_meeting',\n",
+ " 'word_freq_original', 'word_freq_project', 'word_freq_re',\n",
+ " 'word_freq_edu', 'word_freq_table', 'word_freq_conference',\n",
+ " 'char_freq_;', 'char_freq_(', 'char_freq_[', 'char_freq_!',\n",
+ " 'char_freq_$', 'char_freq_#', 'capital_run_length_average',\n",
+ " 'capital_run_length_longest', 'capital_run_length_total', 'spam'],\n",
+ " dtype='object')\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Show every column label of the loaded frame.\n",
+ "print(df_spam.columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['make', 'address', 'all', '3d', 'our', 'over', 'remove', 'internet', 'order', 'mail', 'receive', 'will', 'people', 'report', 'addresses', 'free', 'business', 'email', 'you', 'credit', 'your', 'font', '000', 'money', 'hp', 'hpl', 'george', '650', 'lab', 'labs', 'telnet', '857', 'data', '415', '85', 'technology', '1999', 'parts', 'pm', 'direct', 'cs', 'meeting', 'original', 'project', 're', 'edu', 'table', 'conference']\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Columns named 'word_freq_<word>' hold per-word frequencies.\n",
+ "# Match on the prefix (not a substring) so the slice below always removes exactly the prefix.\n",
+ "WORD_PREFIX = 'word_freq_'\n",
+ "words_feature = [col for col in df_spam.columns if col.startswith(WORD_PREFIX)]\n",
+ "# Bare words, in the same order as words_feature; sliced by the prefix length rather than a magic offset.\n",
+ "words = [col[len(WORD_PREFIX):] for col in words_feature]\n",
+ "print(words)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
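+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>george</th>\n",
+ "      <th>you</th>\n",
+ "      <th>your</th>\n",
+ "      <th>hp</th>\n",
+ "      <th>free</th>\n",
+ "      <th>hpl</th>\n",
+ "      <th>our</th>\n",
+ "      <th>re</th>\n",
+ "      <th>edu</th>\n",
+ "      <th>remove</th>\n",
+ "      <th>000</th>\n",
+ "      <th>business</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>spam</th>\n",
+ "      <td>0.00</td>\n",
+ "      <td>2.26</td>\n",
+ "      <td>1.38</td>\n",
+ "      <td>0.02</td>\n",
+ "      <td>0.52</td>\n",
+ "      <td>0.01</td>\n",
+ "      <td>0.51</td>\n",
+ "      <td>0.13</td>\n",
+ "      <td>0.01</td>\n",
+ "      <td>0.28</td>\n",
+ "      <td>0.25</td>\n",
+ "      <td>0.29</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>email</th>\n",
+ "      <td>1.27</td>\n",
+ "      <td>1.27</td>\n",
+ "      <td>0.44</td>\n",
+ "      <td>0.90</td>\n",
+ "      <td>0.07</td>\n",
+ "      <td>0.43</td>\n",
+ "      <td>0.18</td>\n",
+ "      <td>0.42</td>\n",
+ "      <td>0.29</td>\n",
+ "      <td>0.01</td>\n",
+ "      <td>0.01</td>\n",
+ "      <td>0.05</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "</div>"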
+ ],
+ "text/plain": [
+ " george you your hp free hpl our re edu remove 000 business\n",
+ "spam 0.00 2.26 1.38 0.02 0.52 0.01 0.51 0.13 0.01 0.28 0.25 0.29\n",
+ "email 1.27 1.27 0.44 0.90 0.07 0.43 0.18 0.42 0.29 0.01 0.01 0.05"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Mean frequency of every word feature per value of the 'spam' column.\n",
+ "# The descending sort puts the higher label first, so the rows line up with the names assigned below.\n",
+ "temp = df_spam[words_feature + ['spam']].groupby('spam').mean().sort_index(ascending=False)\n",
+ "temp.columns = words\n",
+ "temp.index = ['spam','email']\n",
+ "# The 12 words whose mean frequency differs most (in absolute value) between the two rows.\n",
+ "top_words = (temp.iloc[0] - temp.iloc[1]).abs().sort_values(ascending=False).index[:12]\n",
+ "# Use the fully qualified option name: abbreviated names rely on pattern matching and can become ambiguous.\n",
+ "with pd.option_context('display.float_format', '{:,.2f}'.format):\n",
+ "    display(temp[top_words])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "interpreter": {
+ "hash": "58dd21b98941f49f3ff1e43b315fad01188084135da91d73015e8a9aac331f94"
+ },
+ "kernelspec": {
+ "display_name": "Python 3.9.5 64-bit ('env_esl': virtualenvwrapper)",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.5"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
\ No newline at end of file