diff --git a/ch06/01_main-chapter-code/ch06.ipynb b/ch06/01_main-chapter-code/ch06.ipynb
index 856d618..9299460 100644
--- a/ch06/01_main-chapter-code/ch06.ipynb
+++ b/ch06/01_main-chapter-code/ch06.ipynb
@@ -79,6 +79,28 @@
"
"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "946c3e56-b04b-4b0f-b35f-b485ce5b28df",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Utility to prevent certain cells from being executed twice\n",
+ "\n",
+ "from IPython.core.magic import register_line_cell_magic\n",
+ "\n",
+ "# Names of guarded cells that have already run successfully.\n",
+ "# Re-running this utility cell resets the registry.\n",
+ "executed_cells = set()\n",
+ "\n",
+ "@register_line_cell_magic\n",
+ "def run_once(line, cell=None):\n",
+ "    \"\"\"Execute the cell body only the first time `line` (the cell's name) is seen.\n",
+ "\n",
+ "    Usage: put `%%run_once <name>` on the first line of a cell.\n",
+ "\n",
+ "    line: text following the magic name; used as the key in `executed_cells`.\n",
+ "    cell: the cell body; None when invoked as a line magic (`%run_once <name>`).\n",
+ "    \"\"\"\n",
+ "    # A line+cell magic is called without `cell` when used as `%run_once ...`;\n",
+ "    # there is no body to run in that case, so explain the usage instead of failing.\n",
+ "    if cell is None:\n",
+ "        print(\"Usage: %%run_once <name> (as the first line of a cell)\")\n",
+ "        return\n",
+ "\n",
+ "    if line in executed_cells:\n",
+ "        print(f\"Cell '{line}' has already been executed.\")\n",
+ "        return\n",
+ "\n",
+ "    result = get_ipython().run_cell(cell)\n",
+ "    # Only mark the cell as done if it ran without errors, so that a failed\n",
+ "    # cell can be fixed and re-run instead of being locked out.\n",
+ "    if result.success:\n",
+ "        executed_cells.add(line)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "3a84cf35-b37f-4c15-8972-dfafc9fadc1c",
@@ -167,7 +189,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
"id": "def7c09b-af9c-4216-90ce-5e67aed1065c",
"metadata": {
"colab": {
@@ -181,7 +203,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "File downloaded and saved as sms_spam_collection/SMSSpamCollection.tsv\n"
+ "sms_spam_collection/SMSSpamCollection.tsv already exists. Skipping download and extraction.\n"
]
}
],
@@ -230,7 +252,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
"id": "da0ed4da-ac31-4e4d-8bdd-2153be4656a4",
"metadata": {
"colab": {
@@ -344,7 +366,7 @@
"[5572 rows x 2 columns]"
]
},
- "execution_count": 3,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -368,7 +390,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"id": "495a5280-9d7c-41d4-9719-64ab99056d4c",
"metadata": {
"colab": {
@@ -406,7 +428,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 6,
"id": "7be4a0a2-9704-4a96-b38f-240339818688",
"metadata": {
"colab": {
@@ -428,6 +450,9 @@
}
],
"source": [
+ "%%run_once balance_df\n",
+ "\n",
+ "\n",
"def create_balanced_dataset(df):\n",
" \n",
" # Count the instances of \"spam\"\n",
@@ -441,6 +466,7 @@
"\n",
" return balanced_df\n",
"\n",
+ "\n",
"balanced_df = create_balanced_dataset(df)\n",
"print(balanced_df[\"Label\"].value_counts())"
]
@@ -457,14 +483,133 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 7,
"id": "c1b10c3d-5d57-42d0-8de8-cf80a06f5ffd",
"metadata": {
"id": "c1b10c3d-5d57-42d0-8de8-cf80a06f5ffd"
},
"outputs": [],
"source": [
- "balanced_df[\"Label\"] = balanced_df[\"Label\"].map({\"ham\": 0, \"spam\": 1})"
+ "%%run_once label_mapping\n",
+ "balanced_df[\"Label\"] = balanced_df[\"Label\"].map({\"ham\": 0, \"spam\": 1}) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "e6f7f062-ef4e-4020-8275-71990cab4414",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
| \n", + " | Label | \n", + "Text | \n", + "
|---|---|---|
| 4307 | \n", + "0 | \n", + "Awww dat is sweet! We can think of something t... | \n", + "
| 4138 | \n", + "0 | \n", + "Just got to <#> | \n", + "
| 4831 | \n", + "0 | \n", + "The word \"Checkmate\" in chess comes from the P... | \n", + "
| 4461 | \n", + "0 | \n", + "This is wishing you a great day. Moji told me ... | \n", + "
| 5440 | \n", + "0 | \n", + "Thank you. do you generally date the brothas? | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "
| 5537 | \n", + "1 | \n", + "Want explicit SEX in 30 secs? Ring 02073162414... | \n", + "
| 5540 | \n", + "1 | \n", + "ASKED 3MOBILE IF 0870 CHATLINES INCLU IN FREE ... | \n", + "
| 5547 | \n", + "1 | \n", + "Had your contract mobile 11 Mnths? Latest Moto... | \n", + "
| 5566 | \n", + "1 | \n", + "REMINDER FROM O2: To get 2.50 pounds free call... | \n", + "
| 5567 | \n", + "1 | \n", + "This is the 2nd time we have tried 2 contact u... | \n", + "
1494 rows × 2 columns
\n", + "