From b358c276ff49c76ab945ffc51760e51328f574b1 Mon Sep 17 00:00:00 2001
From: A Farzat
Date: Fri, 31 Oct 2025 22:32:50 +0300
Subject: Improve the content of the blog post

---
 content/blog/csca5622-final/index.md | 65 ++++++++++++++++++++++++++++++++----
 1 file changed, 58 insertions(+), 7 deletions(-)

diff --git a/content/blog/csca5622-final/index.md b/content/blog/csca5622-final/index.md
index f3747e6..d4115b7 100644
--- a/content/blog/csca5622-final/index.md
+++ b/content/blog/csca5622-final/index.md
@@ -1,6 +1,6 @@
 +++
-title = "Spam Email Classification (non-DL)"
-description = "Comparing different machine learning algorithms on the Spam Email Classification problem (deep learning not included)."
+title = "📧 Is This Spam? Testing Email Classification Models"
+description = "Exploring which machine learning models best detect spam emails—and why ensemble methods like AdaBoost and Random Forest come out on top."
 date = 2025-10-22
 [taxonomies]
 tags = ["machine_learning"]
@@ -8,13 +8,64 @@ tags = ["machine_learning"]
 styles = ["notebooks.css",
 ]
 +++

-This is a small research I made on the performance of different machine learning
-models when classifying spam email. The focus is on supervised models, but without
-including deep learning models.
+Spam filters are something we rely on every day, often without thinking about
+how they work. In this project, I explored how different machine learning
+models perform when tasked with identifying spam emails using a dataset from
+the UCI Machine Learning Repository.

-You can also view the notebook as [a separate page](notebook.html).
+## About the Dataset
+
+The dataset includes over 4,600 emails, each described by 57 features. These
+features capture things like how often certain words or characters appear
+(e.g., “free”, “$”, “!”), and how long sequences of capital letters are. Each
+email is labeled as either spam or not spam.
+
+Some features are surprisingly specific—like the presence of the word “george”
+or the area code “650”—which turned out to be strong indicators of non-spam.
+These quirks reflect the personal nature of the original email sources: the
+non-spam messages were collected from the dataset donors’ own work and
+personal mail.
+
+## What I Tried
+
+The goal was to test a few different models and see which one did the best job.
+I compared:
+
+* Logistic Regression
+* Random Forest
+* AdaBoost
+* Support Vector Machines (SVMs)
+
+Each model was tuned to find its best settings and then evaluated on accuracy,
+precision, and recall. A simplified sketch of that workflow appears near the
+end of this post, just before the links to the full notebook.
+
+## What Worked Best
+
+The ensemble models—Random Forest and AdaBoost—stood out. They consistently
+delivered high accuracy and precision, outperforming the benchmarks published
+on UCI’s website.
+
+Logistic Regression also did well, especially when regularization was used to
+handle overlapping features. SVMs, on the other hand, didn’t perform as
+strongly. Interestingly, the simpler LinearSVC model did better than the more
+complex RBF kernel version.
+
+## Why Precision Matters
+
+In spam detection, false positives (marking a legitimate email as spam) are
+worse than false negatives, so precision matters more than raw accuracy.
+Fortunately, the best-performing models had strong precision scores, especially
+the ensemble ones.
+
+## Final Thoughts
+
+This project was a great way to see how different models handle a real-world
+classification task. While the results were solid, there’s still room to
+improve—especially when it comes to minimizing false positives. Adjusting
+thresholds or tweaking model weights could help push precision even higher.
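+
+For readers who want a feel for the code before opening the notebook, here is
+a rough sketch of the comparison. It is illustrative only, not the exact code
+from the notebook: it assumes scikit-learn and pandas, pulls the data straight
+from the usual UCI mirror path, and uses much smaller hyper-parameter grids
+than the real search.
+
+```python
+import pandas as pd
+from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score, precision_score, recall_score
+from sklearn.model_selection import GridSearchCV, train_test_split
+from sklearn.pipeline import make_pipeline
+from sklearn.preprocessing import StandardScaler
+from sklearn.svm import SVC, LinearSVC
+
+# Spambase: 57 numeric features plus a 0/1 spam label, no header row.
+# (The usual UCI mirror path -- adjust it if the repository moves.)
+URL = ("https://archive.ics.uci.edu/ml/machine-learning-databases"
+       "/spambase/spambase.data")
+data = pd.read_csv(URL, header=None)
+X, y = data.iloc[:, :-1], data.iloc[:, -1]
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.2, stratify=y, random_state=42)
+
+# Candidate models with small, illustrative hyper-parameter grids.
+candidates = {
+    "Logistic Regression": (
+        make_pipeline(StandardScaler(), LogisticRegression(max_iter=5000)),
+        {"logisticregression__C": [0.1, 1, 10]},
+    ),
+    "Random Forest": (
+        RandomForestClassifier(random_state=42),
+        {"n_estimators": [200, 500], "max_depth": [None, 20]},
+    ),
+    "AdaBoost": (
+        AdaBoostClassifier(random_state=42),
+        {"n_estimators": [100, 300], "learning_rate": [0.5, 1.0]},
+    ),
+    "Linear SVM": (
+        make_pipeline(StandardScaler(), LinearSVC(max_iter=10000)),
+        {"linearsvc__C": [0.1, 1, 10]},
+    ),
+    "RBF SVM": (
+        make_pipeline(StandardScaler(), SVC(kernel="rbf")),
+        {"svc__C": [1, 10], "svc__gamma": ["scale", 0.01]},
+    ),
+}
+
+# Tune each model on the training split (optimising for precision, since
+# false positives are the costly mistake), then score the held-out set.
+for name, (model, grid) in candidates.items():
+    search = GridSearchCV(model, grid, cv=5, scoring="precision")
+    search.fit(X_train, y_train)
+    pred = search.predict(X_test)
+    print(f"{name:20s}"
+          f" accuracy={accuracy_score(y_test, pred):.3f}"
+          f" precision={precision_score(y_test, pred):.3f}"
+          f" recall={recall_score(y_test, pred):.3f}")
+```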
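+
+The thresholding idea is also easy to prototype. A classifier like the random
+forest exposes `predict_proba`, so instead of the default 0.5 cut-off you can
+demand more confidence before flagging a message as spam. Again, this is just
+a sketch continuing from the snippet above, with arbitrary example thresholds:
+
+```python
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import precision_score, recall_score
+
+# Reuses X_train, X_test, y_train and y_test from the previous snippet.
+forest = RandomForestClassifier(n_estimators=500, random_state=42)
+forest.fit(X_train, y_train)
+proba = forest.predict_proba(X_test)[:, 1]  # estimated probability of spam
+
+# Raising the cut-off trades recall for precision: fewer legitimate emails
+# get flagged, at the cost of letting a little more spam through.
+for threshold in (0.5, 0.7, 0.9):
+    pred = (proba >= threshold).astype(int)
+    print(f"threshold={threshold:.1f}"
+          f"  precision={precision_score(y_test, pred):.3f}"
+          f"  recall={recall_score(y_test, pred):.3f}")
+```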
+
+The full notebook with code and visualizations is embedded below 👇

-You can also check it on [GitHub](https://github.com/Farzat07/introduction-to-machine-learning-supervised-learning-final-assignment).
+You can also view the notebook on [a separate page](notebook.html), or check
+it on [GitHub](https://github.com/Farzat07/introduction-to-machine-learning-supervised-learning-final-assignment).
--
cgit v1.2.3-70-g09d2