From f8b1a98a8b5b9c1d2c16a9dc88916e85e316cf97 Mon Sep 17 00:00:00 2001
From: A Farzat
Date: Mon, 3 Nov 2025 19:42:41 +0300
Subject: Add a blog post about the disaster tweets project

---
 content/blog/csca5642-w4/index.md      |   73 +
 content/blog/csca5642-w4/notebook.html | 9537 ++++++++++++++++++++++++++++++++
 2 files changed, 9610 insertions(+)
 create mode 100644 content/blog/csca5642-w4/index.md
 create mode 100644 content/blog/csca5642-w4/notebook.html

diff --git a/content/blog/csca5642-w4/index.md b/content/blog/csca5642-w4/index.md
new file mode 100644
index 0000000..52175b9
--- /dev/null
+++ b/content/blog/csca5642-w4/index.md
@@ -0,0 +1,73 @@
++++
+title = "🌪️ Classifying Disaster Tweets with Deep Learning"
+description = "A comparative study of GRU and LSTM architectures for binary classification of disaster-related tweets."
+date = 2025-11-03
+[taxonomies]
+tags = ["machine_learning"]
+[extra]
+styles = ["notebooks.css", ]
++++
+
+## Project Overview
+
+Social media platforms like Twitter often serve as early indicators of
+real-world events. This project explores how machine learning can be used to
+automatically classify tweets as either disaster-related or not—an application
+with potential value for emergency response teams, journalists, and public
+safety organizations.
+
+The dataset comes from the Kaggle NLP with Disaster Tweets competition, and
+includes over 7,600 labeled tweets. The challenge lies in the ambiguity of
+language: disaster-related terms are often used metaphorically, making
+classification non-trivial.
+
+## Approach
+
+The analysis began with a baseline model using metadata features (like location
+and keywords), followed by a more advanced pipeline using text-based features
+and pretrained GloVe embeddings. The text was cleaned and tokenized to align
+with GloVe’s training conventions, and padded for input into recurrent neural
+networks.
+
+Two types of RNN architectures were explored:
+
+* **LSTM (Long Short-Term Memory)**
+* **GRU (Gated Recurrent Unit)**
+
+Each was tested across multiple configurations, varying the number of layers,
+units, and dropout rates. Early stopping and custom callbacks were used to
+optimize for F1-score, which was chosen as the primary evaluation metric.
+
+## Key Findings
+
+* **GRU outperformed LSTM**, achieving an F1-score of **0.857** and an accuracy
+of **88.2%**.
+* **Dropout** helped reduce overfitting but significantly increased training
+time due to limitations in GPU optimization.
+* **Model performance fluctuated**, suggesting that randomness and training
+dynamics played a larger role than expected.
+* **Metadata alone** (via Random Forest) was insufficient, with an F1-score of
+just **0.617**.
+
+## Reflections
+
+While GRU emerged as the best model, the results showed sensitivity to
+hyperparameters and training conditions. Dropout and early stopping helped
+mitigate overfitting, but the trade-offs in training time and reproducibility
+were notable.
+
+Future improvements could include:
+
+* Exploring alternative regularization techniques
+* Leveraging attention mechanisms or transformer-based models
+* Incorporating tweet length and other linguistic features more explicitly
+
+***
+
+If you're curious about the details, the full notebook is embedded below 👇
+
+
+
+You can also view the notebook in [a separate page](notebook.html), or check it
+on [GitHub](https://github.com/Farzat07/Kaggle-Mini-Project-NLP-Disaster-Tweets).
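The "Approach" section of the post stays at the prose level. As a rough illustration of what such a GloVe-initialized GRU classifier can look like in Keras, the sketch below vectorizes a couple of placeholder tweets into fixed-length id sequences and stacks a GRU on frozen pretrained embeddings. The vocabulary size, sequence length, GloVe file name, layer sizes, and sample texts are all illustrative assumptions, not the notebook's actual configuration, and the tweet-cleaning step the post mentions is omitted here.

```python
# Minimal sketch of the pipeline described in the post: vectorize cleaned
# tweets into padded id sequences, then classify them with a GRU on top of
# pretrained GloVe vectors. Sizes, file names, and the sample texts below are
# illustrative assumptions, not the notebook's actual configuration.
import numpy as np
import tensorflow as tf

MAX_WORDS, MAX_LEN, EMBED_DIM = 20_000, 50, 100
train_texts = ["forest fire near la ronge sask canada",   # placeholder tweets
               "i love fruits they are the bomb"]
train_labels = np.array([1, 0])

# Turn each tweet into a fixed-length sequence of integer word ids.
vectorizer = tf.keras.layers.TextVectorization(
    max_tokens=MAX_WORDS, output_sequence_length=MAX_LEN)
vectorizer.adapt(train_texts)
X = vectorizer(tf.constant(train_texts))
vocab = vectorizer.get_vocabulary()

def load_glove_matrix(vocab, path="glove.twitter.27B.100d.txt"):
    """Build an embedding matrix from a GloVe file; missing words stay zero."""
    vectors = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            word, *values = line.split()
            vectors[word] = np.asarray(values, dtype="float32")
    matrix = np.zeros((len(vocab), EMBED_DIM))
    for idx, word in enumerate(vocab):
        if word in vectors:
            matrix[idx] = vectors[word]
    return matrix

def build_gru_model(embedding_matrix, units=64, dropout=0.2):
    """Frozen GloVe embeddings -> single GRU layer -> sigmoid output."""
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(
            embedding_matrix.shape[0], EMBED_DIM,
            embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
            trainable=False),
        tf.keras.layers.GRU(units, dropout=dropout),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ])
    model.compile(optimizer="adam", loss="binary_crossentropy",
                  metrics=["accuracy"])
    return model

embedding_matrix = load_glove_matrix(vocab)
model = build_gru_model(embedding_matrix)
model.fit(X, train_labels, epochs=1, batch_size=32)
```

The `units` and `dropout` arguments stand in for the kind of settings the post says were varied across configurations; the actual grid explored in the notebook may differ.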
diff --git a/content/blog/csca5642-w4/notebook.html b/content/blog/csca5642-w4/notebook.html
new file mode 100644
index 0000000..9c6a455
--- /dev/null
+++ b/content/blog/csca5642-w4/notebook.html
@@ -0,0 +1,9537 @@

cours3w4submission
# This cell can be left without running if the checkpoint files are available
class F1ScoreCallback(tf.keras.callbacks.Callback):
    def __init__(self, validation_data, patience=5):
        super().__init__()
        self.X_val, self.y_val = validation_data
        self.patience = patience
        self.best_f1 = 0
        self.best_weights = None
        self.wait = 0

    def on_epoch_end(self, epoch, logs=None):
        y_pred = self.model.predict(self.X_val, verbose=0)
        y_pred_binary = np.round(y_pred).flatten()
        f1 = f1_score(self.y_val.flatten(), y_pred_binary)

        logs['val_f1'] = f1  # This adds val_f1 to the logs

        print(" - val_f1: %.4f" % f1, end='')

        # Early stopping logic based on F1
        if f1 > self.best_f1:
            self.best_f1 = f1
            self.best_weights = self.model.get_weights()
            self.wait = 0
        else:
            self.wait += 1
            if self.wait >= self.patience:
                self.model.stop_training = True
                self.model.set_weights(self.best_weights)

for i, model in enumerate(rnn_models):
    f1_callback = F1ScoreCallback(validation_data=(X_cv_rnn, y_cv_rnn), patience=10)
    model_checkpoint = ModelCheckpoint(model["checkpoint"], save_best_only=True,
                                       monitor='val_f1', mode='max')
    hist_logger = CSVLogger(model["history_file"])
    print("Training model number", i + 1, "of", len(rnn_models))
    print(model["name"])
    model["instance"].fit(
        X_train_rnn, y_train_rnn,
        batch_size=32, epochs=50,
        validation_data=(X_cv_rnn, y_cv_rnn),
        callbacks=[f1_callback, model_checkpoint, hist_logger],
        verbose=1
    )
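This training cell relies on objects defined in earlier notebook cells that are not included in this excerpt: the imports (`tf`, `np`, `f1_score`, `ModelCheckpoint`, `CSVLogger`), the padded train/validation splits (`X_train_rnn`, `y_train_rnn`, `X_cv_rnn`, `y_cv_rnn`), and `rnn_models`, a list of dicts with `"name"`, `"instance"`, `"checkpoint"`, and `"history_file"` keys. Note that the callback order matters: `F1ScoreCallback` writes `val_f1` into `logs` during `on_epoch_end`, and `ModelCheckpoint` can only monitor that key because it runs after the custom callback in the same list. A hypothetical sketch of how such a list could be assembled is shown below; the grid values, file names, and the plain `Embedding` layer are assumptions, and the notebook itself builds its models with GloVe-initialized embeddings.

```python
# Hypothetical reconstruction of the `rnn_models` list consumed by the loop
# above. Grid values, file names, and the plain Embedding layer are
# illustrative assumptions; the notebook initializes its embeddings from
# GloVe vectors defined in earlier cells.
import tensorflow as tf

VOCAB_SIZE, EMBED_DIM = 20_000, 100  # assumed vocabulary and embedding sizes

def build_rnn(cell_cls, units, dropout):
    """Embedding -> a single GRU or LSTM layer -> sigmoid output."""
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(VOCAB_SIZE, EMBED_DIM),
        cell_cls(units, dropout=dropout),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ])
    model.compile(optimizer="adam", loss="binary_crossentropy",
                  metrics=["accuracy"])
    return model

rnn_models = [
    {
        "name": f"{cell_cls.__name__} ({units} units, dropout {dropout})",
        "instance": build_rnn(cell_cls, units, dropout),
        "checkpoint": f"{cell_cls.__name__.lower()}_{units}_{dropout}.keras",
        "history_file": f"{cell_cls.__name__.lower()}_{units}_{dropout}.csv",
    }
    for cell_cls in (tf.keras.layers.LSTM, tf.keras.layers.GRU)
    for units in (64, 128)
    for dropout in (0.0, 0.2)
]
```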
--
cgit v1.2.3-70-g09d2