From 6a79fad795cf527e4263494979c1dc1fd483afec Mon Sep 17 00:00:00 2001
From: A Farzat
Date: Sun, 2 Nov 2025 18:29:23 +0300
Subject: Add the cancer detection project

---
 content/blog/csca5642-w3/index.md      |   68 +
 content/blog/csca5642-w3/notebook.html | 9244 ++++++++++++++++++++++++++++++++
 2 files changed, 9312 insertions(+)
 create mode 100644 content/blog/csca5642-w3/index.md
 create mode 100644 content/blog/csca5642-w3/notebook.html

diff --git a/content/blog/csca5642-w3/index.md b/content/blog/csca5642-w3/index.md
new file mode 100644
index 0000000..8b8aec2
--- /dev/null
+++ b/content/blog/csca5642-w3/index.md
@@ -0,0 +1,68 @@
++++
+title = "🧬 Detecting Cancer in Histopathology Images with CNNs"
+description = "A practical deep learning project for binary classification using the PatchCamelyon dataset."
+date = 2025-11-02
+[taxonomies]
+tags = ["machine_learning"]
+[extra]
+styles = ["notebooks.css", ]
++++
+
+## Overview
+
+This project explores the use of convolutional neural networks (CNNs) to detect
+metastatic cancer in histopathologic images of lymph node tissue. The task is
+framed as a binary classification problem, distinguishing between cancerous and
+non-cancerous image patches.
+
+The dataset, sourced from the PatchCamelyon (PCam) benchmark, offers a
+realistic simulation of the challenges faced by pathologists. With over 220,000
+labeled 96x96 RGB image patches, it strikes a balance between complexity and
+computational feasibility—making it ideal for experimentation on a single GPU.
+
+## Approach
+
+The workflow began with a thorough exploratory data analysis to understand the
+dataset’s structure, class distribution, and pixel intensity characteristics.
+Data augmentation and normalization were applied to improve generalization and
+training efficiency.
+
+A flexible CNN builder was implemented to test different architectures—ranging
+from simple to deeper and wider networks. After identifying the best-performing
+architecture, various regularization techniques were evaluated, including L1/L2
+penalties, dropout, and batch normalization.
+
+To ensure fair comparisons and mitigate overfitting, training was supported by
+callbacks such as early stopping, learning rate scheduling, and model
+checkpointing.
+
+## Results
+
+The deeper CNN architecture consistently outperformed the others, achieving a
+validation AUC of **0.9331**. Among regularization strategies, **additional
+batch normalization** provided the best boost in performance, pushing the final
+model’s validation AUC to **0.9878** when trained on the full dataset.
+
+The final model demonstrated strong generalization, with balanced precision and
+recall across both classes. Predictions on the test set were generated and
+compiled into a submission-ready format.
+
+## Reflections
+
+While the performance metrics are promising, the project also highlighted some
+challenges—particularly the variability in validation scores during early
+training. This variability diminished with larger datasets and longer training,
+suggesting that data volume plays a key role in stabilizing model performance.
+
+Future work could explore more advanced architectures, ensemble methods, or
+semi-supervised learning to further improve robustness and accuracy.
+
+***
+
+If you're curious about the details, the full notebook is embedded below 👇
+
+
+You can also view the notebook in [a separate page](notebook.html), or check it
+on [GitHub](https://github.com/Farzat07/Kaggle-Mini-Project-CNN-Cancer-Detection).
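The post mentions data augmentation and normalization, but the generator setup itself does not survive in this excerpt of the notebook. Below is a minimal sketch of what such a pipeline could look like with Keras' `ImageDataGenerator`; the transform values, `train_df`, and its column names are assumptions for illustration, not the notebook's actual settings.

```python
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Augmentation for training, normalization only for validation.
# The specific transform values here are assumptions, not the notebook's.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,     # normalize pixel intensities to [0, 1]
    rotation_range=20,
    horizontal_flip=True,  # tissue patches have no canonical orientation
    vertical_flip=True,
    zoom_range=0.1,
)
val_datagen = ImageDataGenerator(rescale=1.0 / 255)

# train_df is a hypothetical dataframe mapping patch filenames to labels
train_generator = train_datagen.flow_from_dataframe(
    train_df,
    directory="train/",
    x_col="filename",
    y_col="label",
    target_size=(96, 96),  # PCam patches are 96x96
    class_mode="binary",
    batch_size=64,
)
```

Flips along both axes are a common choice for histopathology, since a tumour patch is equally valid upside down, while the validation generator stays augmentation-free so scores remain comparable across runs.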
diff --git a/content/blog/csca5642-w3/notebook.html b/content/blog/csca5642-w3/notebook.html
new file mode 100644
index 0000000..f4d23e7
--- /dev/null
+++ b/content/blog/csca5642-w3/notebook.html
@@ -0,0 +1,9244 @@
[HTML export of the Jupyter notebook "cours3w3submission": head, style, and script boilerplate omitted; only the code cells below survive in this excerpt]
# Callback imports (defined once near the top of the notebook; repeated here
# so the excerpt is self-contained)
from tensorflow.keras.callbacks import (
    CSVLogger,
    EarlyStopping,
    ModelCheckpoint,
    ReduceLROnPlateau,
)

# Train each model architecture
# This cell can be left without running if the checkpoint files are available
for model in arch_models:
    print("\nTraining %s..." % model["name"])

    model_instance = create_cnn_model(**model["params"])

    # Callbacks: track validation AUC, keep the best weights, halve the
    # learning rate on plateaus, and log per-epoch history to CSV
    early_stopping = EarlyStopping(monitor='val_auc', patience=5, restore_best_weights=True, mode='max')
    reduce_lr = ReduceLROnPlateau(monitor='val_auc', factor=.5, patience=3, min_lr=1e-7, mode='max')
    model_checkpoint = ModelCheckpoint(model["checkpoint"], save_best_only=True, monitor='val_auc', mode='max')
    hist_logger = CSVLogger(model["history_file"])

    # Reset generators to ensure consistent training across models
    train_generator.reset()
    val_generator.reset()

    model_instance.fit(
        train_generator,
        epochs=15,  # Fewer epochs for tuning
        validation_data=val_generator,
        callbacks=[early_stopping, reduce_lr, model_checkpoint, hist_logger],
        verbose=1,
    )

# Train each regularization model
# This cell can be left without running if the checkpoint files are available
for model in reg_models:
    print("\nTraining %s..." % model["name"])

    model_instance = create_cnn_model(**model["params"])

    # Callbacks
    early_stopping = EarlyStopping(monitor='val_auc', patience=5, restore_best_weights=True, mode='max')
    reduce_lr = ReduceLROnPlateau(monitor='val_auc', factor=.5, patience=3, min_lr=1e-7, mode='max')
    model_checkpoint = ModelCheckpoint(model["checkpoint"], save_best_only=True, monitor='val_auc', mode='max')
    hist_logger = CSVLogger(model["history_file"])

    # Reset generators to ensure consistent training
    train_generator.reset()
    val_generator.reset()

    model_instance.fit(
        train_generator,
        epochs=15,
        validation_data=val_generator,
        callbacks=[early_stopping, reduce_lr, model_checkpoint, hist_logger],
        verbose=1,
    )

# This cell can be left without running if the checkpoint files are available
# Create the best regularized model
final_model = create_cnn_model(**best_reg_model["params"])

# Train the final model on the full dataset (EPOCHS, the full_* generators,
# and the final_* callbacks are defined in earlier notebook cells not shown here)
final_model.fit(
    full_train_generator,
    epochs=EPOCHS,
    validation_data=full_val_generator,
    callbacks=[early_stopping, reduce_lr, final_checkpoint, final_csv_logger],
    verbose=1,
)
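The loops above call `create_cnn_model(**model["params"])`, but the factory itself sits in an earlier cell this excerpt omits. A minimal sketch of a flexible builder consistent with the post's description (depth and width knobs plus optional L2, dropout, and extra batch normalization); every parameter name here is an assumption, not the notebook's actual signature.

```python
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.metrics import AUC

def create_cnn_model(n_blocks=3, base_filters=32, dense_units=128,
                     dropout_rate=0.0, l2_penalty=0.0, extra_batchnorm=False):
    """Build a small CNN; all keyword names are assumed for illustration."""
    reg = regularizers.l2(l2_penalty) if l2_penalty else None
    model = models.Sequential()
    model.add(layers.Input(shape=(96, 96, 3)))  # PCam patches are 96x96 RGB
    for i in range(n_blocks):  # filters double with each block
        model.add(layers.Conv2D(base_filters * 2 ** i, 3, padding="same",
                                activation="relu", kernel_regularizer=reg))
        if extra_batchnorm:
            model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D())
    model.add(layers.Flatten())
    model.add(layers.Dense(dense_units, activation="relu",
                           kernel_regularizer=reg))
    if dropout_rate:
        model.add(layers.Dropout(dropout_rate))
    model.add(layers.Dense(1, activation="sigmoid"))  # binary output
    # Naming the metric "auc" is what makes Keras report val_auc,
    # the quantity every callback in the training loops monitors
    model.compile(optimizer="adam", loss="binary_crossentropy",
                  metrics=[AUC(name="auc")])
    return model
```

Each entry in `arch_models` and `reg_models` would then pair a `name` and a `params` dict with `checkpoint` and `history_file` paths, e.g. `{"name": "deeper", "params": {"n_blocks": 4}, "checkpoint": "deeper.keras", "history_file": "deeper.csv"}` (values hypothetical).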
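The post's Results section notes that test-set predictions were compiled into a submission-ready format; that step also falls outside the excerpt. A sketch under stated assumptions: a hypothetical checkpoint path, and a non-shuffled `test_generator` built like the validation one. The `id,label` column layout matches Kaggle's Histopathologic Cancer Detection submission format.

```python
import pandas as pd
from tensorflow.keras.models import load_model

# Load the best checkpoint written by ModelCheckpoint during training
# ("final_model.keras" is a hypothetical path; substitute the actual file)
final_model = load_model("final_model.keras")

# test_generator is assumed: a non-shuffled iterator over the test patches,
# so predictions stay aligned with test_generator.filenames
test_generator.reset()
probs = final_model.predict(test_generator).ravel()

# Strip the file extension to recover each patch id, then write the CSV
submission = pd.DataFrame({
    "id": [name.rsplit(".", 1)[0] for name in test_generator.filenames],
    "label": probs,
})
submission.to_csv("submission.csv", index=False)
```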