import cv2
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from scipy import stats
+import seaborn as sns
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report
+from sklearn.utils import shuffle
+
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.models import load_model, Sequential
+from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
+from tensorflow.keras.preprocessing.image import ImageDataGenerator
+from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, CSVLogger
+from tensorflow.keras.regularizers import l1, l2, l1_l2
+
+import os
+from PIL import Image
+from tqdm import tqdm
+
+# Set random seeds for reproducibility
+np.random.seed(42)
+tf.random.set_seed(42)
+

2025-09-10 10:43:25.735520: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
+To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
+

# Configuration
+DATA_DIR = '/kaggle/input/histopathologic-cancer-detection' if 'KAGGLE_URL_BASE' in os.environ else 'data_3'
+TRAIN_DIR = os.path.join(DATA_DIR, 'train')
+TEST_DIR = os.path.join(DATA_DIR, 'test')
+
+IMG_SIZE = 96
+BATCH_SIZE = 64
+EPOCHS = 30
+

# Load and explore the data
+train_labels = pd.read_csv(os.path.join(DATA_DIR, 'train_labels.csv'))
+
+# Inspect the data
+display(train_labels.head())
+train_labels.info()
+
+# Add full paths to the dataframe
+train_labels['path'] = train_labels['id'].apply(lambda img_id: os.path.join(TRAIN_DIR, f"{img_id}.tif"))
+

<class 'pandas.core.frame.DataFrame'>
+RangeIndex: 220025 entries, 0 to 220024
+Data columns (total 2 columns):
+ #   Column  Non-Null Count   Dtype 
+---  ------  --------------   ----- 
+ 0   id      220025 non-null  object
+ 1   label   220025 non-null  int64 
+dtypes: int64(1), object(1)
+memory usage: 3.4+ MB
+

# Check for missing values
+print("Missing values in labels:", train_labels.isnull().sum().sum())
+print("Duplicate IDs:", train_labels['id'].duplicated().sum())
+

Missing values in labels: 0
+Duplicate IDs: 0
+

# Visualize class distribution
+plt.figure(figsize=(10, 5))
+plt.subplot(1, 2, 1)
+sns.countplot(x='label', data=train_labels)
+plt.title('Class Distribution')
+
+plt.subplot(1, 2, 2)
+plt.pie(train_labels['label'].value_counts(),
+        labels=['0', '1'],
+        autopct='%1.1f%%', colors=['lightblue', 'lightcoral'])
+plt.title('Class Proportions')
+plt.tight_layout()
+plt.show()
+

# Image metadata analysis
+def analyze_image_metadata(df, sample_size=1000):
+    """Analyze image dimensions, formats, and basic statistics"""
+    print("Analysis done on a sample of %d images" % sample_size)
+
+    # Sample images for analysis
+    sample_df = df.sample(min(sample_size, len(df)), random_state=42)
+
+    dimensions = []
+    formats = []
+    mean_intensities = []
+    std_intensities = []
+
+    for _, row in tqdm(sample_df.iterrows(), desc="Analyzing image metadata"):
+        img = Image.open(row['path'])
+        img_array = np.array(img)
+
+        dimensions.append(img_array.shape)
+        formats.append(img.format if img.format else 'Unknown')
+        mean_intensities.append(np.mean(img_array))
+        std_intensities.append(np.std(img_array))
+
+    # Analyze dimensions
+    unique_dims = set(dimensions)
+    print("Unique image dimensions:", unique_dims)
+    print("All images same size:", len(unique_dims) == 1)
+
+    # Analyze formats
+    print("All images have a format of %s:" % formats[0], all(map(lambda x: x == formats[0], formats)))
+
+    # Analyze intensity statistics
+    print("\nIntensity statistics:")
+    print("Mean intensity: %.2f ± %.2f" % (np.mean(mean_intensities), np.std(mean_intensities)))
+    print("Std intensity: %.2f ± %.2f" % (np.mean(std_intensities), np.std(std_intensities)))
+
+# Run metadata analysis
+analyze_image_metadata(train_labels)
+

Analysis done on a sample of 1000 images
+

Analyzing image metadata: 1000it [00:00, 1217.26it/s]

Unique image dimensions: {(96, 96, 3)}
+All images same size: True
+All images have a format of TIFF: True
+
+Intensity statistics:
+Mean intensity: 164.00 ± 38.53
+Std intensity: 51.66 ± 13.62
+

+

# Data leakage check - ensure no duplicate images between train and test
+overlap = set(train_labels['id']).intersection((os.path.splitext(f)[0] for f in os.listdir(TEST_DIR)))
+print("Number of overlapping files:", len(overlap))
+print("No leakage detected:", len(overlap) == 0)
+

Number of overlapping files: 0
+No leakage detected: True
+

# Sample some images from each class
+def visualize_samples(n_samples=5):
+    fig, axes = plt.subplots(2, n_samples, figsize=(15, 6))
+
+    for label in [0, 1]:
+        sample_df = train_labels[train_labels['label'] == label].sample(n_samples)
+
+        for i, (_, row) in enumerate(sample_df.iterrows()):
+            img = Image.open(row['path'])
+
+            axes[label, i].imshow(img)
+            axes[label, i].set_title("Label: %s" % label)
+            axes[label, i].axis('off')
+
+    plt.tight_layout()
+    plt.show()
+
+visualize_samples()
+

def analyze_pixel_intensities(df, sample_size=1000):
+    """Analyze pixel intensity distributions for each class"""
+    np.random.seed(42)
+
+    # Sample images from each class
+    samples_0 = df[df['label'] == 0].sample(sample_size // 2)
+    samples_1 = df[df['label'] == 1].sample(sample_size // 2)
+
+    pixels_0 = []
+    pixels_1 = []
+
+    # Process negative samples
+    for _, row in tqdm(samples_0.iterrows(), desc="Processing negative samples"):
+        img = Image.open(row['path'])
+        img_array = np.array(img)
+        pixels_0.extend(img_array.flatten())
+
+    # Process positive samples
+    for _, row in tqdm(samples_1.iterrows(), desc="Processing positive samples"):
+        img = Image.open(row['path'])
+        img_array = np.array(img)
+        pixels_1.extend(img_array.flatten())
+
+    return pixels_0, pixels_1
+
+# Analyze pixel intensities
+pixels_0, pixels_1 = analyze_pixel_intensities(train_labels, sample_size=1000)
+

Processing negative samples: 500it [00:01, 489.64it/s]
+Processing positive samples: 500it [00:01, 453.75it/s]
+

# Plot pixel intensity histograms
+plt.figure(figsize=(15, 10))
+
+plt.subplot(2, 2, 1)
+plt.hist(pixels_0, bins=50, alpha=0.7, label='0', color='blue', density=True)
+plt.hist(pixels_1, bins=50, alpha=0.7, label='1', color='red', density=True)
+plt.title('Pixel Intensity Distribution (All Channels)')
+plt.xlabel('Pixel Intensity')
+plt.ylabel('Density')
+plt.legend()
+
+plt.subplot(2, 2, 2)
+# Sample a smaller subset for individual channels
+sample_img = Image.open(train_labels.iloc[0]['path'])
+sample_array = np.array(sample_img)
+for i, color in enumerate(['Red', 'Green', 'Blue']):
+    plt.hist(sample_array[:, :, i].flatten(), bins=50, alpha=0.7, label=color, density=True)
+plt.title('Channel Intensity Distribution (Single Image)')
+plt.xlabel('Pixel Intensity')
+plt.ylabel('Density')
+plt.legend()
+
+plt.subplot(2, 2, 3)
+# Sample a smaller subset for individual channels
+sample_img = Image.open(train_labels.iloc[1]['path'])
+sample_array = np.array(sample_img)
+for i, color in enumerate(['Red', 'Green', 'Blue']):
+    plt.hist(sample_array[:, :, i].flatten(), bins=50, alpha=0.7, label=color, density=True)
+plt.title('Channel Intensity Distribution (Another Image)')
+plt.xlabel('Pixel Intensity')
+plt.ylabel('Density')
+plt.legend()
+
+plt.subplot(2, 2, 4)
+# Compare mean intensities
+means_0 = []
+means_1 = []
+
+for i in tqdm(range(200)):  # Smaller sample for performance
+    neg_img = Image.open(train_labels[train_labels['label'] == 0].iloc[i]['path'])
+    pos_img = Image.open(train_labels[train_labels['label'] == 1].iloc[i]['path'])
+
+    means_0.append(np.mean(np.array(neg_img)))
+    means_1.append(np.mean(np.array(pos_img)))
+
+plt.hist(means_0, bins=30, alpha=0.7, label='0', color='blue', density=True)
+plt.hist(means_1, bins=30, alpha=0.7, label='1', color='red', density=True)
+plt.title('Mean Image Intensity Distribution')
+plt.xlabel('Mean Pixel Intensity')
+plt.ylabel('Density')
+plt.legend()
+
+plt.tight_layout()
+plt.show()
+

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:04<00:00, 41.99it/s]
+

# Convert labels to strings for the generator
+train_labels['label_str'] = train_labels['label'].astype(str)
+
+# Prepare a subset for hyperparameter tuning
+tuning_subset = pd.concat([
+    train_labels[train_labels['label'] == 0].sample(5000, random_state=42),
+    train_labels[train_labels['label'] == 1].sample(5000, random_state=42)
+])
+
+# Split into train and validation
+train_df, val_df = train_test_split(
+    tuning_subset,
+    test_size=0.2,
+    stratify=tuning_subset['label'],
+    random_state=42
+)
+

# Prepare data generators - use class_mode='binary' with string labels
+datagen = ImageDataGenerator(
+    rescale=1./255,
+    rotation_range=20,
+    width_shift_range=0.1,
+    height_shift_range=0.1,
+    horizontal_flip=True,
+    vertical_flip=True,
+    zoom_range=0.1
+)
+
+# Create data generators using the path column and string labels
+train_generator = datagen.flow_from_dataframe(
+    dataframe=train_df,
+    x_col='path',
+    y_col='label_str',
+    target_size=(IMG_SIZE, IMG_SIZE),
+    batch_size=BATCH_SIZE,
+    class_mode='binary'
+)
+
+val_generator = datagen.flow_from_dataframe(
+    dataframe=val_df,
+    x_col='path',
+    y_col='label_str',
+    target_size=(IMG_SIZE, IMG_SIZE),
+    batch_size=BATCH_SIZE,
+    class_mode='binary',
+    shuffle=False
+)
+

Found 8000 validated image filenames belonging to 2 classes.
+Found 2000 validated image filenames belonging to 2 classes.
+

# Single flexible function to create CNN models with different architectures and regularization
+def create_cnn_model(architecture='deeper', regularization=None, reg_strength=.001,
+                     dropout_rate=.5, additional_dropout=False, additional_bn=False):
+    """
+    Create a CNN model with specified architecture and regularization.
+
+    Parameters:
+    - architecture: 'simple', 'deeper', or 'wider'
+    - regularization: None, 'l1', 'l2', 'l1_l2'
+    - reg_strength: regularization strength
+    - dropout_rate: dropout rate for dense layers
+    - additional_dropout: whether to add dropout after convolutional layers
+    - additional_bn: whether to add additional batch normalization layers
+
+    Returns:
+    - Compiled Keras model
+    """
+
+    # Define regularization function
+    if regularization == 'l1':
+        reg = l1(reg_strength)
+    elif regularization == 'l2':
+        reg = l2(reg_strength)
+    elif regularization == 'l1_l2':
+        reg = l1_l2(l1=reg_strength/2, l2=reg_strength/2)
+    else:  # 'none'
+        reg = None
+
+    # Define architecture parameters
+    if architecture == 'simple':
+        conv_layers = [
+            (32, 3),  # (filters, kernel_size)
+            (64, 3)
+        ]
+        dense_units = 64
+    elif architecture == 'deeper':
+        conv_layers = [
+            (32, 3),
+            (64, 3),
+            (128, 3)
+        ]
+        dense_units = 128
+    elif architecture == 'wider':
+        conv_layers = [
+            (64, 3),
+            (128, 3),
+            (256, 3)
+        ]
+        dense_units = 256
+    else:
+        raise ValueError("Architecture must be 'simple', 'deeper', or 'wider'")
+
+    # Build the model
+    model = Sequential()
+
+    # Input layer
+    model.add(Conv2D(conv_layers[0][0], (conv_layers[0][1], conv_layers[0][1]),
+                    activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3),
+                    kernel_regularizer=reg))
+    model.add(BatchNormalization())
+    model.add(MaxPooling2D((2, 2)))
+
+    if additional_dropout:
+        model.add(Dropout(0.2))
+    if additional_bn:
+        model.add(BatchNormalization())
+
+    # Additional convolutional layers
+    for filters, kernel_size in conv_layers[1:]:
+        model.add(Conv2D(filters, (kernel_size, kernel_size),
+                        activation='relu', kernel_regularizer=reg))
+        model.add(BatchNormalization())
+        model.add(MaxPooling2D((2, 2)))
+
+        if additional_dropout:
+            model.add(Dropout(0.2))
+        if additional_bn:
+            model.add(BatchNormalization())
+
+    # Dense layers
+    model.add(Flatten())
+    model.add(Dense(dense_units, activation='relu', kernel_regularizer=reg))
+
+    if additional_bn:
+        model.add(BatchNormalization())
+
+    model.add(Dropout(dropout_rate))
+    model.add(Dense(1, activation='sigmoid'))
+
+    # Compile the model
+    model.compile(optimizer='adam',
+                  loss='binary_crossentropy',
+                  metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])
+
+    return model
+

# Test different model architectures
+arch_models = [
+    {
+        "name": "Simple CNN",
+        "params": { 'architecture': 'simple', },
+    },
+    {
+        "name": "Deeper CNN",
+        "params": { 'architecture': 'deeper', },
+    },
+    {
+        "name": "Wider CNN",
+        "params": { 'architecture': 'wider', },
+    },
+]
+
+# Create directories for checkpoints and histories
+os.makedirs("checkpoints", exist_ok=True)
+os.makedirs("histories", exist_ok=True)
+
+for model in arch_models:
+    model["checkpoint"] = os.path.join("checkpoints", "%s.keras" % model["name"])
+    model["history_file"] = os.path.join("histories", "%s.csv" % model["name"])
+

# Load and evaluate each model
+for model in arch_models:
+    model["instance"] = load_model(model["checkpoint"])
+
+    # Evaluate the model
+    val_generator.reset()
+    val_preds = model["instance"].predict(val_generator)
+    model["auc"] = roc_auc_score(val_generator.classes, val_preds)
+
+    model["history"] = pd.read_csv(model["history_file"], sep=',', engine='python')
+
+    print("%s Validation AUC: %.4f" % (model["name"], model["auc"]))
+
+arch_models_df = pd.DataFrame(arch_models)
+

WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
+I0000 00:00:1757490264.572239 1667087 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3546 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1660 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5
+/home/farzat/files/University/Colorado/courses/csca5632=unsupervised-algorithms-in-machine-learning/reviews/venv/lib/python3.13/site-packages/keras/src/trainers/data_adapters/py_dataset_adapter.py:121: UserWarning: Your `PyDataset` class should call `super().__init__(**kwargs)` in its constructor. `**kwargs` can include `workers`, `use_multiprocessing`, `max_queue_size`. Do not pass these arguments to `fit()`, as they will be ignored.
+  self._warn_if_super_not_called()
+2025-09-10 10:44:25.930818: I external/local_xla/xla/service/service.cc:163] XLA service 0x7f0bfc0044c0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
+2025-09-10 10:44:25.930853: I external/local_xla/xla/service/service.cc:171]   StreamExecutor device (0): NVIDIA GeForce GTX 1660 Ti, Compute Capability 7.5
+2025-09-10 10:44:25.941837: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
+2025-09-10 10:44:25.990517: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91200
+2025-09-10 10:44:26.179768: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:546] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[64,32,94,94]{3,2,1,0}, u8[0]{0}) custom-call(f32[64,3,96,96]{3,2,1,0}, f32[32,3,3,3]{3,2,1,0}, f32[32]{0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"activation_mode":"kRelu","conv_result_scale":1,"side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false,"reification_cost":[]}
+2025-09-10 10:44:26.263880: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:546] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[64,64,45,45]{3,2,1,0}, u8[0]{0}) custom-call(f32[64,32,47,47]{3,2,1,0}, f32[64,32,3,3]{3,2,1,0}, f32[64]{0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"activation_mode":"kRelu","conv_result_scale":1,"side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false,"reification_cost":[]}
+

 3/32 ━━━━━━━━━━━━━━━━━━━━ 1s 60ms/step

I0000 00:00:1757490266.659969 1673357 device_compiler.h:196] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
+

31/32 ━━━━━━━━━━━━━━━━━━━━ 0s 110ms/step

2025-09-10 10:44:30.161400: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:546] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[16,32,94,94]{3,2,1,0}, u8[0]{0}) custom-call(f32[16,3,96,96]{3,2,1,0}, f32[32,3,3,3]{3,2,1,0}, f32[32]{0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"activation_mode":"kRelu","conv_result_scale":1,"side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false,"reification_cost":[]}
+2025-09-10 10:44:30.182667: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:546] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[16,64,45,45]{3,2,1,0}, u8[0]{0}) custom-call(f32[16,32,47,47]{3,2,1,0}, f32[64,32,3,3]{3,2,1,0}, f32[64]{0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"activation_mode":"kRelu","conv_result_scale":1,"side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false,"reification_cost":[]}
+

32/32 ━━━━━━━━━━━━━━━━━━━━ 5s 124ms/step
+Simple CNN Validation AUC: 0.9180
+

2025-09-10 10:44:31.373871: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:546] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[64,128,20,20]{3,2,1,0}, u8[0]{0}) custom-call(f32[64,64,22,22]{3,2,1,0}, f32[128,64,3,3]{3,2,1,0}, f32[128]{0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"activation_mode":"kRelu","conv_result_scale":1,"side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false,"reification_cost":[]}
+

31/32 ━━━━━━━━━━━━━━━━━━━━ 0s 114ms/step

2025-09-10 10:44:35.371030: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:546] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[16,128,20,20]{3,2,1,0}, u8[0]{0}) custom-call(f32[16,64,22,22]{3,2,1,0}, f32[128,64,3,3]{3,2,1,0}, f32[128]{0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"activation_mode":"kRelu","conv_result_scale":1,"side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false,"reification_cost":[]}
+

32/32 ━━━━━━━━━━━━━━━━━━━━ 5s 128ms/step
+Deeper CNN Validation AUC: 0.9331
+

# Compare model performance
+plt.figure(figsize=(10, 6))
+
+bars = plt.bar(arch_models_df["name"], arch_models_df["auc"], color=['skyblue', 'lightgreen', 'lightcoral'])
+plt.title('Model Comparison - Validation AUC Scores')
+plt.ylabel('AUC Score')
+plt.ylim(.7, 1.0)
+
+# Add value labels on bars
+for bar, score in zip(bars, arch_models_df["auc"]):
+    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + .005,
+             '%.4f' % score, ha='center', va='bottom')
+
+plt.tight_layout()
+plt.show()
+

# Plot training history for the best model
+arch_best_model = arch_models[arch_models_df.auc.idxmax()]
+
+plt.figure(figsize=(15, 5))
+
+plt.subplot(1, 3, 1)
+plt.plot(arch_best_model['history']['accuracy'], label='Training Accuracy')
+plt.plot(arch_best_model['history']['val_accuracy'], label='Validation Accuracy')
+plt.title('%s - Accuracy' % arch_best_model["name"])
+plt.xlabel('Epoch')
+plt.ylabel('Accuracy')
+plt.legend()
+
+plt.subplot(1, 3, 2)
+plt.plot(arch_best_model['history']['loss'], label='Training Loss')
+plt.plot(arch_best_model['history']['val_loss'], label='Validation Loss')
+plt.title('%s - Loss' % arch_best_model["name"])
+plt.xlabel('Epoch')
+plt.ylabel('Loss')
+plt.legend()
+
+plt.subplot(1, 3, 3)
+plt.plot(arch_best_model['history']['auc'], label='Training AUC')
+plt.plot(arch_best_model['history']['val_auc'], label='Validation AUC')
+plt.title('%s - AUC' % arch_best_model["name"])
+plt.xlabel('Epoch')
+plt.ylabel('AUC')
+plt.legend()
+
+plt.tight_layout()
+plt.show()
+

# Test different regularization methods on the best architecture
+best_architecture = arch_best_model["params"]["architecture"]
+
+reg_models = [
+    {
+        "name": "Original (No Reg)",
+        "params": {'architecture': best_architecture, 'regularization': None}
+    },
+    {
+        "name": "L2 Regularization",
+        "params": {'architecture': best_architecture, 'regularization': 'l2', 'reg_strength': 0.001}
+    },
+    {
+        "name": "L1 Regularization",
+        "params": {'architecture': best_architecture, 'regularization': 'l1', 'reg_strength': 0.001}
+    },
+    {
+        "name": "Elastic Net (L1+L2)",
+        "params": {'architecture': best_architecture, 'regularization': 'l1_l2', 'reg_strength': 0.001}
+    },
+    {
+        "name": "Increased Dropout",
+        "params": {'architecture': best_architecture, 'regularization': None,
+                  'dropout_rate': 0.6, 'additional_dropout': True}
+    },
+    {
+        "name": "Additional BatchNorm",
+        "params": {'architecture': best_architecture, 'regularization': None,
+                  'additional_bn': True}
+    }
+]
+
+# Create checkpoints and history files for each regularization model
+for model in reg_models:
+    model["checkpoint"] = os.path.join("checkpoints", "%s %s.keras" % (best_architecture, model["name"]))
+    model["history_file"] = os.path.join("histories", "%s %s.csv" % (best_architecture, model["name"]))
+

# Load and evaluate each regularization model
+for model in reg_models:
+    model["instance"] = load_model(model["checkpoint"])
+
+    # Evaluate the model
+    val_generator.reset()
+    val_preds = model["instance"].predict(val_generator)
+    model["auc"] = roc_auc_score(val_generator.classes, val_preds)
+
+    model["history"] = pd.read_csv(model["history_file"], sep=',', engine='python')
+
+    print("%s Validation AUC: %.4f" % (model["name"], model["auc"]))
+
+reg_models_df = pd.DataFrame(reg_models)
+

32/32 ━━━━━━━━━━━━━━━━━━━━ 4s 122ms/step
+Original (No Reg) Validation AUC: 0.9258
+32/32 ━━━━━━━━━━━━━━━━━━━━ 4s 118ms/step
+L2 Regularization Validation AUC: 0.9247
+32/32 ━━━━━━━━━━━━━━━━━━━━ 5s 129ms/step
+L1 Regularization Validation AUC: 0.9083
+32/32 ━━━━━━━━━━━━━━━━━━━━ 5s 127ms/step
+Elastic Net (L1+L2) Validation AUC: 0.9103
+32/32 ━━━━━━━━━━━━━━━━━━━━ 4s 121ms/step
+Increased Dropout Validation AUC: 0.9122
+32/32 ━━━━━━━━━━━━━━━━━━━━ 4s 123ms/step
+Additional BatchNorm Validation AUC: 0.9327
+

# Compare regularization methods
+plt.figure(figsize=(12, 6))
+
+bars = plt.bar(reg_models_df["name"], reg_models_df["auc"],
+               color=['skyblue', 'lightgreen', 'lightcoral', 'gold', 'lightpink', 'lightseagreen'])
+plt.title('Regularization Methods Comparison - Validation AUC Scores')
+plt.ylabel('AUC Score')
+plt.xticks(rotation=45, ha='right')
+plt.ylim(0.8, 1.0)
+
+# Add value labels on bars
+for bar, score in zip(bars, reg_models_df["auc"]):
+    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.005,
+             '%.4f' % score, ha='center', va='bottom')
+
+plt.tight_layout()
+plt.show()
+
+# Find the best regularization method
+best_reg_model = reg_models[reg_models_df.auc.idxmax()]
+best_reg_method = best_reg_model["name"]
+
+print("\nBest regularization method: %s with AUC: %.4f" % (best_reg_method, best_reg_model['auc']))
+

+Best regularization method: Additional BatchNorm with AUC: 0.9327
+

# Plot training curves for the best regularization method
+plt.figure(figsize=(15, 5))
+
+plt.subplot(1, 3, 1)
+plt.plot(best_reg_model['history']['accuracy'], label='Training Accuracy')
+plt.plot(best_reg_model['history']['val_accuracy'], label='Validation Accuracy')
+plt.title('%s - Accuracy' % best_reg_model["name"])
+plt.xlabel('Epoch')
+plt.ylabel('Accuracy')
+plt.legend()
+
+plt.subplot(1, 3, 2)
+plt.plot(best_reg_model['history']['loss'], label='Training Loss')
+plt.plot(best_reg_model['history']['val_loss'], label='Validation Loss')
+plt.title('%s - Loss' % best_reg_model["name"])
+plt.xlabel('Epoch')
+plt.ylabel('Loss')
+plt.legend()
+
+plt.subplot(1, 3, 3)
+plt.plot(best_reg_model['history']['auc'], label='Training AUC')
+plt.plot(best_reg_model['history']['val_auc'], label='Validation AUC')
+plt.title('%s - AUC' % best_reg_model["name"])
+plt.xlabel('Epoch')
+plt.ylabel('AUC')
+plt.legend()
+
+plt.tight_layout()
+plt.show()
+

# Train the best regularized model on the full dataset
+print("\nTraining the best regularized model (%s) on the full dataset..." % best_reg_method)
+
+# Prepare full data generators
+full_datagen = ImageDataGenerator(
+    rescale=1./255,
+    validation_split=0.2,
+    rotation_range=20,
+    width_shift_range=0.1,
+    height_shift_range=0.1,
+    horizontal_flip=True,
+    vertical_flip=True,
+    zoom_range=0.1
+)
+
+full_train_generator = full_datagen.flow_from_dataframe(
+    dataframe=train_labels,
+    x_col='path',
+    y_col='label_str',
+    target_size=(IMG_SIZE, IMG_SIZE),
+    batch_size=BATCH_SIZE,
+    class_mode='binary',
+    subset='training'
+)
+
+full_val_generator = full_datagen.flow_from_dataframe(
+    dataframe=train_labels,
+    x_col='path',
+    y_col='label_str',
+    target_size=(IMG_SIZE, IMG_SIZE),
+    batch_size=BATCH_SIZE,
+    class_mode='binary',
+    subset='validation',
+    shuffle=False
+)
+
+# Add callbacks for final training
+final_checkpoint = ModelCheckpoint("final_model.keras", save_best_only=True, monitor='val_auc', mode='max')
+final_csv_logger = CSVLogger("final_training_history.csv")
+

+Training the best regularized model (Additional BatchNorm) on the full dataset...
+Found 176020 validated image filenames belonging to 2 classes.
+Found 44005 validated image filenames belonging to 2 classes.
+

# Load the best model from checkpoint
+final_model = load_model("final_model.keras")
+final_history = pd.read_csv("final_training_history.csv", sep=',', engine='python')
+
+# Evaluate the final model
+final_val_preds = final_model.predict(full_val_generator)
+final_val_labels = full_val_generator.classes
+
+final_val_pred_classes = (final_val_preds > 0.5).astype(int).flatten()
+final_val_auc = roc_auc_score(final_val_labels, final_val_preds)
+
+print("Final Model Validation AUC: %.4f" % final_val_auc)
+print("\nClassification Report:")
+print(classification_report(final_val_labels, final_val_pred_classes))
+

/home/farzat/files/University/Colorado/courses/csca5632=unsupervised-algorithms-in-machine-learning/reviews/venv/lib/python3.13/site-packages/keras/src/trainers/data_adapters/py_dataset_adapter.py:121: UserWarning: Your `PyDataset` class should call `super().__init__(**kwargs)` in its constructor. `**kwargs` can include `workers`, `use_multiprocessing`, `max_queue_size`. Do not pass these arguments to `fit()`, as they will be ignored.
+  self._warn_if_super_not_called()
+

687/688 ━━━━━━━━━━━━━━━━━━━━ 0s 130ms/step

2025-09-10 10:46:45.955310: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:546] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[37,32,94,94]{3,2,1,0}, u8[0]{0}) custom-call(f32[37,3,96,96]{3,2,1,0}, f32[32,3,3,3]{3,2,1,0}, f32[32]{0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"activation_mode":"kRelu","conv_result_scale":1,"side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false,"reification_cost":[]}
+2025-09-10 10:46:45.998388: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:546] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[37,64,45,45]{3,2,1,0}, u8[0]{0}) custom-call(f32[37,32,47,47]{3,2,1,0}, f32[64,32,3,3]{3,2,1,0}, f32[64]{0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"activation_mode":"kRelu","conv_result_scale":1,"side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false,"reification_cost":[]}
+2025-09-10 10:46:46.097799: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:546] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[37,128,20,20]{3,2,1,0}, u8[0]{0}) custom-call(f32[37,64,22,22]{3,2,1,0}, f32[128,64,3,3]{3,2,1,0}, f32[128]{0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"activation_mode":"kRelu","conv_result_scale":1,"side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false,"reification_cost":[]}
+

688/688 ━━━━━━━━━━━━━━━━━━━━ 91s 131ms/step
+Final Model Validation AUC: 0.9878
+
+Classification Report:
+              precision    recall  f1-score   support
+
+           0       0.95      0.97      0.96     26233
+           1       0.95      0.93      0.94     17772
+
+    accuracy                           0.95     44005
+   macro avg       0.95      0.95      0.95     44005
+weighted avg       0.95      0.95      0.95     44005
+
+

# Plot training curves for the best regularization method
+plt.figure(figsize=(15, 5))
+
+plt.subplot(1, 3, 1)
+plt.plot(final_history['accuracy'], label='Training Accuracy')
+plt.plot(final_history['val_accuracy'], label='Validation Accuracy')
+plt.title('Final model - Accuracy')
+plt.xlabel('Epoch')
+plt.ylabel('Accuracy')
+plt.legend()
+
+plt.subplot(1, 3, 2)
+plt.plot(final_history['loss'], label='Training Loss')
+plt.plot(final_history['val_loss'], label='Validation Loss')
+plt.title('Final model - Loss')
+plt.xlabel('Epoch')
+plt.ylabel('Loss')
+plt.legend()
+
+plt.subplot(1, 3, 3)
+plt.plot(final_history['auc'], label='Training AUC')
+plt.plot(final_history['val_auc'], label='Validation AUC')
+plt.title('Final model - AUC')
+plt.xlabel('Epoch')
+plt.ylabel('AUC')
+plt.legend()
+
+plt.tight_layout()
+plt.show()
+

# Confusion matrix
+cm = confusion_matrix(final_val_labels, final_val_pred_classes)
+plt.figure(figsize=(6, 5))
+sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
+plt.title('Confusion Matrix - Final Model')
+plt.ylabel('True Label')
+plt.xlabel('Predicted Label')
+plt.show()
+

# Prepare test data for prediction
+test_files = os.listdir(TEST_DIR)
+test_ids = [os.path.splitext(f)[0] for f in test_files]
+test_paths = [os.path.join(TEST_DIR, f) for f in test_files]
+
+test_df = pd.DataFrame({
+    'id': test_ids,
+    'path': test_paths
+})
+
+test_datagen = ImageDataGenerator(rescale=1./255)
+
+test_generator = test_datagen.flow_from_dataframe(
+    dataframe=test_df,
+    x_col='path',
+    y_col=None,
+    target_size=(IMG_SIZE, IMG_SIZE),
+    batch_size=BATCH_SIZE,
+    class_mode=None,
+    shuffle=False
+)
+

Found 57458 validated image filenames.
+

# Make predictions on test set
+test_preds = final_model.predict(test_generator)
+

/home/farzat/files/University/Colorado/courses/csca5632=unsupervised-algorithms-in-machine-learning/reviews/venv/lib/python3.13/site-packages/keras/src/trainers/data_adapters/py_dataset_adapter.py:121: UserWarning: Your `PyDataset` class should call `super().__init__(**kwargs)` in its constructor. `**kwargs` can include `workers`, `use_multiprocessing`, `max_queue_size`. Do not pass these arguments to `fit()`, as they will be ignored.
+  self._warn_if_super_not_called()
+

896/898 ━━━━━━━━━━━━━━━━━━━━ 0s 45ms/step

2025-09-10 10:47:28.693071: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:546] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[50,32,94,94]{3,2,1,0}, u8[0]{0}) custom-call(f32[50,3,96,96]{3,2,1,0}, f32[32,3,3,3]{3,2,1,0}, f32[32]{0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"activation_mode":"kRelu","conv_result_scale":1,"side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false,"reification_cost":[]}
+2025-09-10 10:47:28.734984: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:546] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[50,64,45,45]{3,2,1,0}, u8[0]{0}) custom-call(f32[50,32,47,47]{3,2,1,0}, f32[64,32,3,3]{3,2,1,0}, f32[64]{0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"activation_mode":"kRelu","conv_result_scale":1,"side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false,"reification_cost":[]}
+2025-09-10 10:47:28.854444: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:546] Omitted potentially buggy algorithm eng14{k25=2} for conv (f32[50,128,20,20]{3,2,1,0}, u8[0]{0}) custom-call(f32[50,64,22,22]{3,2,1,0}, f32[128,64,3,3]{3,2,1,0}, f32[128]{0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"activation_mode":"kRelu","conv_result_scale":1,"side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false,"reification_cost":[]}
+

898/898 ━━━━━━━━━━━━━━━━━━━━ 42s 46ms/step
+

# Create submission file
+submission = pd.DataFrame({
+    'id': test_ids,
+    'label': test_preds.flatten()
+})
+
+submission.to_csv('submission.csv', index=False)
+print("Submission file created successfully!")
+

Submission file created successfully!
+

	id	label
0	f38a6374c348f90b587e046aac6079959adf3835	0
1	c18f2d887b7ae4f6742ee445113fa1aef383ed77	1
2	755db6279dae599ebb4d39a9123cce439965282d	0
3	bc3f0c64fb968ff4a8bd33af6971ecae77c75e08	0
4	068aba587a4950175d04c680d38943fd488d6a9d	0

Histopathologic Cancer Detection with CNN¶

Binary classification of metastatic cancer in tissue samples¶

Problem Description: Histopathologic Cancer Detection¶

Comprehensive Exploratory Data Analysis (EDA)¶

Data cleaning and processing¶

Model architecture¶

Plan of attack¶

Techniques used to improve the training¶

Conclusion¶