import tensorflow as tf
import keras
from keras import layers
import tensorflow_datasets as tfds
import matplotlib.pyplot as plt
import numpy as np
from keras.utils import plot_model

# Report the installed TensorFlow version so results can be tied to a release.
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.19.0

# List the GPU devices visible to TensorFlow (an empty list means CPU only).
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

# Global hyperparameters shared by the data pipeline, the model and training.
IMG_SIZE = 256          # Resize all images/masks to 256x256
BATCH_SIZE = 16         # Number of samples per training batch
AUTOTUNE = tf.data.AUTOTUNE  # Optimize tf.data pipeline performance automatically
EPOCHS = 10             # Number of training epochs
BASE_FILTERS = 32       # Base number of filters in the U-Net (doubles each downsample)
LEARNING_RATE = 1e-4    # Learning rate for the optimizer

# Echo the configuration so it is recorded next to the run's results.
print(f"Image size: {IMG_SIZE}x{IMG_SIZE}")
print(f"Batch size: {BATCH_SIZE}, Epochs: {EPOCHS}")
print(f"Base filters: {BASE_FILTERS}, Learning rate: {LEARNING_RATE}")

Image size: 256x256
Batch size: 16, Epochs: 10
Base filters: 32, Learning rate: 0.0001

def preprocess(example):
    """Turn one Oxford-IIIT Pet example into an (image, binary mask) pair.

    Args:
        example: Feature dict from the dataset; the "image" and
            "segmentation_mask" entries are read.

    Returns:
        Tuple ``(img, mask)``: ``img`` is a float32 image resized to
        IMG_SIZE x IMG_SIZE and scaled to [0, 1]; ``mask`` is a float32
        mask of the same spatial size holding 1.0 for pet/border pixels
        and 0.0 for background.
    """
    img = tf.image.resize(example["image"], (IMG_SIZE, IMG_SIZE))
    # Nearest-neighbour keeps the mask values discrete (no blended labels).
    mask = tf.image.resize(example["segmentation_mask"], (IMG_SIZE, IMG_SIZE), method="nearest")
    # Raw labels: 1=pet, 2=border, 3=background  →  binary: (1,2)->1; 3->0
    mask = tf.cast(tf.not_equal(mask, 3), tf.float32)
    # Scale pixel intensities from [0, 255] to [0, 1].
    img = tf.cast(img, tf.float32) / 255.0
    return img, mask

# Load the train/test splits of the Oxford-IIIT Pet dataset as tf.data datasets.
train, test = tfds.load("oxford_iiit_pet", split=["train", "test"])

# Preprocess examples in parallel and prefetch batches so input preparation
# overlaps with model execution. Element order is unchanged (map stays
# deterministic by default), so batches match the sequential pipeline.
train = train.map(preprocess, num_parallel_calls=AUTOTUNE).batch(BATCH_SIZE).prefetch(AUTOTUNE)
test = test.map(preprocess, num_parallel_calls=AUTOTUNE).batch(BATCH_SIZE).prefetch(AUTOTUNE)

def plot_predictions_grid(images, masks, preds=None, num_samples=9):
    """Plot images next to their ground-truth (and optionally predicted) masks.

    Each row shows one sample: the image, the image with the ground-truth
    mask overlaid, and, when ``preds`` is given, the image with the
    predicted mask overlaid.

    Args:
        images: Indexable batch of images accepted by ``plt.imshow``.
        masks: Indexable batch of ground-truth masks aligned with ``images``.
        preds: Optional indexable batch of predicted masks; when ``None``
            the third column is omitted.
        num_samples: Maximum number of rows to draw; clamped to
            ``len(images)``.

    Raises:
        ValueError: If there is no sample to plot.
    """
    n = min(num_samples, len(images))
    if n <= 0:
        raise ValueError("plot_predictions_grid needs at least one sample to plot.")

    cols = 3 if preds is not None else 2
    # squeeze=False keeps `axes` two-dimensional even for a single row, so
    # the axes[row, col] indexing below works for every n.
    _, axes = plt.subplots(
        nrows=n, ncols=cols, figsize=(5 * cols, 3 * n), squeeze=False
    )

    # Iterate over the clamped count, not num_samples, so a batch smaller
    # than num_samples does not index past its end.
    for idx in range(n):
        imag = images[idx]

        # Original Image
        axes[idx, 0].set_title("Image")
        axes[idx, 0].imshow(imag)
        axes[idx, 0].axis("off")

        # Ground Truth Mask (semi-transparent overlay on the image)
        axes[idx, 1].set_title("Ground Truth Mask")
        axes[idx, 1].imshow(imag)
        axes[idx, 1].imshow(masks[idx], cmap="gray", alpha=0.7)
        axes[idx, 1].axis("off")

        # Predicted Mask (semi-transparent overlay on the image)
        if preds is not None:
            axes[idx, 2].set_title("Predicted Mask")
            axes[idx, 2].imshow(imag)
            axes[idx, 2].imshow(preds[idx], cmap="gray", alpha=0.7)
            axes[idx, 2].axis("off")

    plt.tight_layout()
    plt.show()

# Take the first training batch and show three image / ground-truth pairs
# (no predictions yet, so only two columns are drawn).
images, masks = next(iter(train))
plot_predictions_grid(images, masks, num_samples=3)

def double_conv_block(x, num_f):
    """Apply two consecutive 3x3 ReLU convolutions with ``num_f`` filters.

    'same' padding keeps the spatial size unchanged, so the output can be
    used directly as a skip connection.

    Args:
        x: Input feature map (Keras tensor).
        num_f: Number of filters for each convolution.

    Returns:
        Feature map with ``num_f`` channels and the same height/width as ``x``.
    """
    for _ in range(2):
        x = layers.Conv2D(num_f, 3, padding="same", activation="relu")(x)
    return x

def encoder_block(x, num_f):
    """Run one encoder stage: extract features, then halve the resolution.

    Args:
        x: Input feature map (Keras tensor).
        num_f: Number of filters for this stage.

    Returns:
        Tuple ``(x, p)``: ``x`` is the full-resolution feature map kept for
        the skip connection and ``p`` is the 2x-downsampled map passed to
        the next stage.
    """
    x = double_conv_block(x, num_f)      # feature extraction (same H,W)
    p = layers.MaxPooling2D(pool_size=2)(x)  # downsample by 2
    return x, p

# deconv -> concat -> convs  (classic U-Net)
def decoder_block(x, skip, num_f):
    """Run one decoder stage: upsample, merge the encoder skip, refine.

    Args:
        x: Feature map coming from the deeper (lower-resolution) stage.
        skip: Encoder feature map at the target resolution.
        num_f: Number of filters for this stage.

    Returns:
        Feature map with ``num_f`` channels at twice the resolution of ``x``.
    """
    # Transposed convolution with stride 2 doubles height and width.
    x = layers.Conv2DTranspose(num_f, 2, strides=2, padding="same")(x)  # upsample (H,W) x2
    x = layers.Concatenate()([x, skip])  # fuse with encoder skip
    x = double_conv_block(x, num_f)      # refine features
    return x

def make_unet(input_shape=(256, 256, 3), base_num_f=32, num_classes=1, final_act=None):
    """
    Tiny U-Net. Input H,W should be divisible by 16 (four downsamples).
    final_act: None -> auto ('sigmoid' if num_classes==1 else 'softmax').

    Args:
        input_shape: ``(height, width, channels)`` of the input images.
        base_num_f: Filters in the first encoder stage; doubled at every
            downsample (up to 16x in the bottleneck) and halved again on
            the way up.
        num_classes: Number of output channels of the 1x1 head.
        final_act: Activation of the head, or ``None`` to pick it from
            ``num_classes``.

    Returns:
        A ``keras.Model`` named 'U-Net' mapping an image to a per-pixel
        prediction with ``num_classes`` channels at the input resolution.

    Raises:
        AssertionError: If height or width is not a multiple of 16.
    """
    h, w, _ = input_shape
    assert h % 16 == 0 and w % 16 == 0, "Input H,W must be multiples of 16."

    inputs = keras.Input(shape=input_shape)

    # Encoder: keep each stage's features (f*) for the skip connections.
    f1, p1 = encoder_block(inputs, base_num_f)      # 256 -> 128
    f2, p2 = encoder_block(p1, base_num_f * 2)      # 128 -> 64
    f3, p3 = encoder_block(p2, base_num_f * 4)      # 64  -> 32
    f4, p4 = encoder_block(p3, base_num_f * 8)      # 32  -> 16

    # Bottleneck
    bn = double_conv_block(p4, base_num_f * 16)

    # Decoder: mirror the encoder, pairing each stage with its skip.
    d4 = decoder_block(bn, f4, base_num_f * 8)      # 16 -> 32
    d3 = decoder_block(d4, f3, base_num_f * 4)      # 32 -> 64
    d2 = decoder_block(d3, f2, base_num_f * 2)      # 64 -> 128
    d1 = decoder_block(d2, f1, base_num_f)          # 128 -> 256

    # Head: 1x1 convolution producing the per-pixel class scores.
    act = final_act if final_act is not None else ('sigmoid' if num_classes == 1 else 'softmax')
    outputs = layers.Conv2D(num_classes, 1, activation=act, padding='same')(d1)

    return keras.Model(inputs, outputs, name='U-Net')

# Build the binary-segmentation U-Net from the global configuration.
# BASE_FILTERS is passed explicitly so changing it in the config section
# actually changes the network width.
model = make_unet(
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    base_num_f=BASE_FILTERS,
    num_classes=1,
    final_act='sigmoid',
)

# Render the layer graph to unet.png.
plot_model(
    model,
    to_file="unet.png",
    show_shapes=False,
    show_layer_names=False,
    dpi=120,
    rankdir='LR',   # horizontal layout
    expand_nested=False
)

# Binary cross-entropy matches the single-channel sigmoid head; Adam uses
# the learning rate from the global configuration.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=LEARNING_RATE),
    loss="binary_crossentropy",
)

# Train the model
# The test split doubles as the validation set; the returned History
# object holds the per-epoch "loss" and "val_loss" curves.
history = model.fit(
    train,
    validation_data=test,
    epochs=EPOCHS,
)

Epoch 1/10
230/230 ━━━━━━━━━━━━━━━━━━━━ 38s 80ms/step - loss: 0.4611 - val_loss: 0.3203
Epoch 2/10
230/230 ━━━━━━━━━━━━━━━━━━━━ 14s 59ms/step - loss: 0.2973 - val_loss: 0.2842
Epoch 3/10
230/230 ━━━━━━━━━━━━━━━━━━━━ 14s 59ms/step - loss: 0.2668 - val_loss: 0.2567
Epoch 4/10
230/230 ━━━━━━━━━━━━━━━━━━━━ 14s 59ms/step - loss: 0.2459 - val_loss: 0.2374
Epoch 5/10
230/230 ━━━━━━━━━━━━━━━━━━━━ 14s 59ms/step - loss: 0.2321 - val_loss: 0.2296
Epoch 6/10
230/230 ━━━━━━━━━━━━━━━━━━━━ 14s 59ms/step - loss: 0.2224 - val_loss: 0.2218
Epoch 7/10
230/230 ━━━━━━━━━━━━━━━━━━━━ 13s 58ms/step - loss: 0.2141 - val_loss: 0.2189
Epoch 8/10
230/230 ━━━━━━━━━━━━━━━━━━━━ 14s 59ms/step - loss: 0.2080 - val_loss: 0.2193
Epoch 9/10
230/230 ━━━━━━━━━━━━━━━━━━━━ 14s 59ms/step - loss: 0.2025 - val_loss: 0.2168
Epoch 10/10
230/230 ━━━━━━━━━━━━━━━━━━━━ 13s 58ms/step - loss: 0.1958 - val_loss: 0.2265

def plot_history(hist, log_scale=False):
    """Draw the training and validation loss curves of a finished run.

    Args:
        hist: Object whose ``history`` mapping holds the per-epoch "loss"
            and "val_loss" sequences.
        log_scale: When true, use a logarithmic y-axis and label it so.
    """
    plt.figure(figsize=(8, 5))

    # Training curve: solid blue; validation curve: dashed red.
    plt.plot(hist.history["loss"], color="blue", linestyle="-", label="train")
    plt.plot(hist.history["val_loss"], color="red", linestyle="--", label="val")

    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title("Training vs Validation Loss")
    plt.legend()
    plt.grid(True, which="both", ls=":")

    if log_scale:
        # Switch the axis and replace the label set above.
        plt.yscale("log")
        plt.ylabel("Loss (log scale)")

    plt.show()

# Plot the loss curves recorded during training (linear y-axis).
plot_history(history)

# Check on data the model was trained on: first training batch.
images, masks = next(iter(train))
# Threshold the predicted values at 0.5 to get a hard 0/1 mask.
preds = (model.predict(images) > 0.5).astype("float32")

plot_predictions_grid(images, masks, preds, num_samples=3)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 54ms/step

# Check on held-out data: first test batch.
images, masks = next(iter(test))
# Threshold the predicted values at 0.5 to get a hard 0/1 mask.
preds = (model.predict(images) > 0.5).astype("float32")

plot_predictions_grid(images, masks, preds, num_samples=3)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 52ms/step

# Binary Image Segmentation with a Tiny U-Net

## Setup: Libraries

## Using GPU

## Global Configuration

## Dataset & Preprocessing [10/10]

## Visualization

## Building U-Net: Encoder Block [30/30]

## Building U-Net: Decoder Block [20/20]

## Building U-Net: Integrating Encoder/Decoder Blocks [30/30]

## Visualizing the Model Architecture

## Compile, Train, and Visualize Loss [10/10]

## Visualizing Model Predictions