%pip install wandb -Uq
%pip install nbformat

06 · Calibrating hyperparameters with Weights & Biases
Overview
- Goal: Learn how to use Weights & Biases to calibrate the hyperparameters of a DL model.
Weights & Biases (W&B) is a platform used by AI developers to track, visualize and manage their ML models and experiments. The coolest part is that W&B allows you to log various performance metrics during training, like training and validation loss, test set correlations, etc. Additionally, it allows you to compare different experiments or versions of your models, making it easier to identify the best-performing models and to see which hyperparameter configuration is optimal.
In this notebook, we will focus on using W&B as a tool to help calibrate the hyperparameters of the TF binding prediction model to find an optimal solution. However, we encourage you to explore other applications that W&B offers.
1. Installing W&B
First install w&b in your environment with the following command, it should take only a couple of seconds.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import os
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from scipy.stats import pearsonr
import wandb
# MPS only supports 32-bit floats, so force the default dtype on Apple GPUs.
if torch.backends.mps.is_available():
    torch.set_default_dtype(torch.float32)
    print("Set default to float32 for MPS compatibility")


def set_seed(seed: int = 42) -> None:
    """Seed numpy, random, torch, and the active accelerator for reproducibility.

    Also forces deterministic cuDNN kernels (and disables its autotuner),
    so repeated runs with the same seed produce the same results.
    """
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    # Seed whichever accelerator backend is present.
    if torch.backends.mps.is_available():
        torch.mps.manual_seed(seed)
    elif torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    print(f"Random seed set as {seed}")
set_seed(42)

# Pick the best available accelerator: Apple MPS, then CUDA, then CPU.
if torch.backends.mps.is_available():
    DEVICE = torch.device('mps')
elif torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print(f"Using device: {DEVICE}")

# 2. Log in to W&B
Sign up at wandb.ai if you don’t have an account, then run the cell below. You will be prompted for your API key (found under Settings → API Keys on the W&B website).
wandb.login()3. Load Data
Update DIR to the path where you downloaded the course data files.
# Root directory of the course data files — update to your local path.
DIR = '/Users/sofiasalazar/Library/CloudStorage/Box-Box/imlab-data/Courses/AI-in-Genomics-2025/data/'  # <- update this path

# Both tables are tab-separated, gzip-compressed text files.
sequences = pd.read_csv(os.path.join(DIR, 'chr22_sequences.txt.gz'),
                        sep='\t', compression='gzip')
scores = pd.read_csv(os.path.join(DIR, 'chr22_scores.txt.gz'),
                     sep='\t', compression='gzip', dtype='float32')

print("Sequences:", sequences.shape)
print("Scores:", scores.shape)

# 4. Define helper functions
These are borrowed from the notebook I shared last class.
def one_hot_encode(seq):
    """One-hot encode a DNA sequence into a (seq_len, 4) float32 numpy array.

    Columns correspond to A, C, G, T; 'N' maps to an all-zero row.

    Raises:
        ValueError: if the sequence contains characters outside "ACTGN".
    """
    allowed = set("ACTGN")
    if not set(seq).issubset(allowed):
        invalid = set(seq) - allowed
        raise ValueError(f"Invalid characters in sequence: {invalid}")
    nuc_d = {'A': [1.0, 0.0, 0.0, 0.0],
             'C': [0.0, 1.0, 0.0, 0.0],
             'G': [0.0, 0.0, 1.0, 0.0],
             'T': [0.0, 0.0, 0.0, 1.0],
             'N': [0.0, 0.0, 0.0, 0.0]}
    return np.array([nuc_d[x] for x in seq], dtype=np.float32)


class SeqDatasetOHE(Dataset):
    """Dataset that one-hot encodes DNA sequences and retrieves multi-TF scores."""

    def __init__(self, seq_df, scores_df, seq_col='sequence', target_col='window_name'):
        """Build tensors from a sequence DataFrame and a scores DataFrame.

        Args:
            seq_df: DataFrame with one row per window; must contain
                `seq_col` (DNA string) and `target_col` (window name).
            scores_df: DataFrame with one column per window name.
            seq_col: column of seq_df holding the DNA sequences.
            target_col: column of seq_df holding the window names.
        """
        window_names = seq_df[target_col].tolist()
        # scores_df is (num_TFs, num_windows); transpose so rows line up
        # with the sequences: (num_windows, num_TFs).
        self.labels = torch.tensor(
            scores_df[window_names].T.values.astype('float32')
        )
        # from_numpy shares the freshly built array's memory instead of
        # copying it like torch.tensor() does.
        self.ohe_seqs = torch.stack(
            [torch.from_numpy(one_hot_encode(s)) for s in seq_df[seq_col].tolist()]
        )  # shape: (num_windows, seq_len, 4)

    def __len__(self):
        return len(self.ohe_seqs)

    def __getitem__(self, idx):
        return self.ohe_seqs[idx], self.labels[idx]


def build_dataloaders(train_df, val_df, test_df, scores_df, batch_size=64):
    """Create (train, val, test) DataLoaders; only the training set is shuffled."""
    train_ds = SeqDatasetOHE(train_df, scores_df)
    val_ds = SeqDatasetOHE(val_df, scores_df)
    test_ds = SeqDatasetOHE(test_df, scores_df)
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_dl = DataLoader(val_ds, batch_size=batch_size)
    test_dl = DataLoader(test_ds, batch_size=batch_size)
    return train_dl, val_dl, test_dl

# 5. Training, validation and test set split
# 80/20 split into train/test, then carve a validation set
# (20% of the remaining training rows) out of the train portion.
train_df, test_df = train_test_split(sequences, test_size=0.2, random_state=42)
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

print("Train:", train_df.shape)
print("Val:  ", val_df.shape)
print("Test: ", test_df.shape)

# 6. Define the model
I added one extra parameter to the model definition: add_sigmoid. If set to True, the model will have a final sigmoid that will limit outputs to (0, 1).
class DNA_CNN(nn.Module):
    """Single-convolution CNN for TF binding prediction.

    Takes one-hot sequences shaped (batch, seq_len, 4) and returns
    (batch, num_outputs) scores. With add_sigmoid=True, a final sigmoid
    squashes each output into (0, 1).
    """

    def __init__(self, seq_len, num_filters=32, kernel_size=8, num_outputs=300, add_sigmoid=False):
        super().__init__()
        self.add_sigmoid = add_sigmoid
        self.conv = nn.Conv1d(4, num_filters, kernel_size=kernel_size)
        self.relu = nn.ReLU(inplace=True)
        # No padding, so the conv output length is seq_len - kernel_size + 1.
        self.linear = nn.Linear(num_filters * (seq_len - kernel_size + 1), num_outputs)
        self.sigmoid = nn.Sigmoid()

    def forward(self, xb):
        # Conv1d expects channels first: (batch, seq_len, 4) → (batch, 4, seq_len).
        hidden = self.relu(self.conv(xb.permute(0, 2, 1)))
        out = self.linear(hidden.flatten(1))
        return self.sigmoid(out) if self.add_sigmoid else out
Next, we establish the w&b logic to recover the hyperparameters that we are interested in calibrating.
W&B operates in sweeps. A sweep is an independent model training event/experiment with a given configuration of hyperparameters.
We will use wandb.log() to keep track of the training performance statistics so we can examine them across sweeps.
Compared to the other notebook, the only additions are:
- `wandb.log({...})` at the end of each epoch in `train_model`
- `wandb.log({...})` at the end of `test_model`
- An `optimizer_cls` argument in `train_model` so the sweep can switch between SGD and Adam
def train_model(model, train_dl, val_dl, device, lr=0.01, epochs=50,
                optimizer_cls=torch.optim.SGD):
    """Train a model, log losses to W&B, and return loss histories.

    Args:
        model: network to train (already moved to `device`).
        train_dl, val_dl: training and validation DataLoaders.
        device: torch device batches are moved onto.
        lr: learning rate.
        epochs: number of passes over the training data.
        optimizer_cls: optimizer class (e.g. torch.optim.SGD or
            torch.optim.Adam) so a sweep can switch optimizers.

    Returns:
        (train_losses, val_losses): per-epoch size-weighted MSE averages.
    """
    optimizer = optimizer_cls(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()
    train_losses, val_losses = [], []

    for epoch in range(epochs):
        # --- Training ---
        model.train()
        epoch_losses, epoch_counts = [], []
        for xb, yb in train_dl:
            xb, yb = xb.to(device), yb.to(device)
            loss = loss_fn(model(xb.float()), yb.float())  # forward + loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_losses.append(loss.item())
            epoch_counts.append(len(xb))
        # Weight each batch's loss by its size so a short final batch
        # doesn't skew the epoch average.
        train_loss = np.average(epoch_losses, weights=epoch_counts)
        train_losses.append(train_loss)

        # --- Validation ---
        model.eval()
        with torch.no_grad():
            val_batch_losses, val_batch_counts = [], []
            for xb, yb in val_dl:
                xb, yb = xb.to(device), yb.to(device)
                val_batch_losses.append(loss_fn(model(xb.float()), yb.float()).item())
                val_batch_counts.append(len(xb))
            val_loss = np.average(val_batch_losses, weights=val_batch_counts)
            val_losses.append(val_loss)

        print(f"E{epoch+1:03d} | train loss: {train_loss:.4f} | val loss: {val_loss:.4f}")

        # --- W&B hyperparameter tracking ---
        wandb.log({'epoch': epoch + 1,
                   'train_loss': train_loss,
                   'val_loss': val_loss})

    return train_losses, val_losses

# 8. Testing logic with W&B logging
As a way to evaluate each model, let’s modify the test_model() function so that W&B also keeps track of the test set performance metrics. In this case the metrics are:
- The average Pearson R across test set sequences: `test_avg_pearsonr`.
- The best Pearson R across all test set sequences: `best_pearsonr`.
def test_model(model, test_dl, device):
    """Run the model on the test set; log Pearson r metrics to W&B.

    Returns:
        (predictions, observations, pearson_per_sample): stacked prediction
        and target matrices plus the per-sequence Pearson correlations.
    """
    model.eval()
    pred_chunks, obs_chunks = [], []
    with torch.no_grad():
        for xb, yb in test_dl:
            pred_chunks.append(model(xb.to(device).float()).cpu().numpy())
            obs_chunks.append(yb.numpy())

    predictions = np.vstack(pred_chunks)
    observations = np.vstack(obs_chunks)

    # Pearson r between predicted and observed TF scores, one per sequence.
    pearson_per_sample = np.array([
        pearsonr(pred_row, obs_row)[0]
        for pred_row, obs_row in zip(predictions, observations)
    ])

    wandb.log({'test_avg_pearsonr': pearson_per_sample.mean(),
               'best_pearsonr': pearson_per_sample.max()})
    return predictions, observations, pearson_per_sample

# 9. Sweep configuration
Next we define the universe of hyperparameters that we want to explore across sweeps. W&B samples random combinations from the parameter lists below and runs each in a sweep as an independent experiment.
We tell W&B that we want to maximize test_avg_pearsonr (mean Pearson r across all test sequences) with metric.
# Universe of hyperparameters W&B samples from (the 'random' method picks
# combinations at random); the sweep tries to maximize test_avg_pearsonr.
sweep_config = {
    'method': 'random',
    'metric': {'name': 'test_avg_pearsonr', 'goal': 'maximize'},
    'parameters': {
        'num_filters': {'values': [4, 16, 32]},
        'kernel_size': {'values': [4, 6, 10]},
        'add_sigmoid': {'values': [True, False]},
        'learning_rate': {'values': [0.1, 0.05]},
        'batch_size': {'values': [16, 32, 64]},
        'optimizer': {'values': ['SGD', 'Adam']},
    },
}

# Register the sweep under the 'TF-binding' project and keep its id for the agent.
sweep_id = wandb.sweep(sweep_config, project='TF-binding')
print(f"Sweep ID: {sweep_id}")

# 10. Run the training
train_sweep() is the function W&B calls for each hyperparameter combination. It: 1. Initialises a W&B run and reads the sampled config. 2. Builds dataloaders with the config’s batch size 3. Creates the model with the config’s architecture hyperparameters. 4. Calls train_model and test_model.
Be sure to keep the project name (in this case TF-binding) identical each time you run this notebook so that all the sweeps are saved under the same project.
SEQ_LEN = len(sequences['sequence'].iloc[0])  # length of the first window's sequence
NUM_TFS = scores.shape[0]                     # rows of `scores` index the TFs


def train_sweep():
    """One sweep run: init W&B, build data and model from the sampled config,
    then train and evaluate."""
    with wandb.init(project='TF-binding'):
        config = wandb.config

        # Build dataloaders with the sweep's batch size
        train_dl, val_dl, test_dl = build_dataloaders(
            train_df, val_df, test_df, scores, batch_size=config.batch_size
        )

        # Build model with the sweep's architecture parameters
        model = DNA_CNN(
            seq_len=SEQ_LEN,
            num_filters=config.num_filters,
            kernel_size=config.kernel_size,
            num_outputs=NUM_TFS,
            add_sigmoid=config.add_sigmoid,
        ).to(DEVICE)

        # Select optimizer class from sweep config
        optimizer_cls = torch.optim.SGD if config.optimizer == 'SGD' else torch.optim.Adam

        # Train and evaluate - same functions as the simplified notebook
        train_model(model, train_dl, val_dl, DEVICE,
                    lr=config.learning_rate, epochs=30, optimizer_cls=optimizer_cls)
        test_model(model, test_dl, DEVICE)


# count sets how many hyperparameter combinations to try (max is 216 = 3×3×2×2×3×2)
wandb.agent(sweep_id, train_sweep, count=10)