
ML Concepts

MLPClassifier for Binary Classification (Breast Cancer Dataset)

  • Introduction: Build a binary classifier using an MLP on the Breast Cancer dataset.
  • Steps: Loading libraries, loading data, data pre‑processing, MLP training, model evaluation, and hyperparameter tuning.
Python:
# 1. Loading the Libraries
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay 

# 2. Loading the Data
data = load_breast_cancer()
X, y = data.data, data.target

# 3. Data Pre‑processing
trainX, testX, trainY, testY = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler().fit(trainX)
trainX_scaled = scaler.transform(trainX)
testX_scaled = scaler.transform(testX)

# 4. MLPClassifier Training
mlp_clf = MLPClassifier(hidden_layer_sizes=(150,100,50),
                        max_iter=300,
                        activation='relu',
                        solver='adam',
                        random_state=42)
mlp_clf.fit(trainX_scaled, trainY)

# 5. Model Evaluation
y_pred = mlp_clf.predict(testX_scaled)
print("Accuracy: {:.2f}".format(accuracy_score(testY, y_pred)))
cm = ConfusionMatrixDisplay.from_estimator(mlp_clf, testX_scaled, testY, display_labels=data.target_names)
cm.ax_.set_title("Confusion Matrix for Breast Cancer Dataset")
plt.show()
print(classification_report(testY, y_pred))
plt.plot(mlp_clf.loss_curve_)
plt.title("Loss Curve")
plt.xlabel("Iterations")
plt.ylabel("Cost")
plt.show()

# 6. Hyperparameter Tuning
param_grid = {
    'hidden_layer_sizes': [(150,100,50), (120,80,40), (100,50,30)],
    'max_iter': [50, 100, 150],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive']
}
grid = GridSearchCV(MLPClassifier(random_state=42), param_grid, n_jobs=-1, cv=5)
grid.fit(trainX_scaled, trainY)
print("Best parameters:", grid.best_params_)
grid_predictions = grid.predict(testX_scaled)
print("Tuned Accuracy: {:.2f}".format(accuracy_score(testY, grid_predictions)))
R:
# 1. Loading the Libraries
library(mlbench)
library(caret)
library(nnet)
library(e1071)
library(ggplot2)

# 2. Loading the Data (Breast Cancer from mlbench)
data("BreastCancer")
df <- BreastCancer
df <- na.omit(df)[, -1]  # Remove ID and missing values
df[,-10] <- lapply(df[,-10], function(x) as.numeric(as.character(x)))
df$Class <- factor(df$Class)

# 3. Data Pre‑processing
set.seed(42)
trainIndex <- createDataPartition(df$Class, p = 0.8, list = FALSE)
trainData <- df[trainIndex, ]
testData <- df[-trainIndex, ]
preProc <- preProcess(trainData[,-10], method = c("center", "scale"))
trainX_scaled <- predict(preProc, trainData[,-10])
testX_scaled <- predict(preProc, testData[,-10])

# 4. MLPClassifier Training using nnet (single hidden layer approximation)
mlp_clf <- nnet(Class ~ ., data = data.frame(trainX_scaled, Class = trainData$Class),
                size = 150, maxit = 300, decay = 0.01, trace = FALSE)

# 5. Model Evaluation
predictions <- predict(mlp_clf, testX_scaled, type = "class")
# nnet's predict(type = "class") returns a character vector, so convert it to a factor for confusionMatrix()
confusion <- confusionMatrix(factor(predictions, levels = levels(testData$Class)), testData$Class)
print(confusion)
# nnet does not provide a loss curve; plotting fitted values as a proxy
plot(mlp_clf$fitted.values, main = "Fitted Values", xlab = "Samples", ylab = "Fitted", col = "blue")

# 6. Hyperparameter Tuning using caret
grid <- expand.grid(.size = c(150, 120, 100),
                    .decay = c(0.0001, 0.05))
ctrl <- trainControl(method = "cv", number = 5)
set.seed(42)
mlp_tuned <- train(Class ~ ., data = data.frame(trainX_scaled, Class = trainData$Class),
                   method = "nnet",
                   tuneGrid = grid,
                   trControl = ctrl,
                   MaxNWts = 10000,
                   maxit = 150,
                   trace = FALSE)
print(mlp_tuned)
tuned_predictions <- predict(mlp_tuned, testX_scaled)
tuned_confusion <- confusionMatrix(tuned_predictions, testData$Class)
print(tuned_confusion)

MLPClassifier for Multi-Class Classification (Iris Dataset)

  • Introduction: Build an MLP classifier on the Iris dataset using scikit-learn.
  • The workflow includes data loading, pre‑processing, model training, evaluation (accuracy, confusion matrix, loss curve), and hyperparameter tuning via GridSearchCV.
Python:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import ConfusionMatrixDisplay

# 1. Loading the Data (Iris)
df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')
print(df['species'].value_counts())

# 2. Data Pre‑processing
X = df.drop('species', axis=1)
y = df['species']
trainX, testX, trainY, testY = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler().fit(trainX)
trainX_scaled = scaler.transform(trainX)
testX_scaled = scaler.transform(testX)

# 3. MLPClassifier Training
mlp_clf = MLPClassifier(hidden_layer_sizes=(150,100,50),
                        max_iter=300,
                        activation='relu',
                        solver='adam',
                        random_state=42)
mlp_clf.fit(trainX_scaled, trainY)

# 4. Model Evaluation
y_pred = mlp_clf.predict(testX_scaled)
print("Accuracy: {:.2f}".format(accuracy_score(testY, y_pred)))
cm = ConfusionMatrixDisplay.from_estimator(mlp_clf, testX_scaled, testY, display_labels=mlp_clf.classes_)
cm.ax_.set_title("Confusion Matrix for Iris Dataset")
plt.show()
print(classification_report(testY, y_pred))
plt.plot(mlp_clf.loss_curve_)
plt.title("Loss Curve")
plt.xlabel("Iterations")
plt.ylabel("Cost")
plt.show()

# 5. Hyperparameter Tuning
param_grid = {
    'hidden_layer_sizes': [(150,100,50), (120,80,40), (100,50,30)],
    'max_iter': [50, 100, 150],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive']
}
grid = GridSearchCV(MLPClassifier(random_state=42), param_grid, n_jobs=-1, cv=5)
grid.fit(trainX_scaled, trainY)
print("Best parameters:", grid.best_params_)
grid_predictions = grid.predict(testX_scaled)
print("Tuned Accuracy: {:.2f}".format(accuracy_score(testY, grid_predictions)))
R:
library(caret)
library(nnet)
library(e1071)
library(ggplot2)

# 1. Loading the Data
data(iris)
print(table(iris$Species))

# 2. Data Pre‑processing: Split and scale the data
set.seed(42)
trainIndex <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
trainData <- iris[trainIndex, ]
testData <- iris[-trainIndex, ]
preProc <- preProcess(trainData[, -5], method = c("center", "scale"))
trainX_scaled <- predict(preProc, trainData[, -5])
testX_scaled <- predict(preProc, testData[, -5])

# 3. MLPClassifier Training using nnet
mlp_model <- nnet(Species ~ ., data = data.frame(trainX_scaled, Species = trainData$Species),
                  size = 10, maxit = 300, trace = FALSE)

# 4. Model Evaluation
predictions <- predict(mlp_model, testX_scaled, type = "class")
acc <- mean(predictions == testData$Species)
print(paste("Accuracy:", round(acc, 2)))
# Convert the character predictions to a factor before computing the confusion matrix
confusion <- confusionMatrix(factor(predictions, levels = levels(testData$Species)), testData$Species)
print(confusion)

# 5. Hyperparameter Tuning: Vary hidden layer size and record accuracy
hidden_sizes <- c(5, 10, 15, 20)
acc_values <- numeric(length(hidden_sizes))
for(i in seq_along(hidden_sizes)){
  model_temp <- nnet(Species ~ ., data = data.frame(trainX_scaled, Species = trainData$Species),
                    size = hidden_sizes[i], maxit = 300, trace = FALSE)
  preds_temp <- predict(model_temp, testX_scaled, type = "class")
  acc_values[i] <- mean(preds_temp == testData$Species)
  cat("Hidden size:", hidden_sizes[i], "Accuracy:", round(acc_values[i], 2), "\n")
}
results <- data.frame(HiddenSize = hidden_sizes, Accuracy = acc_values)
print(results)
ggplot(results, aes(x = HiddenSize, y = Accuracy)) +
  geom_line(color = "blue") +
  geom_point(color = "red", size = 3) +
  ggtitle("Hyperparameter Tuning: Hidden Size vs Test Accuracy") +
  xlab("Hidden Size") +
  ylab("Test Accuracy")

Feedforward Neural Network (FNN)

  • Introduction: A basic feedforward neural network where data flows from input to output.
  • Uses the Iris dataset to demonstrate model training, evaluation, and hyperparameter tuning.
  • Implemented using PyTorch in Python and nnet in R.
Python:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

# 1. Load and preprocess data
data = load_iris()
X = data.data.astype(np.float32)
y = data.target.astype(np.int64)  # Ensure target is in long format for CrossEntropyLoss
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Define the Feedforward Neural Network model
class FNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(FNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# 3. Train the model with a given hidden size
def train_model(hidden_size):
    model = FNN(4, hidden_size, 3)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    epochs = 50
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(torch.from_numpy(X_train))
        loss = criterion(outputs, torch.from_numpy(y_train))
        loss.backward()
        optimizer.step()
    return model

# Train baseline model with hidden size = 10
baseline_model = train_model(10)

# 4. Evaluate the baseline model on the test set
with torch.no_grad():
    outputs_test = baseline_model(torch.from_numpy(X_test))
    _, predicted = torch.max(outputs_test, 1)
    baseline_accuracy = (predicted.numpy() == y_test).mean()
print("Baseline Test Accuracy (hidden size=10):", baseline_accuracy)

# 5. Hyperparameter Tuning: Vary hidden size and record test accuracy
hidden_sizes = [5, 10, 15, 20]
accuracies = []
for hs in hidden_sizes:
    model = train_model(hs)
    with torch.no_grad():
        outputs_test = model(torch.from_numpy(X_test))
        _, pred = torch.max(outputs_test, 1)
        acc = (pred.numpy() == y_test).mean()
        accuracies.append(acc)
        print(f"Hidden size: {hs}, Test Accuracy: {acc:.2f}")

# 6. Plot Hidden Size vs. Accuracy
plt.figure()
plt.plot(hidden_sizes, accuracies, marker='o', linestyle='-')
plt.xlabel("Hidden Size")
plt.ylabel("Test Accuracy")
plt.title("Hyperparameter Tuning: Hidden Size vs Test Accuracy")
plt.grid(True)
plt.show()
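# Optional (sketch): train_model() above does not record the loss per epoch, so
# re-train the baseline configuration here while tracking it, giving a loss curve
# comparable to the other sections
model = FNN(4, 10, 3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
losses = []
for epoch in range(50):
    optimizer.zero_grad()
    outputs = model(torch.from_numpy(X_train))
    loss = criterion(outputs, torch.from_numpy(y_train))
    loss.backward()
    optimizer.step()
    losses.append(loss.item())
plt.figure()
plt.plot(losses)
plt.xlabel("Epoch")
plt.ylabel("Training Loss")
plt.title("FNN Training Loss Curve (hidden size=10)")
plt.show()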
R:
library(nnet)
library(caret)
library(ggplot2)

# 1. Load the Iris dataset and split into training and testing sets
data(iris)
set.seed(42)
trainIndex <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
trainData <- iris[trainIndex, ]
testData <- iris[-trainIndex, ]

# 2. Train a baseline FNN model with hidden size of 10
baseline_model <- nnet(Species ~ ., data = trainData, size = 10, maxit = 200, trace = FALSE)
baseline_pred <- predict(baseline_model, testData, type = "class")
baseline_acc <- mean(baseline_pred == testData$Species)
print(paste("Baseline Test Accuracy (hidden size=10):", round(baseline_acc, 2)))

# 3. Hyperparameter Tuning: Vary hidden size and record test accuracy
hidden_sizes <- c(5, 10, 15, 20)
accuracies <- numeric(length(hidden_sizes))
for(i in seq_along(hidden_sizes)){
  mod <- nnet(Species ~ ., data = trainData, size = hidden_sizes[i], maxit = 200, trace = FALSE)
  pred <- predict(mod, testData, type = "class")
  acc <- mean(pred == testData$Species)
  accuracies[i] <- acc
  cat("Hidden size:", hidden_sizes[i], "Accuracy:", round(acc, 2), "\n")
}
results <- data.frame(HiddenSize = hidden_sizes, Accuracy = accuracies)
print(results)

# 4. Plot Hidden Size vs. Accuracy
ggplot(results, aes(x = HiddenSize, y = Accuracy)) +
  geom_line(color = "blue") +
  geom_point(color = "red", size = 3) +
  ggtitle("Hyperparameter Tuning: Hidden Size vs Test Accuracy") +
  xlab("Hidden Size") +
  ylab("Test Accuracy")

Convolutional Neural Network (CNN)

  • Introduction: Build an image classifier on the MNIST dataset using a CNN.
  • Model Evaluation: Plot training vs. validation loss curves and display test accuracy (and confusion matrix in Python).
  • Hyperparameter Tuning: Vary the number of filters in the first convolutional layer and visualize the impact on test/validation accuracy.
Python:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import CategoricalAccuracy
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import keras_tuner as kt

# 1. Load and Pre‑process the Data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# 2. Define a Model Builder for Hyperparameter Tuning
def build_cnn_model(hp):
    model = Sequential()
    # Tune the number of filters in the first Conv2D layer
    filters = hp.Int('filters', min_value=16, max_value=64, step=16)
    model.add(Conv2D(filters, (3,3), activation='relu', input_shape=(28,28,1)))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Flatten())
    # Tune the number of units in the Dense layer
    dense_units = hp.Int('dense_units', min_value=32, max_value=128, step=32)
    model.add(Dense(dense_units, activation='relu'))
    model.add(Dense(10, activation='softmax'))
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# 3. Hyperparameter Tuning using Keras Tuner (RandomSearch)
tuner = kt.RandomSearch(
    build_cnn_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=1,
    directory='cnn_tuning',
    project_name='mnist_cnn'
)
tuner.search(x_train, y_train, epochs=5, batch_size=128, validation_split=0.2)
best_model = tuner.get_best_models(num_models=1)[0]
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Best hyperparameters:", best_hp.values)

# 4. Train Best Model and Evaluate
history = best_model.fit(x_train, y_train, epochs=5, batch_size=128, validation_split=0.2)
loss, accuracy = best_model.evaluate(x_test, y_test)
print("Test accuracy:", accuracy)

# Plot training vs. validation loss curves
plt.figure()
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('CNN Training vs Validation Loss')
plt.legend()
plt.show()

# Display confusion matrix
y_pred = best_model.predict(x_test)
y_pred_labels = np.argmax(y_pred, axis=1)
y_true_labels = np.argmax(y_test, axis=1)
cm = confusion_matrix(y_true_labels, y_pred_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[str(i) for i in range(10)])
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix for MNIST CNN")
plt.show()

# 5. Visualize Hyperparameter Tuning Results
trials = tuner.oracle.get_best_trials(num_trials=len(tuner.oracle.trials)) 

filters_list = []
val_acc_list = []
for trial in trials:
    filters_list.append(trial.hyperparameters.get('filters'))
    val_acc_list.append(trial.metrics.get_last_value('val_accuracy'))
plt.figure()
sns.scatterplot(x=filters_list, y=val_acc_list, s=100, color='green')
plt.xlabel("Filters in First Conv Layer")
plt.ylabel("Validation Accuracy")
plt.title("Hyperparameter Tuning: Filters vs Validation Accuracy")
plt.grid(True)
plt.show()
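# Optional (sketch): display a few misclassified test digits to complement the
# confusion matrix above
mis_idx = np.where(y_pred_labels != y_true_labels)[0][:8]
if len(mis_idx) > 0:
    plt.figure(figsize=(12, 2))
    for i, idx in enumerate(mis_idx):
        plt.subplot(1, len(mis_idx), i + 1)
        plt.imshow(x_test[idx].reshape(28, 28), cmap='gray')
        plt.title(f"{y_true_labels[idx]}->{y_pred_labels[idx]}")
        plt.axis('off')
    plt.show()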
R:
library(keras)
library(ggplot2)
library(dplyr)

# 1. Load and Pre‑process the Data
mnist <- dataset_mnist()
x_train <- mnist$train$x
y_train <- mnist$train$y
x_test <- mnist$test$x
y_test <- mnist$test$y
x_train <- array_reshape(x_train, c(nrow(x_train), 28, 28, 1)) / 255
x_test <- array_reshape(x_test, c(nrow(x_test), 28, 28, 1)) / 255
y_train <- to_categorical(y_train, 10)
y_test <- to_categorical(y_test, 10)

# 2. Hyperparameter Tuning: Loop over candidate filters in the first Conv layer
candidate_filters <- c(16, 32, 48, 64)
results <- data.frame(filters = candidate_filters, test_accuracy = NA)
for(i in seq_along(candidate_filters)){
  model <- keras_model_sequential() %>%
    layer_conv_2d(filters = candidate_filters[i], kernel_size = c(3,3), activation = 'relu', input_shape = c(28,28,1)) %>%
    layer_max_pooling_2d(pool_size = c(2,2)) %>%
    layer_flatten() %>%
    layer_dense(units = 64, activation = 'relu') %>%
    layer_dense(units = 10, activation = 'softmax')
  model %>% compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = 'accuracy')
  history <- model %>% fit(x_train, y_train, epochs = 5, batch_size = 128, validation_split = 0.2, verbose = 0)
  scores <- model %>% evaluate(x_test, y_test, verbose = 0)
  results$test_accuracy[i] <- scores[[2]]
  cat("Filters:", candidate_filters[i], "Test Accuracy:", scores[[2]], "\n")
}
print(results)

# 3. Visualize Hyperparameter Tuning Results
ggplot(results, aes(x = filters, y = test_accuracy)) +
  geom_line(color = "blue") +
  geom_point(color = "red", size = 3) +
  ggtitle("Hyperparameter Tuning: Filters vs Test Accuracy") +
  xlab("Number of Filters in First Conv Layer") +
  ylab("Test Accuracy")

# 4. Model Evaluation: Train best model (highest accuracy) and plot loss curves
best_filters <- results$filters[which.max(results$test_accuracy)]
best_model <- keras_model_sequential() %>%
  layer_conv_2d(filters = best_filters, kernel_size = c(3,3), activation = 'relu', input_shape = c(28,28,1)) %>%
  layer_max_pooling_2d(pool_size = c(2,2)) %>%
  layer_flatten() %>%
  layer_dense(units = 64, activation = 'relu') %>%
  layer_dense(units = 10, activation = 'softmax')
best_model %>% compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = 'accuracy')
history <- best_model %>% fit(x_train, y_train, epochs = 5, batch_size = 128, validation_split = 0.2)
plot(1:length(history$metrics$loss), history$metrics$loss, type = 'l', col = 'blue',
     ylim = range(c(history$metrics$loss, history$metrics$val_loss)),
     xlab = "Epochs", ylab = "Loss", main = "Training vs Validation Loss")
lines(1:length(history$metrics$val_loss), history$metrics$val_loss, col = 'red')
legend("topright", legend = c("Train Loss", "Validation Loss"), col = c("blue", "red"), lty = 1)

Recurrent Neural Network (RNN)

  • Introduction: Build an RNN on synthetic sequential data and evaluate its performance.
  • Model Evaluation: Plot training and validation loss curves as well as final accuracy.
  • Hyperparameter Tuning: Tune the number of units in the first RNN layer and visualize its impact on validation accuracy.
Python:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
import keras_tuner as kt

# Generate synthetic sequential data
num_samples = 1000
timesteps = 10
input_dim = 1
X = np.random.rand(num_samples, timesteps, input_dim)
y = np.random.randint(2, size=(num_samples, 1))

# Define model builder function for hyperparameter tuning
def build_rnn_model(hp):
    model = Sequential()
    # Tune number of units in first RNN layer: 20 to 100 in steps of 10
    units1 = hp.Int('units1', min_value=20, max_value=100, step=10)
    model.add(SimpleRNN(units=units1, activation='relu', return_sequences=True, input_shape=(timesteps, input_dim)))
    # Also tune the number of units in the second RNN layer
    units2 = hp.Int('units2', min_value=10, max_value=50, step=5)
    model.add(SimpleRNN(units=units2, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Set up Keras Tuner RandomSearch
tuner = kt.RandomSearch(
    build_rnn_model,
    objective='val_accuracy',
    max_trials=5,
    executions_per_trial=1,
    directory='rnn_tuning',
    project_name='rnn_hyperparam'
)

# Run the hyperparameter search (using 20% of data for validation)
tuner.search(X, y, epochs=10, batch_size=32, validation_split=0.2)

# Get the best model and hyperparameters
best_model = tuner.get_best_models(num_models=1)[0]
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
print("Best Hyperparameters:", best_hp.values)

# Evaluate the best model
history = best_model.fit(X, y, epochs=10, batch_size=32, validation_split=0.2)
loss, accuracy = best_model.evaluate(X, y)
print("Best RNN Accuracy:", accuracy)

# Plot training and validation loss curves
plt.figure()
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("RNN Training vs Validation Loss")
plt.legend()
plt.show()

# Extract hyperparameter tuning results for visualization
trials = tuner.oracle.get_best_trials(num_trials=len(tuner.oracle.trials)) 
units1_vals = []
val_accuracies = []
for trial in trials:  # Iterate directly over the trials list
    units1_vals.append(trial.hyperparameters.get('units1'))
    val_acc = trial.metrics.get_last_value('val_accuracy')
    val_accuracies.append(val_acc)
plt.figure()
plt.scatter(units1_vals, val_accuracies, color='green')
plt.xlabel("Units in First RNN Layer")
plt.ylabel("Validation Accuracy")
plt.title("Hyperparameter Tuning: Units vs Validation Accuracy")
plt.grid(True)
plt.show()
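# Note: because the synthetic labels above are random, validation accuracy should
# hover around chance (~0.5); the point of this demo is the tuning workflow, not
# the absolute score.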
R:
library(keras)
library(ggplot2)

# Generate synthetic sequential data
num_samples <- 1000
timesteps <- 10
input_dim <- 1
X <- array(runif(num_samples * timesteps * input_dim), dim = c(num_samples, timesteps, input_dim))
y <- sample(0:1, num_samples, replace = TRUE)

# Hyperparameter Tuning: Vary the number of units in the first RNN layer
candidate_units <- seq(20, 100, by = 10)
accuracy_results <- data.frame(units = candidate_units, accuracy = NA)
for (i in seq_along(candidate_units)) {
  model <- keras_model_sequential() %>%
    layer_simple_rnn(units = candidate_units[i], activation = "relu", return_sequences = TRUE, input_shape = c(timesteps, input_dim)) %>%
    layer_simple_rnn(units = 20, activation = "relu") %>%
    layer_dense(units = 1, activation = "sigmoid")
  model %>% compile(optimizer = "adam", loss = "binary_crossentropy", metrics = "accuracy")
  history <- model %>% fit(X, y, epochs = 10, batch_size = 32, validation_split = 0.2, verbose = 0)
  scores <- model %>% evaluate(X, y, verbose = 0)
  accuracy_results$accuracy[i] <- scores[[2]]
  cat("Units:", candidate_units[i], "Accuracy:", scores[[2]], "\n")
}
print(accuracy_results)
# Plot hyperparameter tuning results
ggplot(accuracy_results, aes(x = units, y = accuracy)) +
  geom_line(color = "blue") +
  geom_point(color = "red", size = 3) +
  ggtitle("RNN Hyperparameter Tuning: Units vs Accuracy") +
  xlab("Units in First RNN Layer") +
  ylab("Accuracy")

# Model Evaluation: Train the best model (e.g., highest accuracy) and plot loss curves
best_units <- accuracy_results$units[which.max(accuracy_results$accuracy)]
best_model <- keras_model_sequential() %>%
  layer_simple_rnn(units = best_units, activation = "relu", return_sequences = TRUE, input_shape = c(timesteps, input_dim)) %>%
  layer_simple_rnn(units = 20, activation = "relu") %>%
  layer_dense(units = 1, activation = "sigmoid")
best_model %>% compile(optimizer = "adam", loss = "binary_crossentropy", metrics = "accuracy")
history <- best_model %>% fit(X, y, epochs = 10, batch_size = 32, validation_split = 0.2)
plot(1:length(history$metrics$loss), history$metrics$loss, type = 'l', col = 'blue',
     ylim = range(c(history$metrics$loss, history$metrics$val_loss)),
     xlab = "Epoch", ylab = "Loss", main = "Training vs Validation Loss")
lines(1:length(history$metrics$val_loss), history$metrics$val_loss, col = 'red')
legend("topright", legend = c("Train Loss", "Validation Loss"), col = c("blue", "red"), lty = 1)

Deep Belief Network (DBN)

  • Introduction: Build a DBN using unsupervised pre‑training on the Iris dataset.
  • Model Evaluation: Compute test accuracy and display a confusion matrix.
  • Hyperparameter Tuning: Use grid search (Python) or a tuning loop (R) to explore key parameters and visualize the effect on model accuracy.
Python:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from dbn.tensorflow import SupervisedDBNClassification
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

# 1. Loading the Data
data = load_iris()
X, y = data.data, data.target

# 2. Data Pre‑processing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3. Train a DBN Model with Initial Hyperparameters
clf = SupervisedDBNClassification(hidden_layers_structure=[256, 256],
                                  learning_rate=0.01,
                                  n_epochs=10,
                                  batch_size=32,
                                  activation_function='relu')
clf.fit(X_train_scaled, y_train)
base_acc = clf.score(X_test_scaled, y_test)
print("DBN Accuracy (base):", base_acc)

# 4. Model Evaluation: Confusion Matrix
y_pred = clf.predict(X_test_scaled)
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=data.target_names)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix for DBN on Iris")
plt.show()

# 5. Hyperparameter Tuning using GridSearchCV
param_grid = {
    'hidden_layers_structure': [[256,256], [128,128], [64,64]],
    'learning_rate': [0.01, 0.001],
    'n_epochs': [10, 20]
}
grid = GridSearchCV(SupervisedDBNClassification(activation_function='relu', batch_size=32, random_state=42),
                    param_grid, cv=3, n_jobs=-1)
grid.fit(X_train_scaled, y_train)
print("Best parameters:", grid.best_params_)
best_model = grid.best_estimator_
y_pred_best = best_model.predict(X_test_scaled)
tuned_acc = accuracy_score(y_test, y_pred_best)
print("Tuned DBN Accuracy:", tuned_acc)

# 6. Visualizing Hyperparameter Tuning Results
results = pd.DataFrame(grid.cv_results_)
# For example, visualize mean test score for different hidden layer sizes and n_epochs with fixed learning_rate
subset = results[results['param_learning_rate'] == 0.01].copy()
# The hidden_layers_structure values are lists, so convert them to strings before pivoting
subset['param_hidden_layers_structure'] = subset['param_hidden_layers_structure'].astype(str)
pivot_table = subset.pivot(index='param_hidden_layers_structure', columns='param_n_epochs', values='mean_test_score')
plt.figure(figsize=(8,6))
sns.heatmap(pivot_table, annot=True, fmt=".3f", cmap="viridis")
plt.title("Grid Search Mean Test Score\n(learning_rate=0.01)")
plt.xlabel("n_epochs")
plt.ylabel("hidden_layers_structure")
plt.show()
R:
library(deepnet)
library(caret)
library(nnet)     # provides class.ind() for one-hot encoding
library(ggplot2)

# 1. Loading the Data
data(iris)
X <- as.matrix(iris[, 1:4])
# One-hot encode the target variable
y <- class.ind(iris$Species)
true_labels <- max.col(y)

# 2. Split the Data
set.seed(42)
trainIndex <- sample(1:nrow(iris), size = 0.8 * nrow(iris))
trainX <- X[trainIndex, ]
testX <- X[-trainIndex, ]
trainY <- y[trainIndex, ]
testY <- y[-trainIndex, ]
true_test_labels <- max.col(testY)

# 3. Hyperparameter Tuning: Vary number of epochs and record accuracy
compute_accuracy <- function(numepochs) {
  model <- dbn.dnn.train(x = trainX, y = trainY, hidden = c(256,256), numepochs = numepochs)
  pred <- nn.predict(model, testX)
  pred_labels <- max.col(pred)
  acc <- mean(pred_labels == true_test_labels)
  return(acc)
}

epochs_seq <- c(10, 20, 30, 40, 50)
acc_values <- sapply(epochs_seq, compute_accuracy)
results <- data.frame(numepochs = epochs_seq, accuracy = acc_values)
print(results)

# 4. Plot the hyperparameter tuning results
p <- ggplot(results, aes(x = numepochs, y = accuracy)) +
  geom_line(color = "blue") +
  geom_point(color = "red", size = 3) +
  ggtitle("DBN Hyperparameter Tuning: Epochs vs Accuracy") +
  xlab("Number of Epochs") +
  ylab("Test Accuracy") +
  theme_minimal()
print(p)

# 5. Model Evaluation: Retrain with the best epoch count and display the confusion matrix
best_epochs <- epochs_seq[which.max(acc_values)]
best_model <- dbn.dnn.train(x = trainX, y = trainY, hidden = c(256,256), numepochs = best_epochs)
pred <- nn.predict(best_model, testX)
pred_labels <- max.col(pred)
cm <- table(Predicted = pred_labels, Actual = true_test_labels)
print(cm)

Autoencoder (AE)

  • Introduction: Build an autoencoder on the MNIST dataset to learn compressed representations.
  • Model Evaluation: Plot training and validation loss curves to assess reconstruction performance.
  • Hyperparameter Tuning: Vary the encoding dimension and visualize its impact on validation loss.
Python:
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import mnist
import keras_tuner as kt

# 1. Loading and Pre‑processing the Data
(x_train, _), (x_test, _) = mnist.load_data()
x_train = x_train.reshape((len(x_train), 784)).astype('float32') / 255.0
x_test = x_test.reshape((len(x_test), 784)).astype('float32') / 255.0

# 2. Define a model builder function for hyperparameter tuning
def build_autoencoder(hp):
    input_dim = 784
    # Hyperparameter: encoding dimension from 16 to 128 in steps of 16
    encoding_dim = hp.Int('encoding_dim', min_value=16, max_value=128, step=16)
    input_layer = Input(shape=(input_dim,))
    encoded = Dense(encoding_dim, activation='relu')(input_layer)
    decoded = Dense(input_dim, activation='sigmoid')(encoded)
    autoencoder = Model(input_layer, decoded)
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
    return autoencoder

# 3. Hyperparameter Tuning using Keras Tuner
tuner = kt.RandomSearch(
    build_autoencoder,
    objective='val_loss',
    max_trials=5,
    executions_per_trial=1,
    directory='autoencoder_tuning',
    project_name='ae_tuning'
)
tuner.search(x_train, x_train, epochs=10, validation_data=(x_test, x_test))
tuner.results_summary()

# Extract tuning results: plot encoding_dim vs. validation loss
trials = tuner.oracle.get_best_trials(num_trials=len(tuner.oracle.trials)) 
encoding_dims = []
val_losses = []
for trial in trials:
    encoding_dims.append(trial.hyperparameters.get('encoding_dim'))
    val_losses.append(trial.metrics.get_last_value('val_loss'))
plt.figure()
plt.scatter(encoding_dims, val_losses, color='purple')
plt.xlabel("Encoding Dimension")
plt.ylabel("Validation Loss")
plt.title("Hyperparameter Tuning Results")
plt.grid(True)
plt.show()

# 4. Model Evaluation: Train the best model and plot loss curves
best_model = tuner.get_best_models(num_models=1)[0]
history = best_model.fit(x_train, x_train, epochs=10, batch_size=256, validation_data=(x_test, x_test))
plt.figure()
plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Autoencoder Training Loss")
plt.legend()
plt.show()
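# Optional (sketch): visualize a few reconstructions from the best autoencoder to
# complement the loss curves
decoded_imgs = best_model.predict(x_test[:8])
plt.figure(figsize=(12, 3))
for i in range(8):
    plt.subplot(2, 8, i + 1)
    plt.imshow(x_test[i].reshape(28, 28), cmap='gray')
    plt.axis('off')
    plt.subplot(2, 8, i + 9)
    plt.imshow(decoded_imgs[i].reshape(28, 28), cmap='gray')
    plt.axis('off')
plt.suptitle("Originals (top row) vs Reconstructions (bottom row)")
plt.show()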
R:
library(keras)
library(ggplot2)

# 1. Loading and Pre‑processing the Data
mnist <- dataset_mnist()
x_train <- mnist$train$x
x_test <- mnist$test$x
x_train <- array_reshape(x_train, c(nrow(x_train), 784)) / 255
x_test <- array_reshape(x_test, c(nrow(x_test), 784)) / 255

# 2. Define a function to build and train an autoencoder given an encoding dimension
train_autoencoder <- function(encoding_dim, epochs = 10, batch_size = 256) {
  input_dim <- 784
  input_layer <- layer_input(shape = c(input_dim))
  encoded <- layer_dense(input_layer, units = encoding_dim, activation = 'relu')
  decoded <- layer_dense(encoded, units = input_dim, activation = 'sigmoid')
  autoencoder <- keras_model(input_layer, decoded)
  autoencoder %>% compile(optimizer = 'adam', loss = 'binary_crossentropy')
  history <- autoencoder %>% fit(x_train, x_train, epochs = epochs, batch_size = batch_size,
                                 validation_data = list(x_test, x_test), verbose = 0)
  final_val_loss <- tail(history$metrics$val_loss, 1)
  list(model = autoencoder, history = history, val_loss = final_val_loss)
}

# 3. Hyperparameter Tuning: Try different encoding dimensions and record validation loss
encoding_dims <- seq(16, 128, by = 16)
results <- data.frame(encoding_dim = numeric(), val_loss = numeric())
for (dim in encoding_dims) {
  res <- train_autoencoder(encoding_dim = dim)
  results <- rbind(results, data.frame(encoding_dim = dim, val_loss = res$val_loss))
  cat("Encoding dim:", dim, "Validation Loss:", res$val_loss, "\n")
}

# 4. Plot hyperparameter tuning results
ggplot(results, aes(x = encoding_dim, y = val_loss)) +
  geom_point(color = "darkgreen", size = 3) +
  geom_line(color = "blue") +
  ggtitle("Encoding Dimension vs Validation Loss") +
  xlab("Encoding Dimension") +
  ylab("Validation Loss")

# 5. Model Evaluation: Train the best model (lowest validation loss) and plot loss curves
best_dim <- results$encoding_dim[which.min(results$val_loss)]
best_res <- train_autoencoder(encoding_dim = best_dim)
history <- best_res$history
plot(1:length(history$metrics$loss), history$metrics$loss, type = 'l', col = 'blue',
     ylim = range(c(history$metrics$loss, history$metrics$val_loss)),
     xlab = "Epoch", ylab = "Loss", main = "Training vs Validation Loss")
lines(1:length(history$metrics$val_loss), history$metrics$val_loss, col = 'red')
legend("topright", legend = c("Train Loss", "Validation Loss"), col = c("blue", "red"), lty = 1)

Support Vector Machine (SVM)

  • Introduction: Build an SVM classifier using an RBF kernel on the Iris dataset.
  • Model Evaluation: Evaluate the model using accuracy and a confusion matrix.
  • Hyperparameter Tuning: Use grid search to tune parameters (C and gamma) and visualize the tuning results via a heatmap.
Python:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

# 1. Loading the Data
data = load_iris()
X, y = data.data, data.target

# 2. Data Pre‑processing
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3. Model Training with Default Hyperparameters
model = SVC(kernel='rbf')
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
base_acc = accuracy_score(y_test, y_pred)
print("SVM Accuracy (default):", base_acc)

# 4. Model Evaluation: Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=data.target_names)
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix for SVM on Iris")
plt.show()

# 5. Hyperparameter Tuning using GridSearchCV
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [0.001, 0.01, 0.1, 1]
}
grid = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=5, n_jobs=-1)
grid.fit(X_train_scaled, y_train)
print("Best parameters:", grid.best_params_)
best_model = grid.best_estimator_
y_pred_best = best_model.predict(X_test_scaled)
tuned_acc = accuracy_score(y_test, y_pred_best)
print("Tuned SVM Accuracy:", tuned_acc)

# 6. Visualizing Hyperparameter Tuning Results (Heatmap)
results = grid.cv_results_
# Reshape the mean test scores to form a grid
scores_mean = results['mean_test_score'].reshape(len(param_grid['C']), len(param_grid['gamma']))
df_scores = pd.DataFrame(scores_mean, index=param_grid['C'], columns=param_grid['gamma'])
plt.figure(figsize=(8,6))
sns.heatmap(df_scores, annot=True, fmt=".3f", cmap="viridis")
plt.xlabel("gamma")
plt.ylabel("C")
plt.title("Grid Search Accuracy")
plt.show()
R:
library(e1071)
library(caret)
library(ggplot2)
library(gridExtra)

# 1. Loading the Data
data(iris)

# 2. Data Pre‑processing: Splitting the Data
set.seed(42)
trainIndex <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
trainData <- iris[trainIndex, ]
testData <- iris[-trainIndex, ]

# 3. Model Training with Default Hyperparameters
model <- svm(Species ~ ., data = trainData, kernel = "radial")
predictions <- predict(model, testData)
base_acc <- mean(predictions == testData$Species)
print(paste("SVM Accuracy (default):", round(base_acc, 2)))

# 4. Model Evaluation: Confusion Matrix
cm <- confusionMatrix(predictions, testData$Species)
print(cm)

# 5. Hyperparameter Tuning using caret
tuneGrid <- expand.grid(C = c(0.1, 1, 10, 100),
                        sigma = c(0.001, 0.01, 0.1, 1))
ctrl <- trainControl(method = "cv", number = 5)
set.seed(42)
svm_tuned <- train(Species ~ ., data = trainData,
                   method = "svmRadial",
                   tuneGrid = tuneGrid,
                   trControl = ctrl)
print(svm_tuned)
tuned_predictions <- predict(svm_tuned, testData)
tuned_acc <- mean(tuned_predictions == testData$Species)
print(paste("Tuned SVM Accuracy:", round(tuned_acc, 2)))

# 6. Visualizing Hyperparameter Tuning Results
plot(svm_tuned)

Extreme Learning Machine (ELM)

  • Introduction: Build an Extreme Learning Machine (ELM) on the Iris dataset.
  • Preprocessing: Convert target labels to one-hot encoding as required by hpelm.
  • Model Evaluation: Compute test accuracy and perform hyperparameter tuning over the number of hidden neurons.
Python:
import hpelm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# 1. Load the Iris dataset
data = load_iris()
X, y = data.data, data.target
num_classes = len(np.unique(y))

# Convert target labels to one-hot encoding (shape: [n_samples, num_classes])
y_onehot = np.eye(num_classes)[y]

# 2. Split and scale the data
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3. Train an ELM with a fixed hidden neuron count and evaluate
fixed_neurons = 10
model = hpelm.ELM(X_train_scaled.shape[1], num_classes)
model.add_neurons(fixed_neurons, 'sigm')
model.train(X_train_scaled, y_train, 'c')
pred = model.predict(X_test_scaled)

# Convert predictions to class labels by taking argmax (assumes predictions are one-hot style)
pred_labels = pred.argmax(axis=1) if pred.ndim > 1 else pred
# Convert true one-hot labels back to class indices
true_labels = y_test.argmax(axis=1)
base_acc = accuracy_score(true_labels, pred_labels)
print("ELM Accuracy with {} neurons: {:.2f}".format(fixed_neurons, base_acc))

# 4. Hyperparameter Tuning: Vary the number of hidden neurons
neuron_counts = [5, 10, 15, 20, 25, 30]
accuracies = []
for n in neuron_counts:
    model = hpelm.ELM(X_train_scaled.shape[1], num_classes)
    model.add_neurons(n, 'sigm')
    model.train(X_train_scaled, y_train, 'c')
    pred = model.predict(X_test_scaled)
    pred_labels = pred.argmax(axis=1) if pred.ndim > 1 else pred
    acc = accuracy_score(true_labels, pred_labels)
    accuracies.append(acc)
    print("Neurons: {}, Accuracy: {:.2f}".format(n, acc))

# 5. Plot Hyperparameter Tuning Results
plt.figure()
plt.plot(neuron_counts, accuracies, marker='o', linestyle='-')
plt.xlabel("Number of Hidden Neurons")
plt.ylabel("Accuracy")
plt.title("ELM Hyperparameter Tuning: Hidden Neurons vs Accuracy")
plt.grid(True)
plt.show()
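# Note (sketch): ELM hidden-layer weights are random, so accuracy varies between
# runs; averaging a few repetitions gives a more stable estimate for one setting
repeat_acc = []
for _ in range(5):
    m = hpelm.ELM(X_train_scaled.shape[1], num_classes)
    m.add_neurons(20, 'sigm')
    m.train(X_train_scaled, y_train, 'c')
    p = m.predict(X_test_scaled)
    repeat_acc.append(accuracy_score(true_labels, p.argmax(axis=1) if p.ndim > 1 else p))
print("Mean accuracy over 5 runs (20 neurons): {:.2f}".format(np.mean(repeat_acc)))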
R:
# ELM implementations in R may use different packages.
# One example is the elmNN package.
library(elmNN)
library(caret)
library(nnet)     # provides class.ind() for one-hot encoding
library(ggplot2)

# 1. Load the Iris dataset
data(iris)
X <- as.matrix(iris[, 1:4])
# Convert species factor to one-hot encoding using class.ind()
y <- class.ind(iris$Species)
true_labels <- max.col(y)

# 2. Train an ELM with fixed hidden neurons and evaluate
fixed_neurons <- 10
model <- elmtrain(x = X, y = y, nhid = fixed_neurons, actfun = "sig")
pred <- predict(model, X)
pred_labels <- apply(pred, 1, which.max)
base_acc <- mean(pred_labels == true_labels)
print(paste("ELM Accuracy with", fixed_neurons, "neurons:", round(base_acc, 2)))

# 3. Hyperparameter Tuning: Vary number of hidden neurons
neuron_counts <- c(5, 10, 15, 20, 25, 30)
acc_values <- numeric(length(neuron_counts))
for(i in seq_along(neuron_counts)){
  model <- elmtrain(x = X, y = y, nhid = neuron_counts[i], actfun = "sig")
  pred <- predict(model, X)
  pred_labels <- apply(pred, 1, which.max)
  acc <- mean(pred_labels == true_labels)
  acc_values[i] <- acc
  cat("Neurons:", neuron_counts[i], "Accuracy:", round(acc, 2), "\n")
}
results <- data.frame(HiddenNeurons = neuron_counts, Accuracy = acc_values)
print(results)

# 4. Plot Hidden Neurons vs Accuracy
ggplot(results, aes(x = HiddenNeurons, y = Accuracy)) +
  geom_line(color = "blue") +
  geom_point(color = "red", size = 3) +
  ggtitle("ELM Hyperparameter Tuning: Hidden Neurons vs Accuracy") +
  xlab("Number of Hidden Neurons") +
  ylab("Accuracy")

Hopfield Network

  • Introduction: Demonstrate associative memory using a Hopfield network from the neurodynex3 library.
  • Model Evaluation: Vary the noise level added to a stored pattern and compute the retrieval accuracy.
  • Hyperparameter Tuning: Vary the number of synchronous update steps (nr_steps) and visualize its impact on retrieval accuracy.
Python:
import numpy as np
import matplotlib.pyplot as plt
from neurodynex3.hopfield_network import network

# Define stored patterns (each with 4 neurons)
patterns = np.array([
    [1, -1, 1, -1],
    [-1, 1, -1, 1]
])

HopfieldNetwork = network.HopfieldNetwork

def compute_weight_matrix(patterns):
    """
    Compute the weight matrix using the Hebbian rule.
    For bipolar patterns, W = sum(outer(p, p)) with zero diagonal.
    """
    N = patterns.shape[1]
    W = np.zeros((N, N))
    for p in patterns:
        W += np.outer(p, p)
    np.fill_diagonal(W, 0)
    return W

def add_noise(pattern, noise_level):
    """Flip a fraction of bits in the pattern based on the noise level."""
    noisy_pattern = pattern.copy()
    n_flip = int(len(pattern) * noise_level)
    indices = np.random.choice(len(pattern), n_flip, replace=False)
    noisy_pattern[indices] *= -1
    return noisy_pattern

def retrieval_accuracy(original, retrieved):
    """Compute the fraction of bits that match between the original and retrieved patterns."""
    return np.mean(original == retrieved)

def synchronous_update_steps(model, nr_steps, weight_matrix):
    """
    Manually run synchronous updates for nr_steps using the given weight matrix.
    """
    for _ in range(nr_steps):
        net_input = weight_matrix.dot(model.state)
        # Synchronous update: sign(net_input), with tie-breaker: +1
        new_state = np.where(net_input >= 0, 1, -1)
        model.state = new_state

########################################
# Part 1: Evaluate retrieval accuracy vs. noise level
########################################
noise_levels = np.linspace(0, 0.5, 6)  # Noise levels: 0%, 10%, 20%, 30%, 40%, 50%
accuracies = []
nr_steps_eval = 5  # number of update steps for evaluation

# Compute weight matrix from patterns once (since patterns are fixed)
W = compute_weight_matrix(patterns)

for noise in noise_levels:
    # Reinitialize the network for each trial
    model = HopfieldNetwork(nr_neurons=len(patterns[0]))
    model.store_patterns(patterns)  # (this call may not set a weight matrix attribute)

    original = patterns[0]  # use the first stored pattern for testing
    noisy_input = add_noise(original, noise)
    # Set the network's initial state using the noisy pattern
    model.state = noisy_input.copy()

    # Perform synchronous updates manually using our computed weight matrix
    synchronous_update_steps(model, nr_steps_eval, W)

    # Retrieve final state from model.state
    retrieved = model.state
    acc = retrieval_accuracy(original, retrieved)
    accuracies.append(acc)
    print(f"Noise level: {noise:.2f}, Retrieval accuracy: {acc:.2f}")

plt.figure()
plt.plot(noise_levels, accuracies, marker='o', linestyle='-')
plt.xlabel("Noise Level (fraction of bits flipped)")
plt.ylabel("Retrieval Accuracy")
plt.title("Retrieval Accuracy vs Noise Level")
plt.grid(True)
plt.show()

########################################
# Part 2: Hyperparameter Tuning: Vary the number of update steps (nr_steps)
########################################
fixed_noise = 0.3  # fixed noise level of 30%
iter_values = [1, 5, 10, 15, 20, 25]
iter_accuracies = []

for iters in iter_values:
    # Reinitialize the network for each trial
    model = HopfieldNetwork(nr_neurons=len(patterns[0]))
    model.store_patterns(patterns)
    
    original = patterns[0]
    noisy_input = add_noise(original, fixed_noise)
    model.state = noisy_input.copy()
    
    # Run synchronous updates manually for a given number of steps
    synchronous_update_steps(model, iters, W)
    
    retrieved = model.state
    acc = retrieval_accuracy(original, retrieved)
    iter_accuracies.append(acc)
    print(f"nr_steps: {iters}, Retrieval accuracy: {acc:.2f}")

plt.figure()
plt.plot(iter_values, iter_accuracies, marker='o', linestyle='-')
plt.xlabel("nr_steps (number of update iterations)")
plt.ylabel("Retrieval Accuracy")
plt.title("Hyperparameter Tuning: Accuracy vs nr_steps")
plt.grid(True)
plt.show()
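# Optional sanity check (sketch, independent of the neurodynex3 API): the Hopfield
# energy E = -0.5 * s^T W s is low for stored patterns, so a correctly retrieved
# state should sit at (or near) the same energy as the pattern it recalls
def hopfield_energy(state, weight_matrix):
    return -0.5 * state @ weight_matrix @ state

print("Energy of stored pattern:", hopfield_energy(patterns[0], W))
print("Energy of last retrieved state:", hopfield_energy(retrieved, W))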

Transformer Networks

  • Introduction: Fine-tune a pre-trained transformer (BERT) for text classification using the GLUE SST-2 dataset.
  • Steps include: Loading libraries and dataset, tokenizing text, model training, evaluating (with a loss curve plot), and hyperparameter tuning.
Python:
from transformers import BertForSequenceClassification, BertTokenizerFast, Trainer, TrainingArguments
from datasets import load_dataset
from evaluate import load 
import numpy as np
import matplotlib.pyplot as plt
import optuna

# 1. Load the Dataset (GLUE SST-2)
dataset = load_dataset("glue", "sst2")
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# 2. Tokenize the Data
def tokenize_function(examples):
    return tokenizer(examples['sentence'], truncation=True, padding="max_length", max_length=128)
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format("torch", columns=['input_ids', 'attention_mask', 'labels'])

# 3. Initialize the Model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# 4. Define Evaluation Metric
metric = load("glue", "sst2")
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# 5. Set Up Training Arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=2,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics,
)

# 6. Train the Model
train_result = trainer.train()
trainer.save_model()
print("Training Metrics:", train_result.metrics)

# 7. Plot the Training Loss Curve
log_history = trainer.state.log_history
loss_steps = [entry['step'] for entry in log_history if 'loss' in entry]
losses = [entry['loss'] for entry in log_history if 'loss' in entry]
plt.plot(loss_steps, losses, marker='o')
plt.xlabel("Training Steps")
plt.ylabel("Loss")
plt.title("Training Loss Curve")
plt.show()

# 8. Evaluate the Model
eval_result = trainer.evaluate()
print("Evaluation Results:", eval_result)

# 9. Hyperparameter Tuning with Optuna
def model_init():
    return BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

def hp_space(trial):
    return {
        "learning_rate": trial.suggest_loguniform("learning_rate", 1e-5, 5e-5),
        "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [16, 32])
    }

# hyperparameter_search needs a model_init so each trial starts from fresh weights,
# so build a second Trainer that uses it
trainer_hp = Trainer(
    model_init=model_init,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics,
)
best_run = trainer_hp.hyperparameter_search(direction="maximize", hp_space=hp_space, n_trials=5)
print("Best Hyperparameters:", best_run.hyperparameters)
R:
library(huggingfaceR)
library(ggplot2)
# Note: Comprehensive transformer fine-tuning and hyperparameter tuning are less common in R.
# This example demonstrates basic inference and simulates a training loss curve;
# the exact huggingfaceR API may differ between package versions.

# 1. Load a Pre-trained Transformer Model for Inference
model <- transformer_from_pretrained("bert-base-uncased")
inputs <- list(text = c("Hello, world!", "I love machine learning!"))
outputs <- model(inputs)
print("Transformer output:")
print(outputs)

# 2. Simulate a Training Loss Curve
# (Since full training is typically done in Python, we simulate loss values.)
loss_values <- c(0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6)
steps <- 1:length(loss_values)
df <- data.frame(steps, loss = loss_values)
ggplot(df, aes(x = steps, y = loss)) +
  geom_line(color = "blue") +
  geom_point(color = "red") +
  ggtitle("Simulated Training Loss Curve") +
  xlab("Training Steps") +
  ylab("Loss")