Fundamentals
Neural Network Basics
import tensorflow as tf
from tensorflow.keras.layers import Dense
# Basic neural network layer
layer = Dense(units=64, activation='relu')
# Forward pass (inputs: a batch of 32 samples with 16 features)
inputs = tf.random.normal((32, 16))
output = layer(inputs)
# Loss function
loss = tf.keras.losses.BinaryCrossentropy()
# Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
Note: Neural networks consist of layers of neurons that transform input data through weighted connections and activation functions.
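For intuition, here is a minimal NumPy sketch of what a single dense layer computes (the weights and bias are randomly initialized purely for illustration):
import numpy as np
x = np.random.randn(4, 16)           # batch of 4 inputs with 16 features
W = np.random.randn(16, 64) * 0.01   # weight matrix: 16 inputs -> 64 units
b = np.zeros(64)                     # bias vector
z = x @ W + b                        # weighted connections
output = np.maximum(0, z)            # ReLU activation, shape (4, 64)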
Activation Functions
import numpy as np
# Sigmoid
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
# ReLU (Rectified Linear Unit)
def relu(x):
    return np.maximum(0, x)
# Softmax (for multi-class classification)
def softmax(x):
    exp_x = np.exp(x - np.max(x))  # subtract max for numerical stability
    return exp_x / np.sum(exp_x, axis=0)
Note: Activation functions introduce non-linearity, allowing neural networks to learn complex patterns.
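For example, applying the functions above to a small array (using the definitions and NumPy import from the snippet above):
x = np.array([-2.0, 0.0, 3.0])
sigmoid(x)   # ~[0.12, 0.50, 0.95] -- squashes values into (0, 1)
relu(x)      # [0., 0., 3.] -- zeroes out negatives
softmax(x)   # non-negative values that sum to 1 (class probabilities)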
Architectures
Convolutional Neural Networks (CNN)
# Basic CNN architecture
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])
# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
Note: CNNs are particularly effective for image processing tasks due to their ability to capture spatial hierarchies.
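A short training sketch for the model above on MNIST-shaped data (the epoch and batch-size values here are arbitrary choices, not from the original):
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train[..., None] / 255.0   # add channel dimension, scale to [0, 1]
x_test = x_test[..., None] / 255.0
model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.1)
model.evaluate(x_test, y_test)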
Recurrent Neural Networks (RNN)
# Simple RNN
from tensorflow.keras import Sequential
from tensorflow.keras.layers import SimpleRNN, LSTM, Dense
model = Sequential([
    SimpleRNN(50, return_sequences=True, input_shape=(None, 1)),
    SimpleRNN(50, return_sequences=True),
    SimpleRNN(50),
    Dense(1)
])
# LSTM (Long Short-Term Memory)
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(None, 1)),
    LSTM(50, return_sequences=True),
    LSTM(50),
    Dense(1)
])
Note: RNNs are designed for sequential data, with LSTMs addressing the vanishing gradient problem in traditional RNNs.
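A minimal usage sketch for the LSTM model above, fitting a toy sequence-regression task (the data and hyperparameters are synthetic, for illustration only):
import numpy as np
X = np.random.rand(1000, 20, 1)   # 1000 sequences, 20 timesteps, 1 feature
y = X.sum(axis=1)                 # toy target: the sum of each sequence
model.compile(optimizer='adam', loss='mse')
model.fit(X, y, epochs=3, batch_size=32)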
Transformers
# Transformer implementation (simplified)
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import MultiHeadAttention, Dense, LayerNormalization, Dropout
class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential([
            Dense(ff_dim, activation='relu'),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)
    def call(self, inputs, training=False):
        # Self-attention: the sequence attends to itself
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)   # residual connection + norm
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)      # residual connection + norm
Note: Transformers use self-attention mechanisms and have become the standard for NLP tasks, powering models like BERT and GPT.
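A quick sanity check of the block above on a dummy batch (the batch size, sequence length, and embedding size are arbitrary):
block = TransformerBlock(embed_dim=32, num_heads=4, ff_dim=64)
x = tf.random.uniform((8, 10, 32))   # (batch, sequence length, embed_dim)
y = block(x, training=False)         # output shape is preserved: (8, 10, 32)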
Generative Adversarial Networks (GANs)
# Basic GAN structure
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Reshape, Flatten
def build_generator():
    model = Sequential()
    model.add(Dense(256, input_dim=100))         # latent vector of size 100
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(1024))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(784, activation='tanh'))     # 28*28 pixels in [-1, 1]
    model.add(Reshape((28, 28, 1)))
    return model
Note: GANs consist of a generator and discriminator that compete against each other, enabling the generation of realistic synthetic data.
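For completeness, a matching discriminator sketch (the layer sizes here are illustrative assumptions, not from the original; it reuses the imports from the generator snippet above):
def build_discriminator():
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28, 1)))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))    # real (1) vs. fake (0)
    return model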
Frameworks
TensorFlow/Keras
# Basic model creation
import tensorflow as tf
from tensorflow.keras import layers
# Sequential API
model = tf.keras.Sequential([
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])
# Functional API
inputs = tf.keras.Input(shape=(32,))
x = layers.Dense(64, activation='relu')(inputs)
x = layers.Dense(64, activation='relu')(x)
outputs = layers.Dense(10, activation='softmax')(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
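Either model is trained the same way; a typical workflow sketch (x_train, y_train, x_test, and y_test are assumed to exist):
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x_train, y_train, epochs=10, batch_size=32, validation_split=0.2)
model.evaluate(x_test, y_test)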
PyTorch
# Basic model creation
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
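A minimal training-step sketch for the Net above (train_loader is an assumed torch DataLoader of image/label batches, not defined in the original):
model = Net()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.NLLLoss()                 # pairs with the log_softmax output
for images, labels in train_loader:      # train_loader: assumed DataLoader
    optimizer.zero_grad()
    output = model(images)
    loss = criterion(output, labels)
    loss.backward()
    optimizer.step()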
Optimization
Optimizers
# Common optimizers in TensorFlow
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)
optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.01)
# Learning rate scheduling
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2,
    decay_steps=10000,
    decay_rate=0.9)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)
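Any of these can be attached via model.compile, or applied manually with a gradient tape; a sketch of the manual step (model, loss_fn, x_batch, and y_batch are assumed to exist):
with tf.GradientTape() as tape:
    predictions = model(x_batch, training=True)
    loss_value = loss_fn(y_batch, predictions)
grads = tape.gradient(loss_value, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))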
Regularization
# L1/L2 Regularization
model.add(Dense(64, kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(Dense(64, kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.01, l2=0.01)))
# Dropout
model.add(Dropout(0.5))
# Batch Normalization
model.add(BatchNormalization())
# Early Stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True)
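The early-stopping callback takes effect when passed to fit; a usage sketch (the data names are assumptions):
history = model.fit(x_train, y_train,
                    validation_split=0.2,
                    epochs=100,
                    callbacks=[early_stopping])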
Quick Reference
Loss Functions
# Regression
mean_squared_error # MSE
mean_absolute_error # MAE
huber # Combines MSE and MAE (robust to outliers)
# Classification
binary_crossentropy # Binary classification
categorical_crossentropy # Multi-class, one-hot encoded
sparse_categorical_crossentropy # Multi-class, integer labels
# Specialized
hinge # SVM-like loss
kl_divergence # For probabilistic models
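These names are the Keras string identifiers; losses can also be instantiated as objects when parameters are needed, for example:
model.compile(optimizer='adam', loss='categorical_crossentropy')          # by string
model.compile(optimizer='adam', loss=tf.keras.losses.Huber(delta=1.0))    # by object, with parameters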
Evaluation Metrics
# Classification metrics
accuracy
precision
recall
f1_score
auc # Area Under ROC Curve
# Regression metrics
mean_squared_error
mean_absolute_error
r2_score # Coefficient of determination
# Custom metrics
def custom_metric(y_true, y_pred):
    return tf.reduce_mean(y_true - y_pred)  # mean signed error (illustrative)
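A custom metric is passed to compile alongside built-in metrics, for example:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy', tf.keras.metrics.AUC(), custom_metric])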