Facial Expression Recognition with a ResNet-50 Network
- 1. Import dependencies
- 2. Load a Chinese font file
- 3. Set the image size and training parameters
- 4. Data augmentation and preprocessing
- 5. Load the datasets
- 6. Check data dimensions
- 7. Define the ResNet50 model
- 8. Initialize the model, loss function and optimizer
- 9. Training and testing functions
- 10. Train and test the model
- 11. Save the model
- 12. Save and visualize the evaluation data
- Source code
This project uses the FER-2013 dataset combined with some additional data collected and merged in by the author.
The FER-2013 dataset is available at the link below:
https://www.kaggle.com/datasets/msambare/fer2013
1. Import dependencies
The code begins by importing several Python libraries used for image processing, numerical computation, and building and training the deep learning model.
import cv2
import numpy as np
from PIL import ImageFont, ImageDraw
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.optim import Adam
import matplotlib.pyplot as plt
from PIL import Image
2. Load a Chinese font file
Loads a Chinese font file so that Chinese labels can be drawn on images.
font_path = "SourceHanSansSC-Bold.otf"
font = ImageFont.truetype(font_path, 30)
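The font object is only created here; as a minimal sketch (not part of the original code), it could be used to draw a Chinese label, such as a predicted emotion, onto an OpenCV frame via PIL. The helper name draw_label and its arguments are illustrative.
def draw_label(frame_bgr, text, position=(10, 10)):
    # Hypothetical helper: convert the BGR OpenCV frame to an RGB PIL image
    img = Image.fromarray(cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(img)
    draw.text(position, text, font=font, fill=(255, 0, 0))
    # Convert back to BGR so OpenCV can display or save the frame
    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)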
3. Set the image size and training parameters
Defines the target image size, the number of training epochs, and the number of samples per batch.
img_size = 48
targetx = 48
targety = 48
epochs = 50
batch_size = 64
4. Data augmentation and preprocessing
Defines a transform pipeline that resizes the images, applies random horizontal flips, and converts them to tensors.
transform = transforms.Compose([
    transforms.Resize((targetx, targety)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])
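A quick illustrative check (not in the original code): applying the transform to a single PIL image shows the shape of the tensors the model will receive. ImageFolder's default loader converts images to RGB, so each sample comes out with 3 channels.
dummy = Image.new("RGB", (img_size, img_size))
print(transform(dummy).shape)  # expected: torch.Size([3, 48, 48])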
5. Load the datasets
Loads the training and test sets with ImageFolder and wraps them in DataLoaders for batched loading.
train_dataset = datasets.ImageFolder(root="./FER-2013/train" , transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.ImageFolder(root="./FER-2013/test", transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
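An optional check (not in the original code): ImageFolder derives class indices from the subfolder names under the dataset root, so the label mapping and the dataset sizes can be inspected directly; FER-2013 has 7 emotion classes.
print(train_dataset.class_to_idx)   # e.g. {'angry': 0, 'disgust': 1, 'fear': 2, ...}
print(len(train_dataset), len(test_dataset))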
6. Check data dimensions
Defines a function, check_data_dimensions, that prints the dimensions of the images and labels in a batch returned by a data loader.
def check_data_dimensions(loader):
    for images, labels in loader:
        print("Batch image size:", images.shape)
        print("Batch label size:", labels.shape)
        break
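The full script calls this on both loaders; with batch_size = 64 and the transform above, the first training batch is expected to report roughly torch.Size([64, 3, 48, 48]) for images and torch.Size([64]) for labels.
print("Training data dimensions:")
check_data_dimensions(train_loader)
print("\nTesting data dimensions:")
check_data_dimensions(test_loader)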
7. Define the ResNet50 model
Creates a ResNet50 model initialized with pretrained weights and replaces its final fully connected layer to match the number of expression classes.
import torchvision.models as models

class ResNet50Model(nn.Module):
    def __init__(self, num_classes=7):
        super(ResNet50Model, self).__init__()
        self.resnet50 = models.resnet50(pretrained=True)
        num_ftrs = self.resnet50.fc.in_features
        self.resnet50.fc = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        return self.resnet50(x)
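As a quick sanity check, the model should produce one logit per class. The print(model) call mirrors the example usage in the full script; the dummy forward pass is an added illustration.
model = ResNet50Model(num_classes=7)
print(model)  # as in the full script's example usage

with torch.no_grad():
    out = model(torch.randn(2, 3, targetx, targety))
print(out.shape)  # expected: torch.Size([2, 7])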
8. Initialize the model, loss function and optimizer
Initializes the ResNet50 model, defines the loss function and optimizer, and moves the model and loss to the GPU if one is available.
model = ResNet50Model(num_classes=7)
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.0001, weight_decay=0.01)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
loss_fn.to(device)
9. Training and testing functions
Defines the training and testing routines used to train and evaluate the model; in the full script below these are written as inline loops rather than standalone functions. A sketch of possible function bodies follows the stubs.
def train(model, train_loader, criterion, optimizer, device):
    # ...

def test(model, test_loader, criterion, device):
    # ...
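The bodies are elided above; the following is a sketch of what such functions could look like, refactored from the inline loops in the full script (the signatures follow the stubs, and accuracy is the fraction of correct predictions per pass).
def train(model, train_loader, criterion, optimizer, device):
    """One training epoch; returns average loss and accuracy."""
    model.train()
    total_loss, total_correct, total_samples = 0.0, 0, 0
    for data, targets in train_loader:
        data, targets = data.to(device), targets.to(device)
        outputs = model(data)
        loss = criterion(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        total_correct += outputs.max(1)[1].eq(targets).sum().item()
        total_samples += targets.size(0)
    return total_loss / len(train_loader), total_correct / total_samples

def test(model, test_loader, criterion, device):
    """One evaluation pass; returns average loss and accuracy."""
    model.eval()
    total_loss, total_correct, total_samples = 0.0, 0, 0
    with torch.no_grad():
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            total_loss += criterion(outputs, targets).item()
            total_correct += outputs.max(1)[1].eq(targets).sum().item()
            total_samples += targets.size(0)
    return total_loss / len(test_loader), total_correct / total_samples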
10. Train and test the model
Trains and tests the model for the specified number of epochs and prints the loss and accuracy for each epoch.
for epoch in range(num_epochs):
    # training and testing ...
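A sketch of how this loop could call the functions defined above; note that the full script instead trains for all epochs first and then evaluates the final model in a separate test loop.
train_losses, train_accuracies = [], []
test_losses, test_accuracies = [], []
for epoch in range(num_epochs):
    train_loss, train_acc = train(model, train_loader, loss_fn, optimizer, device)
    test_loss, test_acc = test(model, test_loader, loss_fn, device)
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)
    test_losses.append(test_loss)
    test_accuracies.append(test_acc)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {train_loss}, Accuracy: {train_acc}')
    print(f'Epoch {epoch+1}/{num_epochs}, test_Loss: {test_loss}, test_Accuracy: {test_acc}')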
11. Save the model
After training finishes, saves the ResNet50 model weights to a file.
torch.save(model.state_dict(), 'resnet50_final.pth')
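For later inference, the saved weights can be reloaded into a fresh model (illustrative, not in the original code):
model = ResNet50Model(num_classes=7)
model.load_state_dict(torch.load('resnet50_final.pth', map_location=device))
model.to(device)
model.eval()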
12. Save and visualize the evaluation data
Saves the training and test losses and accuracies to .npy files and uses matplotlib to plot the loss and accuracy curves.
np.save('train_losses.npy', train_losses)
# ...
plt.show()
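The full script plots only the training curves; a small illustrative extension (not in the original code) overlays the test metrics saved above on the same axes:
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Training Loss')
plt.plot(test_losses, label='Test Loss')
plt.title('Loss per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()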
Source code
import cv2
import numpy as np
from PIL import ImageFont, ImageDraw
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.optim import Adam
import matplotlib.pyplot as plt
from PIL import Image
#%%
# Load a Chinese font file
font_path = "SourceHanSansSC-Bold.otf"
font = ImageFont.truetype(font_path, 30)
#%%
img_size = 48 #original size of the image
targetx = 48
targety = 48
epochs = 50
batch_size = 64
# Data augmentation and preprocessing
transform = transforms.Compose([
    transforms.Resize((targetx, targety)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# Load the datasets
train_dataset = datasets.ImageFolder(root="./FER-2013/train" , transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = datasets.ImageFolder(root="./FER-2013/test", transform=transform)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
#%%
# Check data dimensions
def check_data_dimensions(loader):
    for images, labels in loader:
        print("Batch image size:", images.shape)  # dimensions of the images in the batch
        print("Batch label size:", labels.shape)  # dimensions of the labels in the batch
        break  # only the first batch is needed

# Inspect the dimensions of the training and test data
print("Training data dimensions:")
check_data_dimensions(train_loader)
print("\nTesting data dimensions:")
check_data_dimensions(test_loader)
#%%
import torch.nn as nn
import torchvision.models as models

class ResNet50Model(nn.Module):
    def __init__(self, num_classes=7):
        super(ResNet50Model, self).__init__()
        # Load the pretrained ResNet-50 model
        self.resnet50 = models.resnet50(pretrained=True)
        # Get the number of input features of ResNet-50's final fully connected layer
        num_ftrs = self.resnet50.fc.in_features
        # Replace the final fully connected layer with a new one whose output size is num_classes
        self.resnet50.fc = nn.Linear(num_ftrs, num_classes)

    # forward defines the forward pass: the input x is simply passed through
    # the pretrained ResNet-50 model.
    def forward(self, x):
        return self.resnet50(x)

# Example usage
model = ResNet50Model(num_classes=7)
print(model)
#%%
# Initialize the model, loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=0.0001, weight_decay=0.01)

# Move the model and loss function to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
loss_fn.to(device)

# Train the model
num_epochs = 60
train_losses = []
train_accuracies = []

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    total_correct = 0
    total_samples = 0
    for data, targets in train_loader:
        # Move inputs and labels to the GPU (if available)
        data, targets = data.to(device), targets.to(device)
        # Forward pass
        outputs = model(data)
        loss = loss_fn(outputs, targets)
        # Zero the gradients
        optimizer.zero_grad()
        # Backward pass and optimization step
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        # Accumulate accuracy statistics
        _, predicted = outputs.max(1)
        total_correct += predicted.eq(targets).sum().item()
        total_samples += targets.size(0)
    avg_loss = total_loss / len(train_loader)
    train_losses.append(avg_loss)
    # Compute the epoch accuracy
    avg_accuracy = total_correct / total_samples
    train_accuracies.append(avg_accuracy)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss}')
    print(f'Epoch {epoch+1}/{num_epochs}, Accuracy: {avg_accuracy}')

# Save the final trained model
torch.save(model.state_dict(), 'resnet50_final.pth')
print("最后训练完的模型已保存!")test_losses = []
test_accuracies = []
# Test the model
for epoch in range(num_epochs):
    model.eval()
    total_loss = 0
    total_correct = 0
    total_samples = 0
    with torch.no_grad():  # no gradient computation is needed during evaluation
        for data, targets in test_loader:
            data, targets = data.to(device), targets.to(device)
            outputs = model(data)
            loss = loss_fn(outputs, targets)
            total_loss += loss.item()
            # Accumulate accuracy statistics
            _, predicted = outputs.max(1)
            total_correct += predicted.eq(targets).sum().item()
            total_samples += targets.size(0)
    avg_loss = total_loss / len(test_loader)
    test_losses.append(avg_loss)
    # Compute the epoch accuracy
    avg_accuracy = total_correct / total_samples
    test_accuracies.append(avg_accuracy)
    print(f'Epoch {epoch+1}/{num_epochs}, test_Loss: {avg_loss}')
    print(f'Epoch {epoch+1}/{num_epochs}, test_Accuracy: {avg_accuracy}')

# Save the evaluation data to .npy files
np.save('train_losses.npy', train_losses)
np.save('train_accuracies.npy', train_accuracies)
np.save('test_losses.npy', test_losses)
np.save('test_accuracies.npy', test_accuracies)

# Plot the loss curve
plt.figure(figsize=(10, 5))
plt.plot(train_losses, label='Training Loss')
plt.title('Training Loss per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Plot the accuracy curve
plt.figure(figsize=(10, 5))
plt.plot(train_accuracies, label='Training Accuracy')
plt.title('Training Accuracy per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()