深度学习
实验要求
- 建立深度学习模型,检测出图中的人是否佩戴了口罩,并将其尽可能调整到最佳状态。
- 学习经典的模型MTCNN 和 MobileNet 的结构。
- 学习训练时的方法。
实验环境
可以使用基于Python的Numpy等库进行数据处理,使用PyTorch等框架建立深度学习模型(sklearn可用于数据划分与指标评估),使用过程中请注意Python包(库)的版本。
参考资料
- 论文:Joint Face Detection and Alignment using Multi-task Cascaded Convolutional Networks:https://kpzhang93.github.io/MTCNN_face_detection_alignment/
- OpenCV:https://opencv-python-tutroals.readthedocs.io/en/latest/py_tutorials/py_tutorials.html
- PIL:https://pillow.readthedocs.io/en/stable/
- Numpy:https://www.numpy.org/
- Scikit-learn:https://scikit-learn.org/
- PyTorch:https://pytorch.org/
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import datasets
import cv2
from PIL import Image
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import os
# MTCNN人脸检测
from mtcnn import MTCNN
# MobileNet模型
from torchvision.models import mobilenet_v2
# Device configuration: use CUDA when torch reports it as available,
# otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

# Preprocessing pipeline: resize to 224x224, convert to a tensor, then
# normalize each channel with the mean/std below.
# NOTE(review): these look like the usual ImageNet statistics — confirm.
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]
_preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
]
transform = transforms.Compose(_preprocess_steps)
# Load the dataset
def load_dataset(data_dir):
    """Return an ``ImageFolder`` dataset rooted at *data_dir*.

    The module-level ``transform`` is attached to the dataset, so it is
    applied to every image the dataset yields.
    """
    return datasets.ImageFolder(root=data_dir, transform=transform)
# Split into a training set and a test set
def split_dataset(dataset, train_ratio=0.8, seed=None):
    """Randomly split *dataset* into a training and a test subset.

    Args:
        dataset: A sized, indexable dataset accepted by
            ``torch.utils.data.random_split``.
        train_ratio: Fraction of the samples assigned to the training
            subset; must lie strictly between 0 and 1. Defaults to 0.8.
        seed: Optional integer seed. When given, a dedicated generator
            seeded with it drives the split, making it reproducible.
            When ``None``, ``random_split`` uses its default generator.

    Returns:
        A ``(train_dataset, test_dataset)`` pair of subsets.

    Raises:
        ValueError: If *train_ratio* is not strictly between 0 and 1.
    """
    if not 0.0 < train_ratio < 1.0:
        raise ValueError(
            f"train_ratio must be strictly between 0 and 1, got {train_ratio!r}"
        )

    total = len(dataset)
    train_size = int(train_ratio * total)
    # The test subset takes the remainder so the two sizes always sum to total.
    test_size = total - train_size

    split_kwargs = {}
    if seed is not None:
        split_kwargs["generator"] = torch.Generator().manual_seed(seed)

    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size], **split_kwargs
    )
    return train_dataset, test_dataset
# Data loaders
def get_data_loaders(train_dataset, test_dataset, batch_size=32):
    """Wrap the two datasets in ``DataLoader`` objects.

    Args:
        train_dataset: Dataset used for training; its loader shuffles.
        test_dataset: Dataset used for evaluation; its loader keeps the
            dataset order.
        batch_size: Number of samples per batch for both loaders.
            Defaults to 32.

    Returns:
        A ``(train_loader, test_loader)`` tuple.
    """
    train_loader = DataLoader(
        dataset=train_dataset, batch_size=batch_size, shuffle=True
    )
    test_loader = DataLoader(
        dataset=test_dataset, batch_size=batch_size, shuffle=False
    )
    return train_loader, test_loader
# Initialize the MobileNet model
def init_model(num_classes=2):
    """Create a MobileNetV2 classifier with *num_classes* outputs.

    The network is built with ``pretrained=True``, the layer at
    ``classifier[1]`` is replaced by a new linear layer with the same
    input width and *num_classes* outputs, and the model is moved to the
    module-level ``device``.

    NOTE(review): newer torchvision releases deprecate ``pretrained=`` in
    favour of ``weights=`` — confirm against the installed version.
    """
    net = mobilenet_v2(pretrained=True)
    head_inputs = net.classifier[1].in_features
    net.classifier[1] = torch.nn.Linear(head_inputs, num_classes)
    return net.to(device)
# Train the model
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    """Train *model* for *num_epochs* passes over *train_loader*.

    Each batch is moved to the module-level ``device``, run through the
    model, scored with *criterion*, and used for one optimizer step. After
    every epoch the mean of that epoch's batch losses is printed.

    Args:
        model: The network to optimize; it is switched to training mode.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss.
        optimizer: Optimizer bound to the model's parameters.
        num_epochs: Number of passes over *train_loader*. Defaults to 10.

    Returns:
        A list with the mean batch loss of each epoch, in epoch order.

    Raises:
        ValueError: If *train_loader* yields no batches in an epoch.
    """
    model.train()
    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            # Without this guard the report below would fail on an
            # undefined loss; fail with an explicit message instead.
            raise ValueError("train_loader yielded no batches; nothing to train on")

        # Report the epoch average rather than only the last batch's loss,
        # which is a noisy indicator of progress.
        mean_loss = running_loss / num_batches
        epoch_losses.append(mean_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}')
    return epoch_losses
# Evaluate the model
def evaluate_model(model, test_loader):
    """Return the accuracy of *model* over all batches of *test_loader*.

    The model is put into evaluation mode and run without gradient
    tracking. For each sample the class with the highest output score is
    taken as the prediction, and the result is computed with
    ``accuracy_score`` over all collected labels and predictions.
    """
    model.eval()
    y_pred = []
    y_true = []
    with torch.no_grad():
        for batch_images, batch_labels in test_loader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            scores = model(batch_images)
            y_pred += scores.argmax(dim=1).tolist()
            y_true += batch_labels.tolist()
    return accuracy_score(y_true, y_pred)
# Mask-wearing detection
def mask_detection(model, image_path):
    """Detect faces in an image, classify each one, and display the result.

    Faces are located with MTCNN, each face crop is preprocessed with the
    module-level ``transform`` and classified by *model*, and the annotated
    image is shown in an OpenCV window until a key is pressed.

    Args:
        model: Trained classifier. It is switched to evaluation mode and
            left in that mode.
        image_path: Path of the image file to analyse.

    Raises:
        FileNotFoundError: If OpenCV cannot read *image_path*.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # OpenCV decodes to BGR channel order. Both the face detector and the
    # classifier are given an RGB copy; the BGR original is kept for
    # drawing and display with cv2.imshow.
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img_height, img_width = rgb_image.shape[:2]

    detector = MTCNN()
    results = detector.detect_faces(rgb_image)

    # Inference only: evaluation mode and no gradient tracking.
    model.eval()
    with torch.no_grad():
        for result in results:
            x, y, width, height = result['box']
            # Clamp the box to the image; a negative start index would
            # otherwise be interpreted as counting from the end when slicing.
            x0, y0 = max(x, 0), max(y, 0)
            x1 = min(x + width, img_width)
            y1 = min(y + height, img_height)
            if x1 <= x0 or y1 <= y0:
                continue  # Nothing left of the box inside the image.

            face = Image.fromarray(rgb_image[y0:y1, x0:x1])
            face = transform(face).unsqueeze(0).to(device)
            output = model(face)
            predicted = output.argmax(dim=1)
            # NOTE(review): assumes class index 1 means "mask"; verify
            # against the class-to-index mapping of the training dataset.
            label = 'Mask' if predicted.item() == 1 else 'No Mask'
            cv2.rectangle(image, (x0, y0), (x1, y1), (0, 255, 0), 2)
            cv2.putText(image, label, (x0, y0 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

    cv2.imshow('Mask Detection', image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def main():
    """Run the full pipeline: load data, train, evaluate, and demo detection.

    Loads the image dataset from ``data``, splits it, trains a two-class
    model for 10 epochs with cross-entropy loss and Adam (lr 0.001), prints
    the test accuracy, then runs mask detection on ``test_image.jpg``.
    """
    # Data preparation
    full_dataset = load_dataset('data')
    train_part, test_part = split_dataset(full_dataset)
    loaders = get_data_loaders(train_part, test_part)
    train_loader, test_loader = loaders

    # Model, loss and optimizer
    model = init_model(num_classes=2)
    loss_fn = torch.nn.CrossEntropyLoss()
    adam = torch.optim.Adam(model.parameters(), lr=0.001)

    # Training and evaluation
    train_model(model, train_loader, loss_fn, adam, num_epochs=10)
    accuracy = evaluate_model(model, test_loader)
    print(f'Accuracy: {accuracy:.4f}')

    # Run detection on a single image
    mask_detection(model, 'test_image.jpg')
# Run the pipeline only when this file is executed as a script. The module
# name of a script is "__main__"; comparing against "main" never matches.
if __name__ == "__main__":
    main()