【6.1】图片风格迁移 Neural Style Transfer

article/2025/9/18 7:47:58

完整代码：

from __future__ import division
from torchvision import models
from torchvision import transforms
from PIL import Image
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ------------------------------ Load the images ------------------------------
# Load an image and preprocess it. The content and style images must end up
# with the same shape so that the VGG feature maps have matching sizes;
# otherwise the L2 losses below cannot be computed directly.
def load_image(image_path, transform=None, max_size=None, shape=None):
    """Open an image file, optionally resize it, and convert it to a batch tensor.

    Args:
        image_path: Path of the image file, handed to ``Image.open``.
        transform: Optional callable applied to the PIL image. Its result
            gets a leading batch dimension and is moved to ``device``.
        max_size: If truthy, rescale the image so that its longer side equals
            this value while keeping the aspect ratio.
        shape: If truthy, resize to exactly this size. The value is passed
            straight to ``Image.resize``, which expects ``(width, height)``.

    Returns:
        The transformed image on ``device`` with a leading batch dimension,
        or the (possibly resized) PIL image when ``transform`` is None.
    """
    # PNG files may carry an alpha channel or a palette; the 3-channel
    # normalization used with this loader needs plain RGB.
    image = Image.open(image_path).convert("RGB")

    if max_size:
        scale = max_size / max(image.size)
        # Image.resize wants a tuple of ints, not a numpy array.
        new_size = tuple(int(side * scale) for side in image.size)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        image = image.resize(new_size, Image.LANCZOS)

    if shape:
        image = image.resize(tuple(shape), Image.LANCZOS)

    if transform is None:
        # Nothing to convert, so there is no tensor to move to the device.
        return image

    image = transform(image).unsqueeze(0)  # add the batch dimension
    return image.to(device)


transform = transforms.Compose([
    transforms.ToTensor(),
    # The VGG weights come from ImageNet training, so normalize the input
    # with the ImageNet per-channel statistics.
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])  # per-channel mean and standard deviation of ImageNet

# The images loaded below are therefore already normalized.
content = load_image("png/content.png", transform, max_size=400)

# Give the style image the same spatial size as the content image.
# Tensors are laid out as (N, C, H, W) while PIL's resize expects
# (width, height), so the width (dim 3) has to come first. Passing
# (H, W) instead produced a transposed size, e.g. 311x400 vs 400x311.
style = load_image("png/style.png", transform,
                   shape=[content.size(3), content.size(2)])

print(content.shape, style.shape)  # the two shapes should now be identical

# ------------------------------ Show the images ------------------------------
unloader = transforms.ToPILImage()  # converts a tensor back into a PIL image
plt.ion()  # interactive mode, so figures update without blocking


def imshow(tensor, title=None):
    """Display an image tensor with matplotlib.

    Args:
        tensor: Image tensor; a leading batch dimension of size 1 is removed.
        title: Optional title drawn above the image.
    """
    # Work on a detached CPU copy so the caller's tensor is never modified
    # and tensors that require grad can be displayed as well.
    image = tensor.detach().cpu().clone()
    image = image.squeeze(0)  # drop the fake batch dimension
    image = unloader(image)
    plt.imshow(image)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause briefly so that the plot is refreshed


plt.figure()
# `style` was normalized with the ImageNet statistics; undo that for display,
# otherwise the colors are wrong. These constants are -mean/std and 1/std.
_preview_denorm = transforms.Normalize((-2.12, -2.04, -1.80), (4.37, 4.46, 4.44))
imshow(_preview_denorm(style[0]).clamp_(0, 1), title='Image')

# The VGG network below is not trained; it only extracts features. What
# actually gets optimized is the single target image.
class VGGNet(nn.Module):
    """Pretrained VGG-19 convolutional stack used as a fixed feature extractor.

    ``forward`` returns the outputs of the layers listed in ``self.select``
    instead of a classification result.
    """

    def __init__(self):
        super(VGGNet, self).__init__()
        # Names of the layers inside ``vgg19().features`` whose outputs are
        # collected; together they capture the content and texture of an image.
        self.select = ['0', '5', '10', '19', '28']
        # Only the ``features`` part of VGG-19 is needed; the classifier
        # head is discarded.
        self.vgg = models.vgg19(pretrained=True).features
        # The network is never trained, so freeze its weights. Gradients
        # still flow through the layers to the input image, but no weight
        # gradients are computed or accumulated.
        for param in self.vgg.parameters():
            param.requires_grad_(False)

    def forward(self, x):
        """Run ``x`` through the layers and return the selected outputs.

        Args:
            x: Input batch fed to the first layer.

        Returns:
            A list with one tensor per selected layer, in network order.
        """
        features = []
        # named_children() yields (name, layer) pairs in order.
        for name, layer in self.vgg.named_children():
            x = layer(x)
            if name in self.select:
                features.append(x)
                # Layers after the last selected one cannot affect the result.
                if len(features) == len(self.select):
                    break
        return features


# `target` is meant to end up similar to content.png in content but closer to
# style.png in style. Its pixels change during optimization, hence the
# requires_grad_ call.
# Start from a copy of the content image and track gradients on its pixels.
target = content.clone()
target.requires_grad_(True)

# The optimizer is given only `target`, so the image is what gets updated.
optimizer = torch.optim.Adam(params=[target], lr=0.003, betas=[0.5, 0.999])

# The network's weights are not passed to the optimizer; it is switched to
# eval mode and used for inference only.
vgg = VGGNet()
vgg = vgg.to(device)
vgg.eval()

# Print the feature map obtained at every selected layer.
# Shapes recorded by the original author for the content image:
# torch.Size([1, 64, 400, 311])
# torch.Size([1, 128, 200, 155])
# torch.Size([1, 256, 100, 77])
# torch.Size([1, 512, 50, 38])
# torch.Size([1, 512, 25, 19])
# Only the shapes are of interest here, so skip recording an autograd graph
# that the global `feature` list would otherwise keep alive.
with torch.no_grad():
    feature = vgg(content)  # list holding one feature map per selected layer
for feat in feature:
    print(feat.shape)

# Start optimizing the target image.
def _gram_matrix(feat):
    """Return the (c, c) Gram matrix of a feature map of shape (1, c, h, w)."""
    _, c, h, w = feat.size()
    flat = feat.view(c, h * w)  # relies on the batch dimension being 1
    # (c, h * w) x (h * w, c) = (c, c): channel-to-channel dot products.
    return torch.mm(flat, flat.t())


total_step = 2000
style_weight = 100.

# The content and style images never change, so their features (and the
# style Gram matrices) are computed once instead of on every step. No
# gradient is needed for them.
with torch.no_grad():
    content_features = vgg(content)
    style_features = vgg(style)
    style_grams = [_gram_matrix(feat) for feat in style_features]

for step in range(total_step):
    target_features = vgg(target)

    style_loss = 0
    content_loss = 0
    for f_target, f_content, g_style in zip(target_features, content_features, style_grams):
        # Content loss: L2 distance between the feature maps.
        content_loss += torch.mean((f_target - f_content) ** 2)

        # Style loss: L2 distance between the Gram matrices, additionally
        # divided by the number of elements of the feature map.
        _, c, h, w = f_target.size()
        g_target = _gram_matrix(f_target)
        style_loss += torch.mean((g_target - g_style) ** 2) / (c * h * w)

    # The two losses live on different scales; the total is their weighted
    # sum, with style_weight balancing style against content.
    loss = content_loss + style_weight * style_loss

    # Update the pixels of the target image.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if step % 10 == 0:
        print("Step [{}/{}], Content Loss: {:.4f}, Style Loss: {:.4f}".format(
            step, total_step, content_loss.item(), style_loss.item()))

# Show the resulting image.
# Inverse of the input normalization, expressed as another Normalize.
denorm = transforms.Normalize((-2.12, -2.04, -1.80), (4.37, 4.46, 4.44))

# Copy the target, drop the size-1 dimensions, undo the normalization and
# clip the values to [0, 1].
img = denorm(target.clone().squeeze()).clamp_(0, 1)

plt.figure()  # open a fresh figure for the result
imshow(img, title='Target Image')