运行ConvLSTM官方例子记录(包含逐行解释):
""" This script demonstrates the use of a convolutional LSTM network.
This network is used to predict the next frame of an artificially
generated movie which contains moving squares.
"""
from keras.models import Sequential
from keras.layers.convolutional import Conv3D
from keras.layers.convolutional_recurrent import ConvLSTM2D
from keras.layers import BatchNormalization
import numpy as np
import pylab as plt# We create a layer which take as input movies of shape
# (n_frames, width, height, channels) and returns a movie
# of identical shape.
# Sequential is a linear stack of layers; passing the layers as a list is
# equivalent to calling seq.add() on each of them in order.
seq = Sequential([
    # First ConvLSTM block. filters=40 gives 40 output channels and the
    # kernel is 3x3. input_shape=(None, 40, 40, 1) means a variable number
    # of 40x40 single-channel frames. padding='same' keeps the 40x40 spatial
    # size and return_sequences=True returns every time step, so the output
    # is a 5D tensor (samples, time, 40, 40, 40).
    ConvLSTM2D(filters=40, kernel_size=(3, 3),
               input_shape=(None, 40, 40, 1),
               padding='same', return_sequences=True),
    BatchNormalization(),  # batch normalization

    # Second to fourth ConvLSTM blocks: each consumes the previous 5D output
    # and produces a tensor of the same shape.
    ConvLSTM2D(filters=40, kernel_size=(3, 3),
               padding='same', return_sequences=True),
    BatchNormalization(),

    ConvLSTM2D(filters=40, kernel_size=(3, 3),
               padding='same', return_sequences=True),
    BatchNormalization(),

    ConvLSTM2D(filters=40, kernel_size=(3, 3),
               padding='same', return_sequences=True),
    BatchNormalization(),

    # Final 3D convolution collapses the 40 channels back to a single
    # sigmoid-activated channel, i.e. one predicted frame per input frame.
    Conv3D(filters=1, kernel_size=(3, 3, 3),
           activation='sigmoid',
           padding='same', data_format='channels_last'),
])
# Configure the model for training; optimizer selects the optimizer.
seq.compile(loss='binary_crossentropy', optimizer='adadelta')

# Artificial data generation: build synthetic data to use for training.
# Generate movies with 3 to 7 moving squares inside. 生成包含3-7个移动方格的电影
# The squares are of shape 4x4 or 6x6 pixels (side 2*w with w = 2 or 3), 方格边长为2*w,即4*4或者6*6
# which move linearly over time. 方格随时间线性移动
# For convenience we first create movies with bigger width and height (80x80) 首先选择80*80
# and at the end we select a 40x40 window.
def generate_movies(n_samples=1200, n_frames=15):
    """Generate synthetic movies of moving squares and their next-frame targets.

    Each movie is drawn on an 80x80 canvas and cropped to the central 40x40
    window at the end. A movie contains 3 to 7 squares; each square has a
    random start position in [20, 60), a per-axis velocity in {-1, 0, 1}
    pixels per frame, and a half-width ``w`` of 2 or 3 (so it is drawn as a
    4x4 or 6x6 block).

    Args:
        n_samples: Number of movies to generate.
        n_frames: Number of frames per movie. With the default of 15 every
            slice stays inside the 80x80 canvas; much larger values would
            let slice bounds leave the canvas.

    Returns:
        A tuple ``(noisy_movies, shifted_movies)`` of float arrays, both of
        shape ``(n_samples, n_frames, 40, 40, 1)``. ``noisy_movies`` holds
        the input frames (squares plus optional +/-0.1 noise, values capped
        at 1). ``shifted_movies`` holds the noise-free frames one time step
        ahead, with values 0 or 1.
    """
    row = 80
    col = 80
    # np.float was removed in NumPy 1.24; the builtin float is the same
    # float64 dtype and works on every NumPy version.
    noisy_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=float)
    shifted_movies = np.zeros((n_samples, n_frames, row, col, 1),
                              dtype=float)

    for i in range(n_samples):
        # Add 3 to 7 moving squares
        n = np.random.randint(3, 8)

        for _ in range(n):
            # Initial position: x and y are random integers in [20, 60)
            xstart = np.random.randint(20, 60)
            ystart = np.random.randint(20, 60)
            # Direction of motion: -1, 0 or +1 on each axis
            directionx = np.random.randint(0, 3) - 1
            directiony = np.random.randint(0, 3) - 1

            # Half-width of the square: 2 or 3
            w = np.random.randint(2, 4)

            for t in range(n_frames):
                # Position of the square at time t
                x_shift = xstart + directionx * t
                y_shift = ystart + directiony * t
                noisy_movies[i, t, x_shift - w: x_shift + w,
                             y_shift - w: y_shift + w, 0] += 1

                # Make it more robust by adding noise.
                # The idea is that if during inference,
                # the value of the pixel is not exactly one,
                # we need to train the network to be robust and still
                # consider it as a pixel belonging to a square.
                if np.random.randint(0, 2):
                    noise_f = (-1) ** np.random.randint(0, 2)
                    noisy_movies[i, t,
                                 x_shift - w - 1: x_shift + w + 1,
                                 y_shift - w - 1: y_shift + w + 1,
                                 0] += noise_f * 0.1

                # Shift the ground truth by 1
                x_shift = xstart + directionx * (t + 1)
                y_shift = ystart + directiony * (t + 1)
                shifted_movies[i, t, x_shift - w: x_shift + w,
                               y_shift - w: y_shift + w, 0] += 1

    # Cut to a 40x40 window
    noisy_movies = noisy_movies[::, ::, 20:60, 20:60, ::]
    shifted_movies = shifted_movies[::, ::, 20:60, 20:60, ::]
    # Overlapping squares add up to more than 1; cap those pixels at 1
    noisy_movies[noisy_movies >= 1] = 1
    shifted_movies[shifted_movies >= 1] = 1
    return noisy_movies, shifted_movies


# Train the network
# Generate the synthetic movies used as training data.
noisy_movies, shifted_movies = generate_movies(n_samples=1200)

# Train for a fixed number of epochs on the first 1000 movies: the noisy
# frames are the inputs and the one-step-ahead frames are the targets.
# epochs=300 is 300 full passes over the training data (not 300 batches);
# 5% of the training samples are held out as validation data.
seq.fit(
    x=noisy_movies[:1000],
    y=shifted_movies[:1000],
    batch_size=10,
    epochs=300,
    validation_split=0.05,
)
# Testing the network on one movie
# feed it with the first 7 positions and then
# predict the new positions
# Index of the test movie; it lies beyond the 1000 movies used for training.
which = 1004
# Seed the trajectory with the first 7 frames of that movie.
track = noisy_movies[which, :7]

# Roll the model forward 16 times: feed the whole trajectory so far, keep
# only the prediction for the last time step and append it as the new frame.
for _ in range(16):
    prediction = seq.predict(track[np.newaxis, ...])
    next_frame = prediction[:, -1]
    track = np.concatenate([track, next_frame], axis=0)
# And then compare the predictions 作出预测值和真实值的图像
# to the ground truth
# Frames of the chosen test movie, shown on the ground-truth side.
track2 = noisy_movies[which][::, ::, ::, ::]

# One figure per frame: model trajectory on the left, ground truth on the
# right, saved as '<frame number>_animate.png'.
for i in range(15):
    fig = plt.figure(figsize=(10, 5))

    # Left panel: the first 7 frames are the seed input, later ones are
    # the frames produced by the network.
    ax = fig.add_subplot(121)
    if i >= 7:
        ax.text(1, 3, 'Predictions !', fontsize=20, color='w')
    else:
        ax.text(1, 3, 'Initial trajectory', fontsize=20)
    toplot = track[i, ::, ::, 0]
    plt.imshow(toplot)

    # Right panel: ground truth. From frame 2 on, use the noise-free
    # shifted movie; its frame i - 1 is the one-step-ahead target of
    # frame i - 1, i.e. the true frame i.
    ax = fig.add_subplot(122)
    plt.text(1, 3, 'Ground truth', fontsize=20)
    toplot = track2[i, ::, ::, 0]
    if i >= 2:
        toplot = shifted_movies[which][i - 1, ::, ::, 0]
    plt.imshow(toplot)

    plt.savefig('%i_animate.png' % (i + 1))
    # Release the figure once it is on disk; otherwise all 15 figures stay
    # registered with pyplot and keep their memory until the process exits.
    plt.close(fig)
官方示例运行使用Keras库,因此先搭建tensorflow-gpu的虚拟环境,再安装 keras库
找到的唯一成功的方法,配合anaconda也最简单
tensorflow安装教程:https://blog.csdn.net/m0_49090516/article/details/113576003 。安装完成后,使用 conda install keras==(对应版本) 安装keras。
目前已经能够成功运行,但感觉未使用GPU
查询GPU情况,只能找到CPU
from tensorflow.python.client import device_lib
# Print the local devices TensorFlow reports, to check whether a GPU is visible.
local_devices = device_lib.list_local_devices()
print(local_devices)
查询网上的解决方案后,先尝试卸载(uninstall)tensorflow。
原因可能是版本不对应,于是换用较低版本的tensorflow。
经过讨论,认为可能是电脑显卡资源不足导致的,因此放弃解决此问题。