PYTORCH 笔记 DILATE 代码解读

article/2025/8/29 5:32:26

dilate 完整代码路径:vincent-leguen/DILATE: Code for our NeurIPS 2019 paper "Shape and Time Distortion Loss for Training Deep Time Series Forecasting Models" (github.com)

1 main 函数

1.1 导入库

import numpy as np
import torch
from data.synthetic_dataset import create_synthetic_dataset, SyntheticDataset
from models.seq2seq import EncoderRNN, DecoderRNN, Net_GRU
from loss.dilate_loss import dilate_loss
from torch.utils.data import DataLoader
import random
from tslearn.metrics import dtw, dtw_path
import matplotlib.pyplot as plt
import warnings
import warnings; warnings.simplefilter('ignore')

1.2 设置超参数

# Device selection and reproducibility.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
random.seed(0)

# Experiment parameters.
batch_size = 100
N = 500        # number of time series in each of the train and test splits
N_input = 20   # number of input time steps
N_output = 20  # number of output (forecast) time steps
sigma = 0.01   # scale of the additive noise
gamma = 0.01   # smoothing parameter needed by soft-DTW

1.3 导入数据集

num_workers那里,在windows操作环境下,只能设置为0,否则会报错

pytorch错误解决: BrokenPipeError: [Errno 32] Broken pipe_UQI-LIUWJ的博客-CSDN博客

# Load synthetic dataset.
# NOTE(review): on Windows, num_workers must stay 0 or the DataLoader raises
# BrokenPipeError: [Errno 32] Broken pipe.
X_train_input, X_train_target, X_test_input, X_test_target, train_bkp, test_bkp = \
    create_synthetic_dataset(N, N_input, N_output, sigma)
# train input / train target / test input / test target,
# plus the breakpoint (sudden-drop) positions of the train and test series.
dataset_train = SyntheticDataset(X_train_input, X_train_target, train_bkp)
dataset_test = SyntheticDataset(X_test_input, X_test_target, test_bkp)
# Train and test DataLoaders.
trainloader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True, num_workers=0)
testloader = DataLoader(dataset_test, batch_size=batch_size, shuffle=False, num_workers=0)

 

数据集部分具体实现见 2: 

1.4 train_model 训练模型的方法


def train_model(net, loss_type, learning_rate, epochs=1000, gamma=0.001,
                print_every=50, eval_every=50, verbose=1, Lambda=1, alpha=0.5):
    """Train `net` on the global `trainloader` with the chosen loss.

    Parameters
    ----------
    net : seq2seq network producing [batch, N_output, 1] predictions.
    loss_type : 'mse', 'dilate', or 'soft' (soft-DTW = shape part of DILATE).
    learning_rate : Adam learning rate.
    gamma : soft-DTW smoothing parameter.
    print_every / eval_every : epoch cadence for logging / evaluation.
    Lambda : kept for interface compatibility (unused).
    alpha : shape/temporal trade-off of the DILATE loss.
    """
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    criterion = torch.nn.MSELoss()  # default loss function
    for epoch in range(epochs):
        for i, data in enumerate(trainloader, 0):
            inputs, target, _ = data  # inputs: [batch_size, input_size, 1]
            inputs = torch.tensor(inputs, dtype=torch.float32).to(device)
            target = torch.tensor(target, dtype=torch.float32).to(device)
            batch_size, N_output = target.shape[0:2]
            # forward + backward + optimize
            outputs = net(inputs)  # [batch_size, output_size, 1]
            # Defaults so the logging below works even for the pure-MSE case.
            loss_mse, loss_shape, loss_temporal = torch.tensor(0), torch.tensor(0), torch.tensor(0)
            if loss_type == 'mse':
                loss_mse = criterion(target, outputs)
                loss = loss_mse
            elif loss_type == 'dilate':
                # DILATE loss defined in loss.dilate_loss.
                loss, loss_shape, loss_temporal = dilate_loss(target, outputs, alpha, gamma, device)
            elif loss_type == 'soft':
                # Pure soft-DTW = the shape component of the DILATE loss.
                _, loss, loss_temporal = dilate_loss(target, outputs, alpha, gamma, device)
            else:
                # BUG FIX: an unknown loss_type previously left `loss` unbound
                # and crashed with a confusing NameError at loss.backward().
                raise ValueError('unknown loss_type: ' + str(loss_type))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if verbose:
            if epoch % print_every == 0:
                print('epoch ', epoch, ' loss ', loss.item(),
                      ' loss shape ', loss_shape.item(),
                      ' loss temporal ', loss_temporal.item())
            # BUG FIX: evaluation previously keyed off print_every, which left
            # the eval_every parameter dead; use eval_every as documented.
            if epoch % eval_every == 0:
                eval_model(net, testloader, gamma, verbose=1)

1.5 eval_model 函数(评估模型当前的效果)

def eval_model(net, loader, gamma, verbose=1):
    """Evaluate `net` on `loader`; print mean MSE, DTW, and TDI metrics.

    TDI (temporal distortion index) = mean squared distance of the optimal
    DTW path from the diagonal, normalized by N_output^2.
    """
    criterion = torch.nn.MSELoss()
    losses_mse = []
    losses_dtw = []
    losses_tdi = []
    with torch.no_grad():  # evaluation only; no autograd graph needed
        for batch_idx, data in enumerate(loader, 0):
            # inputs / target predictions / breakpoint positions
            inputs, target, breakpoints = data
            inputs = torch.tensor(inputs, dtype=torch.float32).to(device)
            target = torch.tensor(target, dtype=torch.float32).to(device)
            batch_size, N_output = target.shape[0:2]
            outputs = net(inputs)  # predicted output
            # MSE of this batch.
            loss_mse = criterion(target, outputs)
            # DTW and TDI, averaged over the batch.
            loss_dtw, loss_tdi = 0, 0
            for k in range(batch_size):
                target_k_cpu = target[k, :, 0:1].view(-1).detach().cpu().numpy()
                output_k_cpu = outputs[k, :, 0:1].view(-1).detach().cpu().numpy()
                # Optimal DTW path and DTW value for this pair of series.
                path, sim = dtw_path(target_k_cpu, output_k_cpu)
                loss_dtw += sim
                # BUG FIX: this loop previously reused `i`, shadowing the
                # outer batch loop variable; renamed to (pi, pj).
                dist = 0
                for pi, pj in path:
                    dist += (pi - pj) * (pi - pj)  # squared distance from the diagonal
                loss_tdi += dist / (N_output * N_output)
            losses_mse.append(loss_mse.item())
            losses_dtw.append(loss_dtw / batch_size)
            losses_tdi.append(loss_tdi / batch_size)
    print(' Eval mse= ', np.array(losses_mse).mean(),
          ' dtw= ', np.array(losses_dtw).mean(),
          ' tdi= ', np.array(losses_tdi).mean())

 1.6 基本seq2seq模型

详细见3

encoder = EncoderRNN(input_size=1,hidden_size=128,num_grulstm_layers=1,batch_size=batch_size).to(device)
#一个GRU:输入维度1维,输出&隐藏层维度128维,单层GRU,batch_firstdecoder = DecoderRNN(input_size=1,hidden_size=128,num_grulstm_layers=1,fc_units=16,output_size=1).to(device)
'''
一个GRU:输入维度1维,输出&隐藏层维度128维,单层GRU,batch_first
两个全连接:128-》16,16-》1
'''

1.7 基于不同的loss function,建立不同的network(并训练之)

注:每个不同的model,需要对应不同的encoder和decoder(也就是每一个network在train之前,都需要新建一对encoder-decoder)。

1.7.1 基于Dilate

# Model trained with the full DILATE loss.
net_gru_dilate = Net_GRU(encoder, decoder, N_output, device).to(device)
train_model(net_gru_dilate, loss_type='dilate', learning_rate=0.001, epochs=500, gamma=gamma, print_every=50, eval_every=50, verbose=1)

1.7.2 基于soft-dtw

重新新建一对encoder-decoder


# Fresh encoder/decoder pair for the soft-DTW model
# (each network must be trained with its own encoder and decoder).
# Encoder: 1-d input, 128-d hidden state, single GRU layer, batch_first.
encoder1 = EncoderRNN(input_size=1, hidden_size=128, num_grulstm_layers=1, batch_size=batch_size).to(device)
# Decoder: same GRU shape, followed by two fully connected layers (128 -> 16 -> 1).
decoder1 = DecoderRNN(input_size=1, hidden_size=128, num_grulstm_layers=1, fc_units=16, output_size=1).to(device)
net_gru_soft = Net_GRU(encoder1, decoder1, N_output, device).to(device)
train_model(net_gru_soft, loss_type='soft', learning_rate=0.001, epochs=500, gamma=gamma, print_every=50, eval_every=50, verbose=1)

1.7.3基于MSE

encoder2 = EncoderRNN(input_size=1,hidden_size=128,num_grulstm_layers=1,batch_size=batch_size).to(device)
#一个GRU:输入维度1维,输出&隐藏层维度128维,单层GRU,batch_first
decoder2 = DecoderRNN(input_size=1,hidden_size=128,num_grulstm_layers=1,fc_units=16,output_size=1).to(device)
'''
一个GRU:输入维度1维,输出&隐藏层维度128维,单层GRU,batch_first
两个全连接:128-》16,16-》1
'''
net_gru_mse = Net_GRU(encoder2,decoder2,N_output,device).to(device)
train_model(net_gru_mse,loss_type='mse',learning_rate=0.001,epochs=500,gamma=gamma,print_every=50,eval_every=50,verbose=1)

1.7.4 可视化结果

# Visualize results: for each test series, plot the input, the ground truth,
# and the predictions of the MSE / soft-DTW / DILATE models side by side.
gen_test = iter(testloader)
test_inputs, test_targets, breaks = next(gen_test)  # one batch of test data
test_inputs = torch.tensor(test_inputs, dtype=torch.float32).to(device)
test_targets = torch.tensor(test_targets, dtype=torch.float32).to(device)
criterion = torch.nn.MSELoss()
nets = [net_gru_mse, net_gru_soft, net_gru_dilate]

for ind in range(1, 51):
    plt.figure()
    plt.rcParams['figure.figsize'] = (30.0, 10.0)
    k = 1
    for net in nets:
        with torch.no_grad():  # inference only; no autograd graph needed
            pred = net(test_inputs)
        # BUG FIX: this local was previously named `input`, shadowing the builtin.
        inp = test_inputs.detach().cpu().numpy()[ind, :, :]
        target = test_targets.detach().cpu().numpy()[ind, :, :]
        preds = pred.detach().cpu().numpy()[ind, :, :]
        plt.subplot(1, 3, k)  # three subplots in one row, one per model
        plt.plot(range(0, N_input), inp, label='input', linewidth=3)
        # Prepend the last observed point so target/prediction connect to the input curve.
        plt.plot(range(N_input - 1, N_input + N_output),
                 np.concatenate([inp[N_input - 1:N_input], target]),
                 label='target', linewidth=3)
        plt.plot(range(N_input - 1, N_input + N_output),
                 np.concatenate([inp[N_input - 1:N_input], preds]),
                 label='prediction', linewidth=3)
        plt.xticks(range(0, 40, 2))
        plt.legend()
        k = k + 1
    plt.show()

2 data/synthetic_dataset.py

2.1 导入库

import numpy as np
import torch
import random
from torch.utils.data import Dataset, DataLoader

2.2  create_synthetic_dataset 生成人工合成数据


def create_synthetic_dataset(N, N_input, N_output, sigma):
    """Generate 2*N noisy step series and split them into train/test halves.

    Each series of length N_input + N_output gets uniform noise scaled by
    sigma, two impulse peaks at random positions in the input window, and a
    level shift (the "breakpoint") somewhere in the output segment.

    Returns (train_in, train_out, test_in, test_out, train_bkp, test_bkp),
    where the inputs are [N, N_input] and outputs are [N, N_output].
    """
    series_list = []
    breakpoints = []
    for _ in range(2 * N):
        # Baseline noise: uniform in [0, sigma) at every time step.
        values = np.array([sigma * random.random() for _ in range(N_input + N_output)])
        # Two impulse positions inside the input window, with random amplitudes.
        pos_a = random.randint(1, 10)
        pos_b = random.randint(10, 18)
        amp_a = random.random()
        amp_b = random.random()
        # Offset of the breakpoint relative to the second impulse.
        shift = abs(pos_b - pos_a) + random.randint(-3, 3)
        values[pos_a:pos_a + 1] += amp_a
        values[pos_b:pos_b + 1] += amp_b
        # After the breakpoint the level jumps by the amplitude difference.
        values[pos_b + shift:] += (amp_b - amp_a)
        series_list.append(values)
        breakpoints.append(pos_b + shift)
    X = np.stack(series_list)
    breakpoints = np.array(breakpoints)
    train, test = X[0:N], X[N:2 * N]
    return (train[:, 0:N_input], train[:, N_input:N_input + N_output],
            test[:, 0:N_input], test[:, N_input:N_input + N_output],
            breakpoints[0:N], breakpoints[N:2 * N])
#训练集的 input 训练集的output 测试集的input 测试集的output,训练集的陡降点,测试集的陡降点
#[N,N_input]

2.3 SyntheticDataset 为了pytorch的Dataloader 准备

pytorch笔记:Dataloader_UQI-LIUWJ的博客-CSDN博客_torch的dataloader

class SyntheticDataset(torch.utils.data.Dataset):
    """Thin Dataset wrapper pairing input series, target series, and breakpoints.

    __getitem__ appends a trailing channel axis so each series is (T, 1),
    matching the shape the seq2seq models expect.
    """

    def __init__(self, X_input, X_target, breakpoints):
        super(SyntheticDataset, self).__init__()
        self.X_input = X_input
        self.X_target = X_target
        self.breakpoints = breakpoints

    def __len__(self):
        return self.X_input.shape[0]

    def __getitem__(self, idx):
        inp = self.X_input[idx, :, np.newaxis]
        tgt = self.X_target[idx, :, np.newaxis]
        return (inp, tgt, self.breakpoints[idx])

3  seq2seq.py 模型部分

3.1 导入库

import torch
import torch.nn as nn
import torch.nn.functional as F

3.2 encoder 部分

pytorch笔记:torch.nn.GRU_UQI-LIUWJ的博客-CSDN博客

class EncoderRNN(torch.nn.Module):
    """Single-direction GRU encoder (batch_first); also builds the initial h0."""

    def __init__(self, input_size, hidden_size, num_grulstm_layers, batch_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.num_grulstm_layers = num_grulstm_layers
        self.gru = nn.GRU(input_size=input_size,
                          hidden_size=hidden_size,
                          num_layers=num_grulstm_layers,
                          batch_first=True)

    def forward(self, input, hidden):
        # input: [batch_size, length T, dimensionality d]
        # Returns (per-step outputs, updated hidden state).
        return self.gru(input, hidden)

    def init_hidden(self, device):
        # Zero h0 of shape [num_layers * num_directions, batch, hidden_size].
        return torch.zeros(self.num_grulstm_layers, self.batch_size,
                           self.hidden_size, device=device)

3.3 decoder部分

class DecoderRNN(nn.Module):
    """GRU decoder followed by two fully connected layers (hidden -> fc_units -> output)."""

    def __init__(self, input_size, hidden_size, num_grulstm_layers, fc_units, output_size):
        super(DecoderRNN, self).__init__()
        self.gru = nn.GRU(input_size=input_size,
                          hidden_size=hidden_size,
                          num_layers=num_grulstm_layers,
                          batch_first=True)
        self.fc = nn.Linear(hidden_size, fc_units)
        self.out = nn.Linear(fc_units, output_size)

    def forward(self, input, hidden):
        gru_out, hidden = self.gru(input, hidden)
        projected = F.relu(self.fc(gru_out))
        return self.out(projected), hidden

3.4 seq2seq (encoder和decoder拼起来)

class Net_GRU(nn.Module):
    """Seq2seq wrapper: the encoder consumes the input series one step at a
    time, then the decoder autoregressively emits `target_length` future steps,
    seeded with the last observed input value and the final encoder state."""

    def __init__(self, encoder, decoder, target_length, device):
        super(Net_GRU, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.target_length = target_length
        self.device = device

    def forward(self, x):
        # x: [batch_size, input_length, 1] (batch_first).
        seq_len = x.shape[1]
        hidden = self.encoder.init_hidden(self.device)  # h0
        # Feed the observed sequence one time step at a time.
        for t in range(seq_len):
            _, hidden = self.encoder(x[:, t:t + 1, :], hidden)
        # First decoder input = last element of the input sequence;
        # decoder starts from the encoder's final hidden state.
        step_input = x[:, -1, :].unsqueeze(1)
        preds = torch.zeros([x.shape[0], self.target_length, x.shape[2]]).to(self.device)
        for t in range(self.target_length):
            step_output, hidden = self.decoder(step_input, hidden)
            step_input = step_output  # feed the prediction back in
            preds[:, t:t + 1, :] = step_output
        return preds  # [batch_size, target_length, 1]

4 dilate_loss.py 计算dilate_loss

import torch
from . import soft_dtw
from . import path_soft_dtw


def dilate_loss(outputs, targets, alpha, gamma, device):
    """DILATE loss = alpha * shape (soft-DTW) + (1 - alpha) * temporal (TDI).

    outputs, targets: shape (batch_size, N_output, 1).
    gamma: soft-DTW smoothing parameter.
    Returns (loss, loss_shape, loss_temporal).
    """
    batch_size, N_output = outputs.shape[0:2]
    # Custom autograd Function with hand-written forward/backward; `apply`
    # gives a callable alias.
    softdtw_batch = soft_dtw.SoftDTWBatch.apply
    # Pairwise squared-Euclidean cost matrix for each batch element:
    # (batch_size, N_output, N_output).
    D = torch.zeros((batch_size, N_output, N_output)).to(device)
    for k in range(batch_size):
        Dk = soft_dtw.pairwise_distances(targets[k, :, :].view(-1, 1),
                                         outputs[k, :, :].view(-1, 1))
        D[k:k + 1, :, :] = Dk
    loss_shape = softdtw_batch(D, gamma)  # batch-mean soft-DTW shape loss
    # Smoothed argmin path A* (first-order gradient of soft-DTW).
    path_dtw = path_soft_dtw.PathDTWBatch.apply
    path = path_dtw(D, gamma)
    # BUG FIX: torch.range is deprecated; arange(1, N+1) with an explicit
    # float dtype reproduces the same [1..N] float vector.
    Omega = soft_dtw.pairwise_distances(
        torch.arange(1, N_output + 1, dtype=torch.float32).view(N_output, 1)).to(device)
    # TDI <A*, Omega> / N^2 (the division supplies the k^2 factor of Omega).
    loss_temporal = torch.sum(path * Omega) / (N_output * N_output)
    loss = alpha * loss_shape + (1 - alpha) * loss_temporal
    return loss, loss_shape, loss_temporal

pytorch 笔记: 扩展torch.autograd_UQI-LIUWJ的博客-CSDN博客

5 soft_dtw.py 计算soft dtw (shape loss)

5.1 导入库

import numpy as np
import torch
from numba import jit
from torch.autograd import Function
#numba的作用是加速计算

5.2 pairwise_distances 两个序列任意两点之间的距离

def pairwise_distances(x, y=None):
    """Squared Euclidean distance between every row of x and every row of y.

    Input: x is an N x d matrix; y is an optional M x d matrix.
    Output: an N x M matrix where dist[i, j] = ||x[i, :] - y[j, :]||^2.
    When y is omitted, y = x is used (self-distances).
    """
    sq_x = (x ** 2).sum(1).view(-1, 1)  # per-row squared norms, (N, 1)
    if y is None:
        other_t = torch.transpose(x, 0, 1)
        sq_other = sq_x.view(1, -1)
    else:
        other_t = torch.transpose(y, 0, 1)
        sq_other = (y ** 2).sum(1).view(1, -1)
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, clamped to [0, +inf)
    # to absorb tiny negative values from floating-point error.
    dist = sq_x + sq_other - 2.0 * torch.mm(x, other_t)
    return torch.clamp(dist, 0.0, float('inf'))

5.3 compute_softdtw 计算soft-dtw

@jit(nopython=True)
def compute_softdtw(D, gamma):
    """Forward recursion of soft-DTW over a cost matrix D of shape (N, M).

    Returns the padded accumulated-cost matrix R of shape (N+2, M+2); the
    soft-DTW value sits at R[N, M]. The 1e8 border keeps out-of-range cells
    from ever being selected by the soft-min.
    """
    n_rows = D.shape[0]
    n_cols = D.shape[1]
    R = np.zeros((n_rows + 2, n_cols + 2)) + 1e8
    R[0, 0] = 0
    for j in range(1, n_cols + 1):
        for i in range(1, n_rows + 1):
            # Soft-min over the three predecessors, computed with the
            # log-sum-exp trick so exp() cannot overflow.
            c_diag = -R[i - 1, j - 1] / gamma
            c_up = -R[i - 1, j] / gamma
            c_left = -R[i, j - 1] / gamma
            top = max(max(c_diag, c_up), c_left)
            acc = np.exp(c_diag - top) + np.exp(c_up - top) + np.exp(c_left - top)
            soft_min = -gamma * (np.log(acc) + top)
            # R is padded by one, so R[i, j] pairs with D[i-1, j-1].
            R[i, j] = D[i - 1, j - 1] + soft_min
    return R

 

5.4 compute_softdtw_backward 使用动态规划的soft-dtw反向传播

@jit(nopython = True)
def compute_softdtw_backward(D_, R, gamma):
    """Backward recursion of soft-DTW.

    D_ : (N, M) pairwise cost matrix between the target and output series.
    R  : padded accumulated-cost matrix produced by compute_softdtw (modified
         in place here to set up the boundary).
    Returns E[1:N+1, 1:M+1], the gradient of soft-DTW w.r.t. the costs.
    """
    N = D_.shape[0]
    M = D_.shape[1]
    # Pad D and E by one cell on every side so the recursion below can index
    # i+1 / j+1 without bounds checks.
    D = np.zeros((N + 2, M + 2))
    E = np.zeros((N + 2, M + 2))
    D[1:N + 1, 1:M + 1] = D_
    E[-1, -1] = 1
    # Boundary set-up: forbid the padded column/row, then anchor the corner.
    R[:, -1] = -1e8
    R[-1, :] = -1e8
    R[-1, -1] = R[-2, -2]
    # Reverse sweep: accumulate gradients from the bottom-right corner.
    for j in range(M, 0, -1):
        for i in range(N, 0, -1):
            # gamma * log d(r_{i+1,j})/d(r_{i,j}) = r_{i+1,j} - r_{i,j} - delta_{i+1,j}
            a0 = (R[i + 1, j] - R[i, j] - D[i + 1, j]) / gamma
            b0 = (R[i, j + 1] - R[i, j] - D[i, j + 1]) / gamma
            c0 = (R[i + 1, j + 1] - R[i, j] - D[i + 1, j + 1]) / gamma
            a = np.exp(a0)
            b = np.exp(b0)
            c = np.exp(c0)
            # Chain rule over the three successors of cell (i, j).
            E[i, j] = E[i + 1, j] * a + E[i, j + 1] * b + E[i + 1, j + 1] * c
    return E[1:N + 1, 1:M + 1]

 

 5.5 SoftDTWBatch——自定义的、扩展torch.autograd的损失函数(L-shape)

class SoftDTWBatch(Function):
    """Custom autograd Function computing the batch-mean soft-DTW (shape loss).

    Forward runs the numba DP per batch element; backward uses the dedicated
    DP in compute_softdtw_backward instead of autograd tracing.
    """

    @staticmethod
    def forward(ctx, D, gamma = 1.0): # D.shape: [batch_size, N , N]
        # D: pairwise cost matrices between target and output series.
        dev = D.device
        batch_size,N,N = D.shape
        gamma = torch.FloatTensor([gamma]).to(dev)
        D_ = D.detach().cpu().numpy()  # numba works on numpy arrays
        g_ = gamma.item()  # smoothing parameter for soft-DTW
        total_loss = 0
        # Accumulated-cost matrices, padded to (N+2, N+2) by compute_softdtw.
        R = torch.zeros((batch_size, N+2 ,N+2)).to(dev)
        for k in range(0, batch_size):
            # One soft-DTW per pair of sequences in the batch.
            Rk = torch.FloatTensor(compute_softdtw(D_[k,:,:], g_)).to(dev)
            R[k:k+1,:,:] = Rk
            # Rk[-2, -2] is the soft-DTW value (the padded corner is at -1).
            total_loss = total_loss + Rk[-2,-2]
        ctx.save_for_backward(D, R, gamma)
        # Batch-mean shape loss.
        return total_loss / batch_size

    @staticmethod
    def backward(ctx, grad_output):
        dev = grad_output.device
        D, R, gamma = ctx.saved_tensors
        batch_size,N,N = D.shape
        D_ = D.detach().cpu().numpy()
        R_ = R.detach().cpu().numpy()
        g_ = gamma.item()
        E = torch.zeros((batch_size, N ,N)).to(dev)
        for k in range(batch_size):
            # E_k = d(softDTW)/dD for batch element k.
            Ek = torch.FloatTensor(compute_softdtw_backward(D_[k,:,:], R_[k,:,:], g_)).to(dev)
            E[k:k+1,:,:] = Ek
        # Gradient w.r.t. D; None for the gamma argument.
        return grad_output * E, None

6 path_soft_dtw 计算temporal loss

6.1 导入库

import numpy as np
import torch
from torch.autograd import Function
from numba import jit

6.2 辅助函数

6.2.1 my_max

@jit(nopython=True)
def my_max(x, gamma):
    """Smoothed maximum of x via log-sum-exp with temperature gamma.

    Returns (smooth_max, softmax_weights); the weights are e^x / sum(e^x)
    computed stably by shifting with max(x) first.
    """
    peak = np.max(x)
    shifted = np.exp((x - peak) / gamma)
    total = np.sum(shifted)
    return gamma * np.log(total) + peak, shifted / total

6.2.2 my_min

@jit(nopython=True)
def my_min(x, gamma):
    """Smoothed minimum of x; the weights are the softmin distribution."""
    neg_smooth_max, weights = my_max(-x, gamma)
    return -neg_smooth_max, weights

6.2.3  my_max_hessian_product & my_min_hessian_product

二阶导的部分我其实没太搞明白

@jit(nopython = True)
def my_max_hessian_product(p, z, gamma):
    """Hessian-vector product of the smoothed max.

    p are the softmax weights returned by my_max; z is the vector being
    multiplied against the Hessian.
    """
    weighted = p * z
    return (weighted - p * np.sum(weighted)) / gamma


@jit(nopython = True)
def my_min_hessian_product(p, z, gamma):
    """Hessian-vector product of the smoothed min (negated max version)."""
    return - my_max_hessian_product(p, z, gamma)

6.2.4 dtw_grad

@jit(nopython = True)
def dtw_grad(theta, gamma):
    """Soft-DTW forward pass plus its gradient (the smoothed optimal path).

    theta : (m, n) pairwise cost matrix between the target and output series.
    Returns (V[m, n], E[1:m+1, 1:n+1], Q, E):
      V : soft-DTW accumulated costs; V[m, n] is the soft-DTW value.
      E : first-order gradient of soft-DTW (the smoothed argmin path A*).
      Q : for each cell, the softmin weights of its three predecessors
          (left, diagonal, up).
    """
    m = theta.shape[0]
    n = theta.shape[1]
    V = np.zeros((m + 1, n + 1))
    # Large boundary values so the first row/column are never chosen.
    V[:, 0] = 1e10
    V[0, :] = 1e10
    V[0, 0] = 0
    Q = np.zeros((m + 2, n + 2, 3))
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            # theta is indexed starting from 0.
            # Smoothed min over the three predecessors; the returned weights
            # are d r_{i,j} / d r_{i,j-1}, d r_{i,j} / d r_{i-1,j-1},
            # d r_{i,j} / d r_{i-1,j} respectively.
            v, Q[i, j] = my_min(np.array([V[i, j - 1],
                                          V[i - 1, j - 1],
                                          V[i - 1, j]]) , gamma)
            V[i, j] = theta[i - 1, j - 1] + v  # soft-DTW recursion
    # Reverse sweep: E is padded by one cell on every side.
    E = np.zeros((m + 2, n + 2))
    E[m + 1, :] = 0
    E[:, n + 1] = 0
    E[m + 1, n + 1] = 1
    Q[m + 1, n + 1] = 1
    for i in range(m,0,-1):
        for j in range(n,0,-1):
            # e_{i,j} = dr_{i,j+1}/dr_{i,j} * e_{i,j+1}
            #         + dr_{i+1,j+1}/dr_{i,j} * e_{i+1,j+1}
            #         + dr_{i+1,j}/dr_{i,j} * e_{i+1,j}
            E[i, j] = Q[i, j + 1, 0] * E[i, j + 1] + \
                Q[i + 1, j + 1, 1] * E[i + 1, j + 1] + \
                Q[i + 1, j, 2] * E[i + 1, j]
    return V[m, n], E[1:m + 1, 1:n + 1], Q, E

 

 6.2.5 dtw_hessian_prod  dtw一阶梯度的梯度(散度)

@jit(nopython = True)
def dtw_hessian_prod(theta, Z, Q, E, gamma):
    """Hessian-vector product for soft-DTW (second-order backward pass).

    theta : (m, n) pairwise cost matrix between target and output series.
    Z     : incoming gradient w.r.t. the smoothed path.
    Q     : predecessor softmin weights from dtw_grad.
    E     : first-order gradient from dtw_grad (padded with a border of zeros).
    Returns (V_dot[m, n], E_dot[1:m+1, 1:n+1]), where E_dot is the
    Hessian-vector product (the "dot" variables are directional derivatives).
    """
    m = Z.shape[0]
    n = Z.shape[1]
    V_dot = np.zeros((m + 1, n + 1))
    V_dot[0, 0] = 0
    Q_dot = np.zeros((m + 2, n + 2, 3))
    # Forward sweep: propagate the direction Z through the soft-min recursion.
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            # theta is indexed starting from 0.
            V_dot[i, j] = Z[i - 1, j - 1] + \
                Q[i, j, 0] * V_dot[i, j - 1] + \
                Q[i, j, 1] * V_dot[i - 1, j - 1] + \
                Q[i, j, 2] * V_dot[i - 1, j]
            v = np.array([V_dot[i, j - 1],V_dot[i - 1, j - 1],V_dot[i - 1, j]])
            Q_dot[i, j] = my_min_hessian_product(Q[i, j], v, gamma)
    # Reverse sweep: product rule over the first-order recursion
    # (each term pairs a differentiated factor with an undifferentiated one).
    E_dot = np.zeros((m + 2, n + 2))
    for j in range(n,0,-1):
        for i in range(m,0,-1):
            E_dot[i, j] = Q_dot[i, j + 1, 0] * E[i, j + 1] + \
                Q[i, j + 1, 0] * E_dot[i, j + 1] + \
                Q_dot[i + 1, j + 1, 1] * E[i + 1, j + 1] + \
                Q[i + 1, j + 1, 1] * E_dot[i + 1, j + 1] + \
                Q_dot[i + 1, j, 2] * E[i + 1, j] + \
                Q[i + 1, j, 2] * E_dot[i + 1, j]
    return V_dot[m, n], E_dot[1:m + 1, 1:n + 1]

6.2.6 PathDTWBatch——返回argminA*

class PathDTWBatch(Function):
    """Custom autograd Function returning the batch-mean smoothed DTW path A*.

    Forward computes the soft-DTW gradient (the smoothed argmin path) per
    batch element via dtw_grad; backward supplies the Hessian-vector product
    via dtw_hessian_prod.
    """

    @staticmethod
    def forward(ctx, D, gamma): # D.shape: [batch_size, N , N]
        # D: pairwise cost matrices between target and output series.
        batch_size,N,N = D.shape
        device = D.device
        D_cpu = D.detach().cpu().numpy()  # numba works on numpy arrays
        gamma_gpu = torch.FloatTensor([gamma]).to(device)
        grad_gpu = torch.zeros((batch_size, N ,N)).to(device)       # smoothed paths
        Q_gpu = torch.zeros((batch_size, N+2 ,N+2,3)).to(device)    # predecessor weights
        E_gpu = torch.zeros((batch_size, N+2 ,N+2)).to(device)      # padded gradients
        for k in range(0,batch_size): # loop over all D in the batch
            # grad_cpu_k: smoothed argmin path (soft-DTW first-order gradient);
            # Q_cpu_k: per-cell softmin weights of the three predecessors;
            # E_cpu_k: the same gradient with its zero border still attached.
            _, grad_cpu_k, Q_cpu_k, E_cpu_k = dtw_grad(D_cpu[k,:,:], gamma)
            grad_gpu[k,:,:] = torch.FloatTensor(grad_cpu_k).to(device)
            Q_gpu[k,:,:,:] = torch.FloatTensor(Q_cpu_k).to(device)
            E_gpu[k,:,:] = torch.FloatTensor(E_cpu_k).to(device)
        ctx.save_for_backward(grad_gpu,D, Q_gpu ,E_gpu, gamma_gpu)
        # Batch-mean smoothed path A*.
        return torch.mean(grad_gpu, dim=0)

    @staticmethod
    def backward(ctx, grad_output):
        device = grad_output.device
        grad_gpu, D_gpu, Q_gpu, E_gpu, gamma = ctx.saved_tensors
        D_cpu = D_gpu.detach().cpu().numpy()
        Q_cpu = Q_gpu.detach().cpu().numpy()
        E_cpu = E_gpu.detach().cpu().numpy()
        gamma = gamma.detach().cpu().numpy()[0]
        Z = grad_output.detach().cpu().numpy()  # incoming gradient direction
        batch_size,N,N = D_cpu.shape
        Hessian = torch.zeros((batch_size, N ,N)).to(device)
        for k in range(0,batch_size):
            # Hessian-vector product for batch element k.
            _, hess_k = dtw_hessian_prod(D_cpu[k,:,:],Z,Q_cpu[k,:,:,:],E_cpu[k,:,:],gamma)
            Hessian[k:k+1,:,:] = torch.FloatTensor(hess_k).to(device)
        # Gradient w.r.t. D; None for the gamma argument.
        return  Hessian, None


http://chatgpt.dhexx.cn/article/XN8Of81I.shtml

相关文章

Opencv中的erode和dilate(腐蚀和膨胀-python实现)

文章目录 1.腐蚀原理&#xff08;1&#xff09;具体实现过程&#xff08;2&#xff09;.函数讲解 &#xff08;3&#xff09;.代码实战2.膨胀原理&#xff08;1&#xff09;具体实现过程&#xff08;2&#xff09;函数讲解&#xff08;3&#xff09;代码实现 1.腐蚀原理 &…

OpenCV-Python腐蚀膨胀函数erode、dilate使用详解

☞ ░ 前往老猿Python博客 https://blog.csdn.net/LaoYuanPython ░ 一、引言 在《OpenCV-Python图像处理&#xff1a;腐蚀和膨胀原理及erode、dilate函数介绍&#xff1a;https://blog.csdn.net/LaoYuanPython/article/details/109441709》介绍了图像腐蚀和膨胀的基本原理&a…

OpenCV-Python图像处理:腐蚀和膨胀原理及erode、dilate函数介绍

☞ ░ 前往老猿Python博客 https://blog.csdn.net/LaoYuanPython ░ 一、引言 关于图像的腐蚀和膨胀&#xff0c;网上介绍的资料非常多&#xff0c;老猿也看了很多&#xff0c;总体来说主要偏向于就使用OpenCV腐蚀和膨胀函数的应用&#xff0c;另外原理介绍的有一小部分&#…

opencv之dilate()函数

概述 dilate()函数可以对输入图像用特定结构元素进行膨胀操作,该结构元素确定膨胀操作过程中的邻域的形状,各点像素值将被替换为对应邻域上的最大值: API说明 C++ API: void cv::dilate(InputArraysrc,(原始图像:通道数不限,depth必须是CV_8U,CV_16U,CV_16S,CV_…

OpenCV-膨胀cv::dilate

作者&#xff1a;翟天保Steven 版权声明&#xff1a;著作权归作者所有&#xff0c;商业转载请联系作者获得授权&#xff0c;非商业转载请注明出处 原理 形态学是图像处理中常见的名词&#xff0c;图像处理的形态学基本属于数学形态学的范畴&#xff0c;是一门建立在格论和拓扑…

【图像处理】腐蚀(erode)和膨胀(dilate)

【图像处理】腐蚀&#xff08;erode&#xff09;和膨胀&#xff08;dilate&#xff09; 原理腐蚀&#xff08;erode&#xff09;膨胀&#xff08;dilate&#xff09; OpenCV实现C实现python实现 开运算&#xff08;Opening Operation&#xff09;闭运算&#xff08;Closing Ope…

chatgpt赋能Python-python_dilate

Python中的dilate操作&#xff1a;了解该操作及其应用 在计算机视觉领域&#xff0c;dilate操作是一种常用的图像处理技术。在Python中&#xff0c;我们可以使用OpenCV库来实现dilate操作。本文将介绍dilate操作的基本概念&#xff0c;讨论其应用及如何使用Python进行实现。 …

OPenCV膨胀函数dilate()的使用

OPenCV版本&#xff1a;4.4 IDE&#xff1a;VS2017 功能描述 简述&#xff1a;使用一个指定的核元素去膨胀一个图像&#xff0c;图像膨胀的过程类似于一个卷积的过程&#xff0c;源图像矩阵A以及结构元素B&#xff0c;B在A矩阵上依次移动&#xff0c;每个位置上B所覆盖元素的最…

OpenCV每日函数 图像过滤模块 (5) dilate膨胀函数

一、概述 通过使用特定的结构元素来扩大图像。该函数使用指定的结构元素扩展源图像,该结构元素确定取最大值的像素邻域的形状: 膨胀可以应用数次(迭代)。 在多通道图像的情况下,每个通道都是独立处理的。 膨胀: 腐蚀: 二、dilate函数 1、函数原型 cv::dilate…

生产环境屏蔽swagger

只需要在swagger的配置类上增加Profile( )注解&#xff0c;指定环境变量&#xff0c;即可屏蔽生产环境上的swagger&#xff0c;如下&#xff1a; 先使用dev环境&#xff0c;访问swagger&#xff0c;结果如下&#xff1a; 再使用test环境&#xff0c;访问swagger&#xff0c;结果…

生产环境解决跨域

1.先决条件是在开发环境中使用的是代理。 2然后在生产环境当中&#xff0c;增加一个proxy代理&#xff1b; &#xff08;1&#xff09;先下载&#xff1a;cnpm install http-proxy-middleware -D &#xff08;2&#xff09;引入&#xff1a; &#xff08;3&#xff09;使用&am…

linux环境下编译部署php生产环境

linux环境下编译部署php生产环境 版本控制 php&#xff1a;7.2.4 nginx&#xff1a;1.9.9 部分插件版本 xlswriter&#xff1a;1.3.3.2 redis:3.1.3 一、安装php 1.安装依赖(之后安装缺少的依赖都可以用yum安装) yum install -y gcc gcc-c make zlib zlib-devel pcre pcre…

用 source map 调试生产环境

当我们的应用程序部署到生产环境时&#xff0c;我们发现它与我们在开发环境时的代码不同。我们的代码在构建过程中会以各种方式进行修改和优化。 TypeScript 被转译、压缩。生成的 JavaScript 包尽可能小并且能够在浏览器中正常运行。 所有这些步骤都很有效率&#xff0c;它们…

uniapp 小程序 开发环境和生产环境

uni开发小程序 运行到开发工具 再上传 process.env.NODE_ENV 获取的值是development 一直是开发环境 用uni发行 上传到微信公众平台是生成环境 在 HBuilderX 中&#xff0c;点击“运行”编译出来的代码是开发环境&#xff0c;点击“发行”编译出来的代码是生产环境 let url;if …

Vue cli3配置生产环境,开发环境,和测试环境

目录1、先在package.json文件中添加&#xff1a; 2、在项目目录下建立 .env文件和.env.test文件 3、配置api变量 3.1、配置axios的baseURL路径 3.2、自己拼接的路径 4、.env知识点补充 4.1&#xff0c;关于文件名&#xff1a;必须以如下方式命名&#xff0c;不要…

31 SpringBoot多环境的切换(生产环境、开发环境、测试环境)

参考链接&#xff1a; Spring官网 Spring官网 外部配置加载顺序的说明 SpringBoot多环境的切换(生产环境、开发环境、测试环境)_周太阳的博客-CSDN博客_springboot测试生产环境切换 java maven 测试生产环境配置_SpringBoot生产环境和测试环境配置分离的教程详解_落云歌语文…

生产环境关闭 swagger

#生产环境需要关闭 swagger 防止接口暴露 1&#xff0c;启动判断写在相应的环境配置文件中&#xff0c;根据条件判断是否启动 swagger &#xff1a; 添加配置项&#xff1a;swagger.is.enable #是否激活 swagger true or false swagger.is.enabletrue2&#xff0c;代码取值&a…

前端工程师生产环境 debugger 技巧

关注公众号 前端开发博客&#xff0c;领27本电子书 回复加群&#xff0c;自助秒进前端群 导言 开发环境 debug 是每个程序员上岗的必备技能。生产环境呢&#xff1f;虽然生产环境 debug 是一件非常不优雅的行为&#xff0c;但是由于种种原因&#xff0c;我们又不得不这么干。 那…

【微信小程序 | 实战开发】开发环境、测试环境和生产环境

写在前面&#xff1a; 你是否想要掌握人工智能的最新技术和应用&#xff1f;你是否想要成为未来社会的创新者和领导者&#xff1f;你是否想要和全球的优秀导师和同学一起学习和交流&#xff1f;如果你的答案是肯定的&#xff0c;那么欢迎来到床长人工智能教程网站&#xff0c;这…

Nginx生产环境平滑升级

一.背景 最近遇到一个比较尴尬而又实际的问题&#xff0c;那就是我们生产环境使用的Nginx是Centos6的老古董。业务需求需要加载Nginx的一个模块来实现&#xff0c;但是版本太老了&#xff0c;需要Nginx1.18之后才能支持&#xff0c;而我们的是Nginx1.12。 那升级Nginx是我们要做…