Issue
I'm trying to use an implementation of the D-Linear model in PyTorch.
Here is the model architecture:
from re import X
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series.

    The series is padded on both ends by repeating its first and last time
    step, then averaged with a sliding window of ``kernel_size`` steps.

    Args:
        kernel_size: width of the averaging window, in time steps.
        stride: step between consecutive averaging windows.
    """

    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        """
        Smooth ``x`` along its second dimension.

        Args:
            x: tensor indexed as [Batch, Length, Channel].

        Returns:
            The moving average, indexed as [Batch, Length, Channel]. With
            ``stride=1`` the length equals the input length.
        """
        # padding on the both ends of time series
        # A window of k steps needs k - 1 padded steps in total to keep the
        # length unchanged. Splitting them as (k - 1) // 2 in front and the
        # remainder at the end is identical to symmetric padding for odd k and
        # avoids losing one time step for even k.
        front_len = (self.kernel_size - 1) // 2
        end_len = self.kernel_size - 1 - front_len
        front = x[:, 0:1, :].repeat(1, front_len, 1)
        end = x[:, -1:, :].repeat(1, end_len, 1)
        x = torch.cat([front, x, end], dim=1)
        # AvgPool1d pools over the last dimension, so move time there and back.
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x
class series_decomp(nn.Module):
    """
    Series decomposition block.

    Splits a series into a smoothed trend (its moving average) and the
    remainder left after subtracting that trend.
    """

    def __init__(self, kernel_size):
        super().__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        """Return ``(remainder, trend)`` for the input series ``x``."""
        trend = self.moving_avg(x)
        remainder = x - trend
        return remainder, trend
class Model(nn.Module):
    """
    DLinear

    Decomposes the input into a seasonal and a trend part, maps each part from
    ``seq_len`` to ``pred_len`` time steps with a linear layer, and adds the
    two projections. With ``individual`` set, every channel gets its own pair
    of linear layers; otherwise one pair is shared by all channels.
    """

    def __init__(self, seq_len, pred_len, individual, enc_in, kernel_size = 25):
        super().__init__()
        self.seq_len = seq_len
        self.pred_len = pred_len

        # Decomposition kernel size
        self.kernel_size = kernel_size
        self.decompsition = series_decomp(self.kernel_size)
        self.individual = individual
        self.channels = enc_in

        if not self.individual:
            # One set of layers shared across all channels.
            self.Linear_Seasonal = self._averaging_linear()
            self.Linear_Trend = self._averaging_linear()
            self.Linear_Decoder = nn.Linear(self.seq_len, self.pred_len)
            return

        # One set of layers per channel.
        self.Linear_Seasonal = nn.ModuleList()
        self.Linear_Trend = nn.ModuleList()
        self.Linear_Decoder = nn.ModuleList()
        for _ in range(self.channels):
            self.Linear_Seasonal.append(self._averaging_linear())
            self.Linear_Trend.append(self._averaging_linear())
            self.Linear_Decoder.append(nn.Linear(self.seq_len, self.pred_len))

    def _averaging_linear(self):
        """Build a seq_len -> pred_len linear layer whose weights all start at 1 / seq_len."""
        layer = nn.Linear(self.seq_len, self.pred_len)
        uniform = (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])
        layer.weight = nn.Parameter(uniform)
        return layer

    def forward(self, x):
        """
        Args:
            x: [Batch, Input length, Channel]

        Returns:
            Tensor of shape [Batch, Output length, Channel], i.e. one forecast
            per input channel.
        """
        seasonal_part, trend_part = self.decompsition(x)
        # The linear layers act on the last dimension, so put time last.
        seasonal_part = seasonal_part.permute(0, 2, 1)
        trend_part = trend_part.permute(0, 2, 1)

        if self.individual:
            # Output buffers with the same dtype and device as the inputs.
            seasonal_output = seasonal_part.new_zeros(
                [seasonal_part.size(0), seasonal_part.size(1), self.pred_len]
            )
            trend_output = trend_part.new_zeros(
                [trend_part.size(0), trend_part.size(1), self.pred_len]
            )
            for ch in range(self.channels):
                seasonal_output[:, ch, :] = self.Linear_Seasonal[ch](seasonal_part[:, ch, :])
                trend_output[:, ch, :] = self.Linear_Trend[ch](trend_part[:, ch, :])
        else:
            seasonal_output = self.Linear_Seasonal(seasonal_part)
            trend_output = self.Linear_Trend(trend_part)

        combined = seasonal_output + trend_output
        return combined.permute(0, 2, 1)  # to [Batch, Output length, Channel]
My dataframe has 15 features and one target variable, for a total of 16 columns. I want to use the past values of the features and the target to predict the next n steps of the target. (The reproducible example below uses a random 5-column dataframe instead.)
from sklearn.preprocessing import StandardScaler
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
# Seed NumPy before drawing any random numbers; seeding after the dataframe
# has been created would leave the data different on every run.
np.random.seed(42)
# creating random dataframe
df = pd.DataFrame(np.random.randint(0,100,size=(1000, 5)), columns=list('ABCDE'))
# Parameters
seq_len = 12      # number of past time steps fed to the model
pred_len = 3      # number of future time steps to predict
kernel_size = 5   # moving-average window used by the series decomposition
batch_size = 4
individual = True # one linear layer per channel instead of a shared one
# Extract the target
target_column = 'A'
# Function to create sequences for training
def create_sequence(data, seq_len, pred_len, target_col=None):
    """
    Cut a dataframe into sliding (input window, target window) pairs.

    Args:
        data: DataFrame with one row per time step.
        seq_len: number of consecutive rows in each input window.
        pred_len: number of rows following the input window to predict.
        target_col: name of the column to predict. Defaults to the
            module-level ``target_column`` setting.

    Returns:
        A tuple ``(sequences, targets)`` of NumPy arrays. ``sequences`` holds
        the input windows with all columns ([N, seq_len, n_columns]) and
        ``targets`` the matching future values of the target column
        ([N, pred_len]). Both are empty when ``data`` has fewer than
        ``seq_len + pred_len`` rows.
    """
    if target_col is None:
        target_col = target_column

    # Convert once up front instead of re-slicing the DataFrame per window.
    all_values = data.values
    target_values = data[target_col].values

    n_windows = len(data) - seq_len - pred_len + 1
    sequences = [all_values[i:i + seq_len] for i in range(n_windows)]
    targets = [
        target_values[i + seq_len:i + seq_len + pred_len]
        for i in range(n_windows)
    ]
    return np.array(sequences), np.array(targets)
sequences, targets = create_sequence(df, seq_len, pred_len)

# split the data
# NOTE(review): train_test_split shuffles by default, so overlapping windows
# can end up in different splits - confirm this is acceptable for a time series.
train_data, test_data, train_target, test_target = train_test_split(sequences, targets, test_size = 0.25, random_state = 42)
train_data, val_data, train_target, val_target = train_test_split(train_data, train_target, test_size = 0.33, random_state = 42)

# standardize data
# The scaler works on 2-D input, so every [N, seq_len, n_columns] array is
# flattened to [N * seq_len, n_columns] for scaling and reshaped back after.
# It is fitted on the training split only and then reused for the others.
scaler = StandardScaler()
train_data = scaler.fit_transform(train_data.reshape(-1, train_data.shape[-1])).reshape(train_data.shape)
val_data = scaler.transform(val_data.reshape(-1, val_data.shape[-1])).reshape(val_data.shape)
test_data = scaler.transform(test_data.reshape(-1, test_data.shape[-1])).reshape(test_data.shape)

train_data_tensor = torch.Tensor(train_data)
train_target_tensor = torch.Tensor(train_target)
val_data_tensor = torch.Tensor(val_data)
val_target_tensor = torch.Tensor(val_target)
test_data_tensor = torch.Tensor(test_data)
test_target_tensor = torch.Tensor(test_target)

# Create DataLoader
train_dataset = TensorDataset(train_data_tensor, train_target_tensor)
train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle = True)

# Every dataframe column is fed to the model as one input channel.
model_config = {'seq_len':seq_len,
                'pred_len':pred_len,
                'individual': individual,
                'enc_in': df.shape[1],
                'kernel_size': kernel_size}
model = Model(**model_config)
optimizer = optim.Adam(model.parameters(), lr = 0.001)
criterion = nn.MSELoss()
num_epoch = 30
But when I try to run the training loop:
for epoch in range(num_epoch):
    # One optimisation pass over the training mini-batches.
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        # NOTE: outputs is [Batch, Output length, Channel] while targets is
        # [Batch, Output length]; this call is where the size mismatch is raised.
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

    # Score the held-out splits without tracking gradients.
    model.eval()
    with torch.no_grad():
        val_inputs, val_targets = val_data_tensor, val_target_tensor
        val_outputs = model(val_inputs)
        val_loss = criterion(val_outputs, val_targets)

        test_inputs, test_targets = test_data_tensor, test_target_tensor
        test_outputs = model(test_inputs)
        test_loss = criterion(test_outputs, test_targets)

    # `loss` here is the loss of the last training mini-batch of the epoch.
    print(f'EPOCH: {epoch + 1}')
    print(f'TRAINING LOSS {loss.item()}')
    print(f'VALIDATION LOSS {val_loss.item()}')
    print(f'TEST LOSS {test_loss.item()}')
I get the following error:
Using a target size (torch.Size([4, 3])) that is different to the input size (torch.Size([4, 3, 5])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
Output exceeds the size limit. Open the full output data in a text editor
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_3124\1828251915.py in <module>
9
10 outputs = outputs.squeeze(dim=1)
---> 11 loss = criterion(outputs, targets)
12 loss.backward()
13 optimizer.step()
~\AppData\Roaming\Python\Python37\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
1128 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1129 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1130 return forward_call(*input, **kwargs)
1131 # Do not call functions when jit is used
1132 full_backward_hooks, non_full_backward_hooks = [], []
~\AppData\Roaming\Python\Python37\site-packages\torch\nn\modules\loss.py in forward(self, input, target)
528
529 def forward(self, input: Tensor, target: Tensor) -> Tensor:
--> 530 return F.mse_loss(input, target, reduction=self.reduction)
531
532
~\AppData\Roaming\Python\Python37\site-packages\torch\nn\functional.py in mse_loss(input, target, size_average, reduce, reduction)
3277 reduction = _Reduction.legacy_get_string(size_average, reduce)
...
---> 73 return _VF.broadcast_tensors(tensors) # type: ignore[attr-defined]
74
75
RuntimeError: The size of tensor a (5) must match the size of tensor b (3) at non-singleton dimension 2
Solution
Since the output has the shape [Batch, Output length, Channel], it contains the contribution of each feature/channel to the final prediction, whereas the target has the shape [Batch, Output length].
This means we need to sum over all the channels to get the final prediction, which also makes the output shape match the target shape.
This can be achieved by doing:
# Sum over dim=2 to collapse it; assuming model_output is the
# [Batch, Output length, Channel] tensor described above, the result is
# [Batch, Output length].
overall_predictions = torch.sum(model_output, dim=2)
Answered By - ianux22
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.