Issue
I am trying to train an RNN based on the code here.
I also found two similar posts, but I was not able to extrapolate from them what I should do to fix my problem: here and here.
The error is easy to interpret: the model expects 3 dimensions, but I am only giving it 1. However, I do not know where to fix the issue. I know that a good Stack Overflow post should include data; I am not sure how to attach my real tensors, so I include dummy stand-ins below.
My inputs are 300-dimensional word embeddings and my outputs are one-hot encoded vectors of length 11, where the model makes a classification choice over the 11 output dimensions.
I will start with the dataloader and go from there.
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):
    def __init__(self, dat, labels):
        self.labels = labels
        self.dat = dat

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        label = self.labels[idx]
        dat = self.dat[idx]
        sample = {"Sample": dat, "Class": label}
        return sample
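Since I cannot share the real tensors, here are dummy stand-ins with the same shapes (the sample count of 1000 is a placeholder, and I am assuming integer class labels, which is what nn.CrossEntropyLoss expects):

import torch

# Dummy stand-ins for the real data: 1000 samples of 300-d embeddings,
# each labelled with one of 11 classes (placeholder values only)
X2 = torch.randn(1000, 300)
y = torch.randint(0, 11, (1000,))

train_dset = CustomDataset(X2, y)
print(train_dset[0]["Sample"].shape)  # torch.Size([300])
print(train_dset[0]["Class"])         # a scalar class index, e.g. tensor(7)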
I define my vanilla RNN as follows.
import torch
import torch.nn as nn

class VanillaRNN(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(VanillaRNN, self).__init__()
        # Defining some parameters
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # Defining the layers
        # RNN layer
        self.rnn = nn.RNN(input_size, hidden_dim, n_layers, batch_first=True)
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, inputs):
        batch_size = inputs.size(0)
        # Initializing hidden state for first input using method defined below
        hidden = self.init_hidden(batch_size)
        # Passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(inputs, hidden)
        # Reshaping the outputs so they can be fed into the fully connected layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        return out, hidden

    def init_hidden(self, batch_size):
        # This method generates the first hidden state of zeros which we'll use in the forward pass
        # We'll send the tensor holding the hidden state to the device we specified earlier as well
        hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
        return hidden
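To show that the model itself runs, here is a quick shape check with a dummy 3-D input (the batch size of 4 and sequence length of 1 are arbitrary choices for illustration):

import torch

model = VanillaRNN(input_size=300, output_size=11, hidden_dim=100, n_layers=2)
dummy = torch.randn(4, 1, 300)  # (batch, seq_len, features)
out, hidden = model(dummy)
print(out.shape)     # torch.Size([4, 11]) -- one 11-way score vector per sample
print(hidden.shape)  # torch.Size([2, 4, 100]) -- (n_layers, batch, hidden_dim)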
My training loop is as follows.
import matplotlib.pyplot as plt

def plot_train_val(x, train, val, train_label,
                   val_label, title, y_label,
                   color):
    plt.plot(x, train, label=train_label, color=color)
    plt.plot(x, val, label=val_label, color=color, linestyle='--')
    plt.legend(loc='lower right')
    plt.xlabel('epoch')
    plt.ylabel(y_label)
    plt.title(title)

def count_parameters(model):
    parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    return parameters

def init_weights(m):
    if type(m) in (nn.Linear, nn.Conv1d):
        nn.init.xavier_uniform_(m.weight)

# Training function
def train(model, device, train_loader, valid_loader, epochs, learning_rate):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    train_loss, validation_loss = [], []
    train_acc, validation_acc = [], []

    for epoch in range(epochs):
        # train
        model.train()
        running_loss = 0.
        correct, total = 0, 0
        steps = 0

        for idx, batch in enumerate(train_loader):
            text = batch["Sample"].to(device)
            target = batch['Class'].to(device)
            target = torch.autograd.Variable(target).long()
            text, target = text.to(device), target.to(device)

            # add micro for coding training loop
            optimizer.zero_grad()
            output, hidden = model(text)
            print(output.shape, target.shape, target.view(-1).shape)
            loss = criterion(output, target.view(-1))
            loss.backward()
            optimizer.step()
            steps += 1
            running_loss += loss.item()

            # get accuracy
            _, predicted = torch.max(output, 1)
            print(predicted)
            #predicted = torch.round(output.squeeze())
            total += target.size(0)
            correct += (predicted == target).sum().item()

        train_loss.append(running_loss/len(train_loader))
        train_acc.append(correct/total)

        print(f'Epoch: {epoch + 1}, '
              f'Training Loss: {running_loss/len(train_loader):.4f}, '
              f'Training Accuracy: {100*correct/total: .2f}%')

        # evaluate on validation data
        model.eval()
        running_loss = 0.
        correct, total = 0, 0

        with torch.no_grad():
            for idx, batch in enumerate(valid_loader):
                text = batch["Sample"].to(device)
                print(type(text), text.shape)
                target = batch['Class'].to(device)
                target = torch.autograd.Variable(target).long()
                text, target = text.to(device), target.to(device)
                optimizer.zero_grad()
                output = model(text)
                loss = criterion(output, target)
                running_loss += loss.item()

                # get accuracy
                _, predicted = torch.max(output, 1)
                #predicted = torch.round(output.squeeze())
                total += target.size(0)
                correct += (predicted == target).sum().item()

        validation_loss.append(running_loss/len(valid_loader))
        validation_acc.append(correct/total)

        print(f'Validation Loss: {running_loss/len(valid_loader):.4f}, '
              f'Validation Accuracy: {100*correct/total: .2f}%')

    return train_loss, train_acc, validation_loss, validation_acc
When I run the model with the following, I get the error provided below. Thanks in advance for any help.
import time

# Model hyperparameters
#vocab_size = len(word_array)
learning_rate = 1e-3
output_size = 11
input_size = 300
epochs = 10
hidden_dim = 100
n_layers = 2

# Initialize model, training and testing
set_seed(SEED)
vanilla_rnn_model = VanillaRNN(input_size, output_size, hidden_dim, n_layers)
#vanilla_rnn_model = VanillaRNN(output_size, input_size, RNN_size, fc_size, DEVICE)
vanilla_rnn_model.to(DEVICE)

vanilla_rnn_start_time = time.time()
vanilla_train_loss, vanilla_train_acc, vanilla_validation_loss, vanilla_validation_acc = train(vanilla_rnn_model,
                                                                                               DEVICE,
                                                                                               train_loader,
                                                                                               valid_loader,
                                                                                               epochs=epochs,
                                                                                               learning_rate=learning_rate)
The error :(
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-31-bfd2f8f3456f> in <module>()
19 valid_loader,
20 epochs = epochs,
---> 21 learning_rate = learning_rate)
22 print("--- Time taken to train = %s seconds ---" % (time.time() - vanilla_rnn_start_time))
23 #test_accuracy = test(vanilla_rnn_model, DEVICE, test_iter)
6 frames
<ipython-input-30-db1fa6c8b625> in train(model, device, train_loader, valid_loader, epochs, learning_rate)
45 # add micro for coding training loop
46 optimizer.zero_grad()
---> 47 output, hideden = model(text)
48 print(output.shape, target.shape, target.view(-1).shape)
49 loss = criterion(output, target.view(-1))
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
<ipython-input-26-c34b90b3cbc3> in forward(self, x)
21
22 # Passing in the input and hidden state into the model and obtaining outputs
---> 23 out, hidden = self.rnn(x, hidden)
24
25 # Reshaping the outputs such that it can be fit into the fully connected layer
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1049 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1050 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1051 return forward_call(*input, **kwargs)
1052 # Do not call functions when jit is used
1053 full_backward_hooks, non_full_backward_hooks = [], []
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
263 assert hx is not None
264 input = cast(Tensor, input)
--> 265 self.check_forward_args(input, hx, batch_sizes)
266 _impl = _rnn_impls[self.mode]
267 if batch_sizes is None:
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in check_forward_args(self, input, hidden, batch_sizes)
227
228 def check_forward_args(self, input: Tensor, hidden: Tensor, batch_sizes: Optional[Tensor]):
--> 229 self.check_input(input, batch_sizes)
230 expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
231
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in check_input(self, input, batch_sizes)
201 raise RuntimeError(
202 'input must have {} dimensions, got {}'.format(
--> 203 expected_input_dim, input.dim()))
204 if self.input_size != input.size(-1):
205 raise RuntimeError(
RuntimeError: input must have 3 dimensions, got 1
Solution
First, you need to wrap your dataset in a proper DataLoader; you can do something like this:
from torch.utils.data import DataLoader

# [...]
# define a batch_size, I'll use 4 as an example
batch_size = 4
train_dset = CustomDataset(X2, y)  # your current code (change train_loader to train_dset)
train_loader = DataLoader(train_dset, batch_size=batch_size, shuffle=True)
At this point, text should now be [4, 300].
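You can confirm the shape by pulling a single batch from the loader (a quick sanity check, reusing the names above):

batch = next(iter(train_loader))
print(batch["Sample"].shape)  # torch.Size([4, 300])
print(batch["Class"].shape)   # torch.Size([4])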
Then, you said that your sequence length is equal to 1. To fix the error, you can add the length dimension using unsqueeze:
# [...]
output, hidden = model(text.unsqueeze(1))
# [...]
Now, text should be [4, 1, 300], and here you have the 3 dimensions the RNN forward call is expecting (your RNN has batch_first=True):
input: tensor of shape (L, N, H_in) when batch_first=False or (N, L, H_in) when batch_first=True, containing the features of the input sequence. (...)
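Putting it together, here is a minimal end-to-end sketch (the sizes are arbitrary, and I am assuming integer class labels, which nn.CrossEntropyLoss expects; if your labels are one-hot vectors, convert them with target.argmax(dim=1) first):

import torch
import torch.nn as nn

model = VanillaRNN(input_size=300, output_size=11, hidden_dim=100, n_layers=2)
criterion = nn.CrossEntropyLoss()

text = torch.randn(4, 300)           # one batch from the loader: (N, H_in)
target = torch.randint(0, 11, (4,))  # integer class indices

output, hidden = model(text.unsqueeze(1))  # (N, 1, H_in): 3 dimensions, no error
loss = criterion(output, target)           # output: (4, 11), target: (4,)
print(output.shape, loss.item())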
Answered By - Berriel