Issue
I'm working with the Google utterance dataset in spectrogram form. Each data point has dimension (160, 101). In my data loader, I used batch_size=128
. Therefore, each batch has dimension (128, 160, 101).
I use a LeNet model with the following code as the model:
class LeNet(nn.Module):
def __init__(self):
super(LeNet, self).__init__()
self.conv1 = nn.Conv2d(1, 6, 5)
self.conv2 = nn.Conv2d(6, 16, 5)
self.fc1 = nn.Linear(16*5*5, 120)
self.fc2 = nn.Linear(120, 84)
self.fc3 = nn.Linear(84, 30)
def forward(self, x):
out = F.relu(self.conv1(x))
out = F.max_pool2d(out, 2)
out = F.relu(self.conv2(out))
out = F.max_pool2d(out, 2)
out = out.view(out.size(0), -1)
out = F.relu(self.fc1(out))
out = F.relu(self.fc2(out))
out = self.fc3(out)
return out
I tried unsqueezing the data with dim=3
, but got this error:
Traceback (most recent call last):
File "train_speech.py", line 359, in <module>
train_loss, reg_loss, train_acc, cost = train(epoch)
File "train_speech.py", line 258, in train
outputs = (net(inputs))['out']
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/parallel/data_parallel.py", line 166, in forward
return self.module(*inputs[0], **kwargs[0])
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/content/gdrive/My Drive/Colab Notebooks/mixup_erm-master/models/lenet.py", line 15, in forward
out = F.relu(self.conv1(x))
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1051, in _call_impl
return forward_call(*input, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py", line 443, in forward
return self._conv_forward(input, self.weight, self.bias)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/conv.py", line 440, in _conv_forward
self.padding, self.dilation, self.groups)
RuntimeError: Given groups=1, weight of size [6, 1, 5, 5], expected input[128, 160, 101, 1] to have 1 channels, but got 160 channels instead
How do I fix this issue?
EDIT: New Error Message Below
torch.Size([128, 160, 101])
torch.Size([128, 1, 160, 101])
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:1156.)
return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
Traceback (most recent call last):
File "train_speech.py", line 363, in <module>
train_loss, reg_loss, train_acc, cost = train(epoch)
File "train_speech.py", line 262, in train
outputs = (net(inputs))['out']
IndexError: too many indices for tensor of dimension 2
I'm unsqueezing the data in each batch. The relevant section of my training code is below. inputs
is analogous to x
.
print(inputs.shape)
inputs = inputs.unsqueeze(1)
print(inputs.shape)
outputs = (net(inputs))['out']
Edit 2: New Error
Traceback (most recent call last):
File "train_speech.py", line 361, in <module>
train_loss, reg_loss, train_acc, cost = train(epoch)
File "train_speech.py", line 270, in train
loss.backward()
File "/usr/local/lib/python3.7/dist-packages/torch/_tensor.py", line 255, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/usr/local/lib/python3.7/dist-packages/torch/autograd/__init__.py", line 149, in backward
allow_unreachable=True, accumulate_grad=True) # allow_unreachable flag
RuntimeError: Function AddmmBackward returned an invalid gradient at index 1 - got [128, 400] but expected shape compatible with [128, 13024]
Edit 3: Train Loop Below
def train(epoch):
print('\nEpoch: %d' % epoch)
net.train()
train_loss = 0
reg_loss = 0
correct = 0
total = 0
for batch_idx, (inputs, targets) in enumerate(trainloader):
if use_cuda:
inputs, targets = inputs.cuda(), targets.cuda()
inputs, targets_a, targets_b, lam,layer, cost = mixup_data(inputs, targets,
args.alpha,args.mixupBatch, use_cuda)
inputs, targets_a, targets_b = map(Variable, (inputs,
targets_a, targets_b))
outputs = net(inputs)
loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
train_loss += loss.item()
_, predicted = torch.max(outputs.data, 1)
total += targets.size(0)
correct += (lam * predicted.eq(targets_a.data).cpu().sum().float()
+ (1 - lam) * predicted.eq(targets_b.data).cpu().sum().float())
optimizer.zero_grad()
loss.backward()
optimizer.step()
return (train_loss/batch_idx, reg_loss/batch_idx, 100.*correct/total, cost/batch_idx)
Solution
You should expand on axis=1
a.k.a. the channel axis:
>>> x = x.unsqueeze(1)
If you're inside the dataset __getitem__
, then it corresponds to axis=0
.
Answered By - Ivan
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.