Issue
I'm trying to train a simple neural net with the following architecture:
class ModifiedNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(ModifiedNet, self).__init__()
        self.linear = nn.Linear(num_inputs, 1000)
        self.linear2 = nn.Linear(in_features=1000, out_features=num_outputs)

    def forward(self, input):
        input = input.view(-1, num_inputs)  # reshape input to batch x num_inputs
        output = self.linear(input)
        output = self.linear2(output)
        return output
The dataset is MNIST (num_inputs=784 and num_outputs=10).
I'm trying to plot the loss (cross-entropy) for each learning rate in {0.01, 0.1, 1, 10}, but the loss becomes NaN once I reach LR=1.
From looking at similar questions, I saw that the usual advice is to lower the learning rate, but my task is to measure the loss with the given rates.
What am I doing wrong?
This is the code for the train and test functions:
# train and test functions
def train(epoch, network, optimizer=None):
    losses = list()
    network.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        if optimizer is not None:
            optimizer.zero_grad()
        output = network(data)
        loss = F.cross_entropy(output, target).to(torch.float64)
        losses.append(loss.item())
        loss.backward()
        if optimizer is not None:
            optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
    return np.mean(np.array(losses))
def test(network):
    network.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        #data, target = Variable(data, volatile=True), Variable(target)
        output = network(data)
        test_loss += F.cross_entropy(output, target, reduction='sum').to(torch.double).item()  # sum up batch loss
        #test_loss += F.cross_entropy(output, target, sum=True).item()  # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    return test_loss
and this is where I'm looping over the learning rates:
learning_rates = [0.01, 0.1, 1, 10]
for learning_rate in learning_rates:
    net = ModifiedNet(num_inputs, num_outputs)
    optimizer = optim.SGD(net.parameters(), lr=learning_rate)
    train_losses = dict()
    for epoch_idx in range(10):
        train_losses[epoch_idx] = train(epoch_idx, net, optimizer)
    plot_graph(list(train_losses.keys()), list(train_losses.values()), "epoch", "train loss", str(learning_rate))
And this is the original question:
Retrain the model for 10 epochs with each of the learning rates in the set {0.01, 0.1, 1, 10} and test the resulting model. Create a figure and plot the loss curves of each of the four runs for comparison. Explain the obtained (train and test) results.
Also, the net architecture is given in the question, so I can't change it.
Solution
I had a silly bug, as usual: I didn't use an activation function. Without a nonlinearity between the two linear layers, the stacked layers collapse into a single linear map.
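For illustration only (not the exact code from the answer), here is a minimal sketch of what adding an activation could look like, assuming a ReLU between the two linear layers; the class name ModifiedNetWithActivation is hypothetical, and it stores num_inputs on the module rather than relying on a global:

import torch.nn as nn
import torch.nn.functional as F

class ModifiedNetWithActivation(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(ModifiedNetWithActivation, self).__init__()
        self.num_inputs = num_inputs
        self.linear = nn.Linear(num_inputs, 1000)
        self.linear2 = nn.Linear(in_features=1000, out_features=num_outputs)

    def forward(self, input):
        input = input.view(-1, self.num_inputs)  # flatten each image to a num_inputs-dim vector
        hidden = F.relu(self.linear(input))      # nonlinearity between the two linear layers
        output = self.linear2(hidden)
        return output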
Answered By - woofwoof