Issue
I have thrown together a dummy model to showcase linear regression in PyTorch, but I find that my model is not learning properly. It does well at learning the slope, but the intercept barely budges. Printing out the grads at every epoch confirms that the gradient is indeed much smaller for the bias. Why is that, and how can I remedy it so that the intercept is properly learnt?
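A minimal way to see this, assuming the model and training loop below (named_parameters just labels each gradient, so it is clear which one belongs to the bias):
# After loss.backward(), print each parameter's gradient by name;
# the bias gradient comes out much smaller than the weight gradient
for name, p in model.named_parameters():
    print(name, p.grad)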
This is what happens (with a set to 0 to illustrate):
import numpy as np
import matplotlib.pyplot as plt
import torch

# Create some dummy data: we establish a linear relationship between x and y
a = np.random.rand()
b = np.random.rand()
a = 0  # set to 0 to illustrate the intercept problem
x = np.linspace(start=0, stop=100, num=100)
y = a * x + b
# Now let's create some noisy measurements
noise = np.random.normal(size=100)
y_noisy = a * x + b + noise
# What's the overall error?
mse_actual = np.mean((y - y_noisy)**2)
# Visualize
plt.scatter(x,y_noisy, label='Measurements', alpha=.7)
plt.plot(x,y,'r', label='Underlying')
plt.legend()
plt.show()
# Let's learn something!
inputs = torch.from_numpy(x).float().unsqueeze(1)
targets = torch.from_numpy(y_noisy).float().unsqueeze(1)
# This is our model (a single linear layer: one weight + one bias)
model = torch.nn.Linear(1,1)
optimizer = torch.optim.SGD(model.parameters(),lr=1e-5)
loss_function = torch.nn.MSELoss()
# What does it predict right now?
shuffled_inputs, preds = [], []
for input, target in zip(inputs, targets):
    pred = model(input)
    shuffled_inputs.append(input.detach().numpy()[0])
    preds.append(pred.detach().numpy()[0])
# Visualize
plt.scatter(x,y_noisy, color='blue', label='Measurements', alpha=.7)
plt.plot(shuffled_inputs, preds, color='orange', label='Predictions', alpha=.7)
plt.plot(x,y,'r', label='Underlying')
plt.legend()
plt.show()
# Let's train!
epochs = 100
a_s, b_s = [], []
for epoch in range(epochs):
    # Reset optimizer values
    optimizer.zero_grad()
    # Predict values using current model
    preds = model(inputs)
    # How far off are we?
    loss = loss_function(preds, targets)
    # Calculate the gradient
    loss.backward()
    # Update model
    optimizer.step()
    for p in model.parameters():
        print('Grads:', p.grad)
    # New parameters
    a_s.append(list(model.parameters())[0].item())
    b_s.append(list(model.parameters())[1].item())
    print(f"Epoch {epoch+1} -- loss = {loss.item()}")
Solution
It's a bit of a non-answer, but just use more epochs or add more datapoints. With only 100 datapoints and noise as significant as yours (just plot the initial data and it becomes obvious), the model will struggle with MSE as a loss.
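As an aside on why the bias gradient is so much smaller: with MSE, the weight's gradient is scaled by x (which runs up to 100 here) while the bias's is not, so a learning rate small enough to keep the weight stable leaves the bias crawling. A minimal sketch of that, assuming the x and y_noisy arrays from your snippet (w and b here are just illustrative starting values):
# Analytic MSE gradients for y_hat = w*x + b on the data above
w, b = 0.0, 0.0                     # arbitrary starting point
residual = (w * x + b) - y_noisy    # per-point prediction error
grad_w = 2 * np.mean(residual * x)  # scaled by x (mean of x is 50 here)
grad_b = 2 * np.mean(residual)      # no x scaling, so roughly 50x smaller
print(grad_w, grad_b)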
I can't see your image (work blocked imgur...), but I found the plot looked bad if you didn't adjust the axes, because matplotlib was so zoomed in on the y axis (when a=0), so I zoomed that out too:
import numpy as np
import matplotlib.pyplot as plt
import torch

# Create some dummy data: we establish a linear relationship between x and y
a = np.random.rand()
b = np.random.rand()
a = 0
N = 10000
x = np.linspace(start=0, stop=100, num=N)
y = a * x + b
# Now let's create some noisy measurements
noise = np.random.normal(size=N)*0.1
y_noisy = a * x + b + noise
# What's the overall error?
mse_actual = np.mean((y - y_noisy)**2)
# Visualize
plt.figure()
plt.scatter(x,y_noisy, label='Measurements', alpha=.7)
plt.plot(x,y,'r', label='Underlying')
plt.legend()
plt.show()
# Let's learn something!
inputs = torch.from_numpy(x).float().unsqueeze(1)
targets = torch.from_numpy(y_noisy).float().unsqueeze(1)
# This is our model (a single linear layer: one weight + one bias)
model = torch.nn.Linear(1,1)
optimizer = torch.optim.SGD(model.parameters(),lr=1e-5)
loss_function = torch.nn.MSELoss()
# Let's train!
epochs = 50000
a_s, b_s = [], []
for epoch in range(epochs):
    # Reset optimizer values
    optimizer.zero_grad()
    # Predict values using current model
    preds = model(inputs)
    # How far off are we?
    loss = loss_function(preds, targets)
    # Calculate the gradient
    loss.backward()
    # Update model
    optimizer.step()
    #for p in model.parameters():
    #    print('Grads:', p.grad)
    # New parameters
    a_s.append(list(model.parameters())[0].item())
    b_s.append(list(model.parameters())[1].item())
    print(f"Epoch {epoch+1} -- loss = {loss.item()}")
# What does it predict right now?
shuffled_inputs, preds = [], []
for input, target in zip(inputs, targets):
    pred = model(input)
    shuffled_inputs.append(input.detach().numpy()[0])
    preds.append(pred.detach().numpy()[0])
plt.figure()
plt.scatter(x,y_noisy, color='blue', label='Measurements', alpha=.7)
plt.plot(shuffled_inputs, preds, color='orange', label='Predictions', alpha=.7)
plt.plot(x,y,'r', label='Underlying')
plt.axis([0,100,y.min()-1,y.max()+1])
plt.legend()
plt.show()
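As a quick sanity check (not something the training needs, just a comparison), the closed-form least-squares fit from np.polyfit shows what SGD should be converging towards on the same noisy data:
# Compare learned parameters against the closed-form least-squares fit
slope, intercept = np.polyfit(x, y_noisy, deg=1)
print(f"polyfit: slope={slope:.4f}, intercept={intercept:.4f}")
print(f"learned: slope={a_s[-1]:.4f}, intercept={b_s[-1]:.4f}")
print(f"true:    slope={a:.4f}, intercept={b:.4f}")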
Answered By - jhso