Issue
I have the error being displayed whilst trying to plot the graph...
I am sharing the code in the following link: https://colab.research.google.com/drive/1PooWIPVhm67iZquqZvxz3mdfmd6rv-3d#scrollTo=qSM7mNrKhBOt
I think I'm missing 'tensor.cpu()' somewhere but I can't really pinpoint it.. Everything else works :/ Can anyone help please?
def train_epoch(
model,
data_loader,
loss_fn,
optimizer,
device,
scheduler,
n_examples
):
model = model.train()
losses = []
correct_predictions = 0
for d in data_loader:
input_ids = d["input_ids"].to(device)
attention_mask = d["attention_mask"].to(device)
targets = d["targets"].to(device)
outputs = model(
input_ids=input_ids,
attention_mask=attention_mask
)
_, preds = torch.max(outputs, dim=1)
loss = loss_fn(outputs, targets)
correct_predictions += torch.sum(preds == targets)
losses.append(loss.item())
loss.backward()
nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()
scheduler.step()
optimizer.zero_grad()
return correct_predictions.double() / n_examples, np.mean(losses)
def eval_model(model, data_loader, loss_fn, device, n_examples):
model = model.eval()
losses = []
correct_predictions = 0
with torch.no_grad():
for d in data_loader:
input_ids = d["input_ids"].to(device)
attention_mask = d["attention_mask"].to(device)
targets = d["targets"].to(device)
outputs = model(
input_ids=input_ids,
attention_mask=attention_mask
)
_, preds = torch.max(outputs, dim=1)
loss = loss_fn(outputs, targets)
correct_predictions += torch.sum(preds == targets)
losses.append(loss.item())
return correct_predictions.double() / n_examples, np.mean(losses)
%%time
history = defaultdict(list)
best_accuracy = 0
for epoch in range(EPOCHS):
print(f'Epoch {epoch + 1}/{EPOCHS}')
print('-' * 10)
train_acc, train_loss = train_epoch(
model,
train_data_loader,
loss_fn,
optimizer,
device,
scheduler,
len(df_train)
)
print(f'Train loss {train_loss} accuracy {train_acc}')
val_acc, val_loss = eval_model(
model,
val_data_loader,
loss_fn,
device,
len(df_val)
)
print(f'Val loss {val_loss} accuracy {val_acc}')
print()
history['train_acc'].append(train_acc)
history['train_loss'].append(train_loss)
history['val_acc'].append(val_acc)
history['val_loss'].append(val_loss)
if val_acc > best_accuracy:
torch.save(model.state_dict(), 'best_model_state.bin')
best_accuracy = val_acc
plt.plot(history['train_acc'], label='train accuracy')
plt.plot(history['val_acc'], label='validation accuracy')
plt.title('Training history')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend()
plt.ylim([0, 1]);
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/matplotlib/cbook/__init__.py in index_of(y)
1626 try:
-> 1627 return y.index.values, y.values
1628 except AttributeError:
AttributeError: 'builtin_function_or_method' object has no attribute 'values'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
8 frames
<__array_function__ internals> in atleast_1d(*args, **kwargs)
/usr/local/lib/python3.7/dist-packages/torch/_tensor.py in __array__(self, dtype)
730 return handle_torch_function(Tensor.__array__, (self,), self, dtype=dtype)
731 if dtype is None:
--> 732 return self.numpy()
733 else:
734 return self.numpy().astype(dtype, copy=False)
TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.
Solution
When you are computing the number of correct predictions: correct_predictions += torch.sum(preds == targets)
, both preds
and targets
are CUDA tensors, which matplotlib
knows nothing about.
In this case, we should detach the tensor (to stop Autograd tracking it), push the data from GPU to CPU, and convert it to numpy elements, like so: torch.sum(preds == targets).detach().cpu().numpy()
.
Further, since the number of correct predictions is just a single number, we can just do torch.sum(preds == targets).item()
which is a shorthand for the above, but only if the tensor is a singleton.
This way, correct_predictions
is a Python integer, & you can return float(correct_predictions) / n_examples
from your methods and pass them onto matplotlib
.
For further reading:
- https://discuss.pytorch.org/t/what-is-the-difference-between-loss-and-loss-item/126083
- https://discuss.pytorch.org/t/does-item-automatically-move-the-data-to-the-cpu/69629/6
- https://discuss.pytorch.org/t/difference-between-loss-item-and-loss-detach-cpu-numpy/127773
Answered By - Riddhiman Dasgupta
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.