Issue
I'm running inference with a ConvNet built in PyTorch and getting a RuntimeError on the following line:
outputs = model(X_batch)
The error is
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument mat1 in method wrapper_CUDA_addmm)
My code structure is as follows. I put both the model and the input tensor on the GPU and added a few checks to confirm that they are both on the GPU.
print(args.device) # returns 'cuda'
print(torch.cuda.is_available()) # returns True
model = CNN_MLP(args)
model.to(args.device)
def inference(model, test_dataloader, device='cpu'):
    model.eval()
    metrics = []
    for _, (X_batch, y_batch) in enumerate(tqdm(test_dataloader)):
        X_batch = X_batch.to(args.device)
        print(next(model.parameters()).is_cuda)  # returns True
        print(X_batch.is_cuda)  # returns True
        outputs = model(X_batch)  # Error raised here
        metrics.append(calc_metrics(outputs, y_batch))  # This is a simplified version of the code here
    return aggregate(metrics)

zero_shot_metrics = inference(model, test_dataloader, device=args.device)
Other details:
- I'm using a single NVIDIA RTX A4000 GPU, and can confirm this with nvidia-smi.
- The torch version is 2.0.1+cu118. nvidia-smi shows the CUDA version is 12.0. I'm not sure if this mismatch is significant, but torch.cuda.is_available() seems to be working fine.
- The CNN_MLP class is defined as follows:
import math
import torch

class CNN_MLP(torch.nn.Module):
    """ CNN-MLP with 1 Conv layer, 1 Max Pool layer, and 1 Linear layer. """

    def __init__(self, seq_len=220, embed_size=64, vocab_size=45, pad_index=0,
                 stride=1, kernel_size=3, conv_out_size=64, hidden_layer_sizes=[128, 64, 32, 8, 1], dropout_rate=0.25):
        super(CNN_MLP, self).__init__()

        # Embedding layer parameters
        self.seq_len = seq_len
        self.embed_size = embed_size
        self.vocab_size = vocab_size
        self.pad_index = pad_index
        self.hidden_layer_sizes = hidden_layer_sizes

        # Conv layer parameters
        self.stride = stride
        self.kernel_size = kernel_size
        self.conv_out_size = conv_out_size

        # Misc
        self.dropout_rate = dropout_rate

        # Conv Layers
        self.embedding = torch.nn.Embedding(self.vocab_size, self.embed_size, padding_idx=self.pad_index)
        self.conv = torch.nn.Conv1d(self.seq_len, self.conv_out_size, self.kernel_size, self.stride)
        self.hidden_act = torch.relu
        self.max_pool = torch.nn.MaxPool1d(self.kernel_size, self.stride)
        self.flatten = lambda x: x.view(x.shape[0], x.shape[1] * x.shape[2])

        # MLP layers
        self.fc_layers = []
        self.hidden_layer_sizes.insert(0, self._linear_layer_in_size())
        for i in range(len(self.hidden_layer_sizes) - 1):
            self.fc_layers.append(torch.nn.Linear(self.hidden_layer_sizes[i], self.hidden_layer_sizes[i+1]))
            self.fc_layers.append(torch.nn.ReLU())
            if self.dropout_rate and i != len(self.hidden_layer_sizes) - 2:
                self.fc_layers.append(torch.nn.Dropout(self.dropout_rate))
        self.fc_layers.append(torch.sigmoid)

    def _linear_layer_in_size(self):
        out_conv_1 = ((self.embed_size - 1 * (self.kernel_size - 1) - 1) / self.stride) + 1
        out_conv_1 = math.floor(out_conv_1)
        out_pool_1 = ((out_conv_1 - 1 * (self.kernel_size - 1) - 1) / self.stride) + 1
        out_pool_1 = math.floor(out_pool_1)
        return out_pool_1 * self.conv_out_size

    def forward(self, x):
        x = self.embedding(x)
        x = self.conv(x)
        x = self.hidden_act(x)
        x = self.max_pool(x)
        x = self.flatten(x)
        for layer in self.fc_layers:
            x = layer(x)
        return x.squeeze()

    def embed(self, x):
        x = self.embedding(x)
        x = self.conv(x)
        x = self.hidden_act(x)
        x = self.max_pool(x)
        x = self.flatten(x)
        for i, layer in enumerate(self.fc_layers):
            if i != len(self.fc_layers) - 1:
                x = layer(x)
        return x
Solution
In CNN_MLP, the fc_layers member needs to be an nn.Module because it contains other modules. When you call .to on your model, .to is called recursively on all the other members that inherit from nn.Module. fc_layers is a plain Python list (not an nn.Module), so the parameters of the modules inside it are never transferred to the GPU.
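To see the effect, here is a minimal, hypothetical example (not from the original post) contrasting a registered submodule with one hidden inside a plain Python list; it assumes a CUDA device is available:

import torch

class Broken(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.registered = torch.nn.Linear(4, 4)  # registered submodule: .to() will move it
        self.hidden = [torch.nn.Linear(4, 4)]    # plain list: PyTorch never registers this layer

model = Broken().to('cuda')
print(next(model.registered.parameters()).device)  # cuda:0
print(next(model.hidden[0].parameters()).device)   # cpu -- left behind, just like fc_layers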
The simplest fix would be to replace

self.fc_layers = []

with

self.fc_layers = torch.nn.ModuleList()

(see torch.nn.ModuleList for more information).
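For reference, a sketch of what the MLP block in __init__ could look like with this change, assuming the rest of CNN_MLP stays the same; note that torch.sigmoid is swapped for the equivalent torch.nn.Sigmoid() module, since an nn.ModuleList only accepts nn.Module instances:

# MLP layers, now held in a registered container so .to() reaches them
self.fc_layers = torch.nn.ModuleList()
self.hidden_layer_sizes.insert(0, self._linear_layer_in_size())
for i in range(len(self.hidden_layer_sizes) - 1):
    self.fc_layers.append(torch.nn.Linear(self.hidden_layer_sizes[i], self.hidden_layer_sizes[i+1]))
    self.fc_layers.append(torch.nn.ReLU())
    if self.dropout_rate and i != len(self.hidden_layer_sizes) - 2:
        self.fc_layers.append(torch.nn.Dropout(self.dropout_rate))
self.fc_layers.append(torch.nn.Sigmoid())  # a module, unlike the bare torch.sigmoid function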
A better solution would be to make fc_layers an nn.Sequential, and then call it like a normal nn.Module during forward.
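A sketch of that variant, under the same assumptions (again using torch.nn.Sigmoid() in place of torch.sigmoid):

# In __init__: build the layer list, then wrap it in a single Sequential module
layers = []
self.hidden_layer_sizes.insert(0, self._linear_layer_in_size())
for i in range(len(self.hidden_layer_sizes) - 1):
    layers.append(torch.nn.Linear(self.hidden_layer_sizes[i], self.hidden_layer_sizes[i+1]))
    layers.append(torch.nn.ReLU())
    if self.dropout_rate and i != len(self.hidden_layer_sizes) - 2:
        layers.append(torch.nn.Dropout(self.dropout_rate))
layers.append(torch.nn.Sigmoid())
self.fc_layers = torch.nn.Sequential(*layers)

# In forward: call the container like any other module
x = self.fc_layers(x)

Either way, list(model.parameters()) will then include the linear layers' weights, and model.to(args.device) will move them along with the rest of the network.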
Answered By - jodag