Issue
I'm beginning my journey into Python 3 / PyTorch, and I'm having the following issue:
I'm trying to run this code on my GPU, but I get the following error:
Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!
I know this means I'm manipulating two tensors that live on different devices, but I can't figure out where in my code I forgot to transfer one of them.
Any help would be appreciated.
Here is the code:
import sys
import os
sys.path.append(os.path.abspath("include"))
sys.path.append(os.path.abspath("models"))
from utils import *
from MD5EncryptedDataEncoder import *
import torch
import torch.nn.functional as F
from torch import nn
import numpy as np
import hashlib
import pathlib
from pathlib import Path

# instantiate the model
model = MD5EncryptionEncoder()
model_file = Path(model.m_save_path)
if model_file.is_file():
    model = torch.load(model.m_save_path)
    print("Loaded previously saved model from [" + model.m_save_path + "]")

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print("Device that will be used : ")
print(device)
model = model.to(device)
print("Model parameter devices :")
print(next(model.parameters()).device)

# define our loss function
loss_function = nn.MSELoss()
loss_function = loss_function.to(device)

# define our optimizer
#learning_rate = 0.00000005
#optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=8, cooldown=2, verbose=True)

loss_history = [0.02]
for i in range(50000000):
    # Create our random unencrypted md5 data + convert it to a pytorch tensor
    md5_unencrypted_data_hexadecimal_string = secrets.token_hex(64)
    md5_unencrypted_data_tensor = HexStringToBinaryTensor(md5_unencrypted_data_hexadecimal_string)
    md5_unencrypted_data_binary_blob = HexStringToBinaryBlob(md5_unencrypted_data_hexadecimal_string)
    # Run the MD5 algorithm on our unencrypted md5 data + convert the result to a pytorch tensor
    md5_encrypted_data = hashlib.md5(md5_unencrypted_data_binary_blob)
    md5_encrypted_data_hexadecimal_string = md5_encrypted_data.hexdigest()
    md5_encrypted_data_tensor = HexStringToBinaryTensor(md5_encrypted_data_hexadecimal_string)
    md5_encrypted_data_tensor = md5_encrypted_data_tensor.to(device)
    print(md5_encrypted_data_tensor)
    #print("Unencrypted tensor :")
    #print(md5_unencrypted_data_tensor)
    #print("Encrypted tensor :")
    #print(md5_encrypted_data_tensor)
    # Run our forward pass
    predictions_tensor = model(md5_encrypted_data_tensor)
    #print("Prediction : ", predictions_tensor)
    # Run our loss function
    loss = loss_function(predictions_tensor, md5_encrypted_data_tensor)
    # Compute the gradient (this does NOT update the weights: it only computes the error gradient)
    loss.backward()
    if i % 100 == 0:
        # Update the weights
        optimizer.step()
        # We are not using batch updates, so reset the gradients
        optimizer.zero_grad()
        # Call the scheduler, which will change some params if necessary
        scheduler.step(loss)
        print("Loss : ", loss.item())
        if loss.item() < min(loss_history):
            print("Saving model to [" + model.m_save_path + "]")
            torch.save(model, model.m_save_path)
            loss_history.append(loss.item())
    if i % 500 == 0:
        print("Prediction : ", predictions_tensor)
        print("Wanted result : ", md5_encrypted_data_tensor)
Here is the MD5EncryptedDataEncoder class code:
import torch
import torch.nn.functional as F
from torch import nn

# define the network class
class MD5EncryptionEncoder(nn.Module):
    m_save_path = "data/MD5EncryptionEncoder.model"

    def __init__(self):
        # call constructor from superclass
        super().__init__()
        self.input_to_hidden_1 = nn.Linear(128, 10240)
        self.hidden_layers = []
        self.hidden_layers.append(nn.Linear(10240, 10240))
        self.hidden_layers.append(nn.Linear(10240, 10240))
        self.last_hidden_to_output = nn.Linear(10240, 128)
        self.hidden_layers_activation_function = torch.nn.LeakyReLU(0.1)

    def forward(self, x):
        # define the forward pass.
        # Here, 'x' represents the output of the network defined in __init__
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        x = self.hidden_layers_activation_function(self.hidden_layers[0](x))
        x = self.hidden_layers_activation_function(self.hidden_layers[1](x))
        x = torch.sigmoid(self.last_hidden_to_output(x))
        return x

    def EncryptedTensorToStateTensor(self, x):
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        x = self.hidden_layers_activation_function(self.hidden_layers[0](x))
        return x

    def StateTensorToEncryptedTensor(self, x):
        x = self.hidden_layers_activation_function(self.hidden_layers[1](x))
        x = torch.sigmoid(self.last_hidden_to_output(x))
        return x
Here is the content of utils.py:
import torch
import secrets
import binascii

def HexStringToBinaryBlob(p_hex_string):
    if len(p_hex_string) % 2 != 0:
        print("[HexStringToBinaryBlob] Parameter [p_hex_string]'s size is not an even number")
        exit(1)
    binary_blob = binascii.unhexlify(p_hex_string)
    return binary_blob

def HexStringToBinaryTensor(p_hex_string):
    if len(p_hex_string) % 2 != 0:
        print("[HexStringToBinaryTensor] Parameter [p_hex_string]'s size is not an even number")
        exit(1)
    binary_blob = binascii.unhexlify(p_hex_string)
    binary_string = ''.join(format(x, '08b') for x in binary_blob)
    result_tensor_size = int(len(p_hex_string) / 2 * 8)
    result_tensor = torch.randn(result_tensor_size)
    for binary_position in range(len(binary_string)):
        binary_digit = int(binary_string[binary_position])
        result_tensor[binary_position] = binary_digit
    return result_tensor

def Create512BitBinaryTensor():
    # Create a 512-bit (64-byte) block of data in hexadecimal
    hexadecimal_blob = secrets.token_hex(64)
    result_tensor = HexStringToBinaryTensor(hexadecimal_blob)
    return result_tensor
Here is the terminal output:
Loaded previously saved model from [data/MD5EncryptionEncoder.model]
Device that will be used :
cuda:0
Model parameter devices :
cuda:0
tensor([0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 1., 0., 1., 1., 1., 0.,
1., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 1., 0., 0.,
1., 1., 1., 0., 1., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1.,
0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0., 0., 1., 0., 0., 0., 1.,
1., 0., 0., 1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.,
1., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1.,
0., 1., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 1.,
1., 0.], device='cuda:0')
Traceback (most recent call last):
  File "TrainEncryptedDataEncoder.py", line 71, in <module>
    predictions_tensor = model(md5_encrypted_data_tensor)
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/workspace/Pytorch/models/MD5EncryptedDataEncoder.py", line 45, in forward
    x = self.hidden_layers_activation_function(self.hidden_layers[0](x))
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat in method wrapper_addmv_)
Solution
The issue is the plain Python list used in the model class. You have used

self.hidden_layers = []
self.hidden_layers.append(nn.Linear(10240, 10240))
self.hidden_layers.append(nn.Linear(10240, 10240))

nn.Module only registers submodules that are assigned directly as attributes (or held in the dedicated containers nn.Sequential / nn.ModuleList). Layers stored in a plain list are invisible to model.to(device), so their weights stay on the CPU while the rest of the model moves to cuda:0, which is exactly the mismatch the error reports.
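To see the registration difference in isolation, here is a minimal standalone sketch (toy layer sizes and made-up class names, not your model): the plain list exposes zero parameters, while nn.ModuleList exposes all of them.

import torch
from torch import nn

class PlainListNet(nn.Module):
    def __init__(self):
        super().__init__()
        # NOT registered: nn.Module cannot see inside a plain Python list
        self.layers = [nn.Linear(4, 4), nn.Linear(4, 4)]

class ModuleListNet(nn.Module):
    def __init__(self):
        super().__init__()
        # registered: nn.ModuleList tracks each layer as a submodule
        self.layers = nn.ModuleList([nn.Linear(4, 4), nn.Linear(4, 4)])

print(len(list(PlainListNet().parameters())))   # 0 -> .to(device) moves nothing
print(len(list(ModuleListNet().parameters())))  # 4 -> both weights and biases move

To fix your model, you can wrap the layers in an nn.Sequential and call them by index, as below: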
class MD5EncryptionEncoder(nn.Module):
    m_save_path = "data/MD5EncryptionEncoder.model"

    def __init__(self):
        # call constructor from superclass
        super().__init__()
        self.input_to_hidden_1 = nn.Linear(128, 10240)
        hidden_layers = []
        hidden_layers.append(nn.Linear(10240, 10240))
        hidden_layers.append(nn.Linear(10240, 10240))
        # wrapping the list in nn.Sequential registers the layers as submodules,
        # so model.to(device) moves their parameters as well
        self.hidden = nn.Sequential(*hidden_layers)
        self.last_hidden_to_output = nn.Linear(10240, 128)
        self.hidden_layers_activation_function = torch.nn.LeakyReLU(0.1)

    def forward(self, x):
        # define the forward pass
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        x = self.hidden_layers_activation_function(self.hidden[0](x))
        x = self.hidden_layers_activation_function(self.hidden[1](x))
        x = torch.sigmoid(self.last_hidden_to_output(x))
        return x

    def EncryptedTensorToStateTensor(self, x):
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        x = self.hidden_layers_activation_function(self.hidden[0](x))
        return x

    def StateTensorToEncryptedTensor(self, x):
        x = self.hidden_layers_activation_function(self.hidden[1](x))
        x = torch.sigmoid(self.last_hidden_to_output(x))
        return x
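If you would rather keep list-style access, nn.ModuleList registers the layers the same way. Here is a minimal sketch of that variant (the class name is just for illustration; forward pass only, helper methods left out):

import torch
from torch import nn

class MD5EncryptionEncoderModuleList(nn.Module):
    m_save_path = "data/MD5EncryptionEncoder.model"

    def __init__(self):
        super().__init__()
        self.input_to_hidden_1 = nn.Linear(128, 10240)
        # nn.ModuleList registers each layer while keeping list-style indexing
        self.hidden_layers = nn.ModuleList([
            nn.Linear(10240, 10240),
            nn.Linear(10240, 10240),
        ])
        self.last_hidden_to_output = nn.Linear(10240, 128)
        self.hidden_layers_activation_function = torch.nn.LeakyReLU(0.1)

    def forward(self, x):
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        for hidden_layer in self.hidden_layers:
            x = self.hidden_layers_activation_function(hidden_layer(x))
        return torch.sigmoid(self.last_hidden_to_output(x))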
Or you can ditch the list entirely and define each layer as its own attribute:
class MD5EncryptionEncoder(nn.Module):
    m_save_path = "data/MD5EncryptionEncoder.model"

    def __init__(self):
        # call constructor from superclass
        super().__init__()
        self.input_to_hidden_1 = nn.Linear(128, 10240)
        # each layer is a direct attribute, so nn.Module registers it automatically
        self.hidden_layer1 = nn.Linear(10240, 10240)
        self.hidden_layer2 = nn.Linear(10240, 10240)
        self.last_hidden_to_output = nn.Linear(10240, 128)
        self.hidden_layers_activation_function = torch.nn.LeakyReLU(0.1)

    def forward(self, x):
        # define the forward pass
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        x = self.hidden_layers_activation_function(self.hidden_layer1(x))
        x = self.hidden_layers_activation_function(self.hidden_layer2(x))
        x = torch.sigmoid(self.last_hidden_to_output(x))
        return x

    def EncryptedTensorToStateTensor(self, x):
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        x = self.hidden_layers_activation_function(self.hidden_layer1(x))
        return x

    def StateTensorToEncryptedTensor(self, x):
        x = self.hidden_layers_activation_function(self.hidden_layer2(x))
        x = torch.sigmoid(self.last_hidden_to_output(x))
        return x
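Either way, a quick sanity check (a short snippet assuming one of the fixed classes above) is to confirm that every registered parameter reports the same device after the move:

import torch

model = MD5EncryptionEncoder()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# with the hidden layers registered, this prints a single device,
# e.g. {device(type='cuda', index=0)}
print({p.device for p in model.parameters()})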
Answered By - Manu Gond