Issue
I'm beginning my journey into Python 3 / PyTorch, and I'm having the following issue:
I'm trying to run this code on my GPU, but I get the following error:
Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!
I know this means I'm manipulating two tensors that live on different devices, but I can't figure out where in my code I forgot to transfer one of them.
Any help would be appreciated.
Here is the code:
import sys
import os
sys.path.append(os.path.abspath("include"))
sys.path.append(os.path.abspath("models"))
from utils import *
from MD5EncryptedDataEncoder import *
import torch
import torch.nn.functional as F
from torch import nn
import numpy as np
import hashlib
import pathlib
from pathlib import Path

# instantiate the model
model = MD5EncryptionEncoder()
model_file = Path(model.m_save_path)
if model_file.is_file():
    model = torch.load(model.m_save_path)
    print("Loaded previously saved model from [" + model.m_save_path + "]")

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print("Device that will be used : ")
print(device)
model = model.to(device)
print("Model parameter devices :")
print(next(model.parameters()).device)

# define our loss function
loss_function = nn.MSELoss()
loss_function = loss_function.to(device)

# define our optimizer
#learning_rate = 0.00000005
#optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=8, cooldown=2, verbose=True)

loss_history = [0.02]
for i in range(50000000):
    # Create our random unencrypted md5 data + convert it to a pytorch tensor
    md5_unencrypted_data_hexadecimal_string = secrets.token_hex(64)
    md5_unencrypted_data_tensor = HexStringToBinaryTensor(md5_unencrypted_data_hexadecimal_string)
    md5_unencrypted_data_binary_blob = HexStringToBinaryBlob(md5_unencrypted_data_hexadecimal_string)
    # Run the MD5 algorithm on our unencrypted md5 data + convert the result to a pytorch tensor
    md5_encrypted_data = hashlib.md5(md5_unencrypted_data_binary_blob)
    md5_encrypted_data_hexadecimal_string = md5_encrypted_data.hexdigest()
    md5_encrypted_data_tensor = HexStringToBinaryTensor(md5_encrypted_data_hexadecimal_string)
    md5_encrypted_data_tensor = md5_encrypted_data_tensor.to(device)
    print(md5_encrypted_data_tensor)
    #print("Unencrypted tensor :")
    #print(md5_unencrypted_data_tensor)
    #print("Encrypted tensor :")
    #print(md5_encrypted_data_tensor)
    # Run our forward pass
    predictions_tensor = model(md5_encrypted_data_tensor)
    #print("Prediction : ", predictions_tensor)
    # Run our loss function
    loss = loss_function(predictions_tensor, md5_encrypted_data_tensor)
    # Compute the gradient (this does NOT update the weights: it only computes the error gradient)
    loss.backward()
    if i % 100 == 0:
        # Update the weights
        optimizer.step()
        # We are not using batch updates, so reset the gradients
        optimizer.zero_grad()
        # Call the scheduler, which will change some params if necessary
        scheduler.step(loss)
        print("Loss : ", loss.item())
        if loss.item() < min(loss_history):
            print("Saving model to [" + model.m_save_path + "]")
            torch.save(model, model.m_save_path)
            loss_history.append(loss.item())
    if i % 500 == 0:
        print("Prediction : ", predictions_tensor)
        print("Wanted result : ", md5_encrypted_data_tensor)
Here is the MD5EncryptedDataEncoder class code:
import torch
import torch.nn.functional as F
from torch import nn

# define the network class
class MD5EncryptionEncoder(nn.Module):
    m_save_path = "data/MD5EncryptionEncoder.model"

    def __init__(self):
        # call constructor from superclass
        super().__init__()
        self.input_to_hidden_1 = nn.Linear(128, 10240)
        self.hidden_layers = []
        self.hidden_layers.append(nn.Linear(10240, 10240))
        self.hidden_layers.append(nn.Linear(10240, 10240))
        self.last_hidden_to_output = nn.Linear(10240, 128)
        self.hidden_layers_activation_function = torch.nn.LeakyReLU(0.1)

    def forward(self, x):
        # define the forward pass.
        # Here, 'x' represents the output of the network defined in __init__
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        x = self.hidden_layers_activation_function(self.hidden_layers[0](x))
        x = self.hidden_layers_activation_function(self.hidden_layers[1](x))
        x = torch.sigmoid(self.last_hidden_to_output(x))
        return x

    def EncryptedTensorToStateTensor(self, x):
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        x = self.hidden_layers_activation_function(self.hidden_layers[0](x))
        return x

    def StateTensorToEncryptedTensor(self, x):
        x = self.hidden_layers_activation_function(self.hidden_layers[1](x))
        x = torch.sigmoid(self.last_hidden_to_output(x))
        return x
Here is the content of utils.py:
import torch
import secrets
import binascii

def HexStringToBinaryBlob(p_hex_string):
    if len(p_hex_string) % 2 != 0:
        print("[HexStringToBinaryBlob] Parameter [p_hex_string]'s size is not an even number")
        exit(1)
    binary_blob = binascii.unhexlify(p_hex_string)
    return binary_blob

def HexStringToBinaryTensor(p_hex_string):
    if len(p_hex_string) % 2 != 0:
        print("[HexStringToBinaryTensor] Parameter [p_hex_string]'s size is not an even number")
        exit(1)
    binary_blob = binascii.unhexlify(p_hex_string)
    binary_string = ''.join(format(x, '08b') for x in binary_blob)
    result_tensor_size = int(len(p_hex_string) / 2 * 8)
    result_tensor = torch.randn(result_tensor_size)
    for binary_position in range(len(binary_string)):
        binary_digit = int(binary_string[binary_position])
        result_tensor[binary_position] = binary_digit
    return result_tensor

def Create512BitBinaryTensor():
    # Create a 512-bit (64-byte) block of data in hexadecimal
    hexadecimal_blob = secrets.token_hex(64)
    result_tensor = HexStringToBinaryTensor(hexadecimal_blob)
    return result_tensor
Here is the terminal output:
Loaded previously saved model from [data/MD5EncryptionEncoder.model]
Device that will be used :
cuda:0
Model parameter devices :
cuda:0
tensor([0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 1., 0., 1., 1., 1., 0.,
1., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 1., 0., 0.,
1., 1., 1., 0., 1., 0., 1., 1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1.,
0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0., 0., 1., 0., 0., 0., 1.,
1., 0., 0., 1., 1., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0.,
1., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1.,
0., 1., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 1.,
1., 0.], device='cuda:0')
Traceback (most recent call last):
  File "TrainEncryptedDataEncoder.py", line 71, in <module>
    predictions_tensor = model(md5_encrypted_data_tensor)
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/workspace/Pytorch/models/MD5EncryptedDataEncoder.py", line 45, in forward
    x = self.hidden_layers_activation_function(self.hidden_layers[0](x))
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
    return forward_call(*input, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/torch/nn/modules/linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument mat in method wrapper_addmv_)
Solution
The issue is the plain Python list used in the model class. You have used

self.hidden_layers = []
self.hidden_layers.append(nn.Linear(10240, 10240))
self.hidden_layers.append(nn.Linear(10240, 10240))

nn.Module only registers submodules that are assigned directly as attributes (or held in the dedicated containers nn.Sequential / nn.ModuleList). Layers stored in a plain list are invisible to model.to(device), so their weights stay on the CPU while the rest of the model moves to cuda:0, which is exactly the mismatch the error reports.
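To see the registration difference in isolation, here is a minimal standalone sketch (toy layer sizes and made-up class names, not your model): the plain list exposes zero parameters, while nn.ModuleList exposes all of them.

import torch
from torch import nn

class PlainListNet(nn.Module):
    def __init__(self):
        super().__init__()
        # NOT registered: nn.Module cannot see inside a plain Python list
        self.layers = [nn.Linear(4, 4), nn.Linear(4, 4)]

class ModuleListNet(nn.Module):
    def __init__(self):
        super().__init__()
        # registered: nn.ModuleList tracks each layer as a submodule
        self.layers = nn.ModuleList([nn.Linear(4, 4), nn.Linear(4, 4)])

print(len(list(PlainListNet().parameters())))   # 0 -> .to(device) moves nothing
print(len(list(ModuleListNet().parameters())))  # 4 -> both weights and biases move

To fix your model, you can wrap the layers in an nn.Sequential and call them by index, as below: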
class MD5EncryptionEncoder(nn.Module):
    m_save_path = "data/MD5EncryptionEncoder.model"

    def __init__(self):
        # call constructor from superclass
        super().__init__()
        self.input_to_hidden_1 = nn.Linear(128, 10240)
        hidden_layers = []
        hidden_layers.append(nn.Linear(10240, 10240))
        hidden_layers.append(nn.Linear(10240, 10240))
        # wrapping the list in nn.Sequential registers the layers as submodules,
        # so model.to(device) moves their parameters as well
        self.hidden = nn.Sequential(*hidden_layers)
        self.last_hidden_to_output = nn.Linear(10240, 128)
        self.hidden_layers_activation_function = torch.nn.LeakyReLU(0.1)

    def forward(self, x):
        # define the forward pass
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        x = self.hidden_layers_activation_function(self.hidden[0](x))
        x = self.hidden_layers_activation_function(self.hidden[1](x))
        x = torch.sigmoid(self.last_hidden_to_output(x))
        return x

    def EncryptedTensorToStateTensor(self, x):
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        x = self.hidden_layers_activation_function(self.hidden[0](x))
        return x

    def StateTensorToEncryptedTensor(self, x):
        x = self.hidden_layers_activation_function(self.hidden[1](x))
        x = torch.sigmoid(self.last_hidden_to_output(x))
        return x
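If you would rather keep list-style access, nn.ModuleList registers the layers the same way. Here is a minimal sketch of that variant (the class name is just for illustration; forward pass only, helper methods left out):

import torch
from torch import nn

class MD5EncryptionEncoderModuleList(nn.Module):
    m_save_path = "data/MD5EncryptionEncoder.model"

    def __init__(self):
        super().__init__()
        self.input_to_hidden_1 = nn.Linear(128, 10240)
        # nn.ModuleList registers each layer while keeping list-style indexing
        self.hidden_layers = nn.ModuleList([
            nn.Linear(10240, 10240),
            nn.Linear(10240, 10240),
        ])
        self.last_hidden_to_output = nn.Linear(10240, 128)
        self.hidden_layers_activation_function = torch.nn.LeakyReLU(0.1)

    def forward(self, x):
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        for hidden_layer in self.hidden_layers:
            x = self.hidden_layers_activation_function(hidden_layer(x))
        return torch.sigmoid(self.last_hidden_to_output(x))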
Or you can ditch the list entirely and define each layer as its own attribute:
class MD5EncryptionEncoder(nn.Module):
    m_save_path = "data/MD5EncryptionEncoder.model"

    def __init__(self):
        # call constructor from superclass
        super().__init__()
        self.input_to_hidden_1 = nn.Linear(128, 10240)
        # each layer is a direct attribute, so nn.Module registers it automatically
        self.hidden_layer1 = nn.Linear(10240, 10240)
        self.hidden_layer2 = nn.Linear(10240, 10240)
        self.last_hidden_to_output = nn.Linear(10240, 128)
        self.hidden_layers_activation_function = torch.nn.LeakyReLU(0.1)

    def forward(self, x):
        # define the forward pass
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        x = self.hidden_layers_activation_function(self.hidden_layer1(x))
        x = self.hidden_layers_activation_function(self.hidden_layer2(x))
        x = torch.sigmoid(self.last_hidden_to_output(x))
        return x

    def EncryptedTensorToStateTensor(self, x):
        x = self.hidden_layers_activation_function(self.input_to_hidden_1(x))
        x = self.hidden_layers_activation_function(self.hidden_layer1(x))
        return x

    def StateTensorToEncryptedTensor(self, x):
        x = self.hidden_layers_activation_function(self.hidden_layer2(x))
        x = torch.sigmoid(self.last_hidden_to_output(x))
        return x
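Either way, a quick sanity check (a short snippet assuming one of the fixed classes above) is to confirm that every registered parameter reports the same device after the move:

import torch

model = MD5EncryptionEncoder()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# with the hidden layers registered, this prints a single device,
# e.g. {device(type='cuda', index=0)}
print({p.device for p in model.parameters()})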
Answered By - Manu Gond