Issue
I'm failing to run my GAN on GPU. I call to(device)
for all both models and all tensors but still keep getting the following error:
Traceback (most recent call last):
File "code/a3_gan_template.py", line 185, in <module>
main(args)
File "code/a3_gan_template.py", line 162, in main
train(dataloader, discriminator, generator, optimizer_G, optimizer_D, device)
File "code/a3_gan_template.py", line 100, in train
d_x = discriminator.forward(imgs)
File "code/a3_gan_template.py", line 80, in forward
out = self.model(img)
File "/home/lgpu0365/.local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "/home/lgpu0365/.local/lib/python3.6/site-packages/torch/nn/modules/container.py", line 92, in forward
input = module(input)
File "/home/lgpu0365/.local/lib/python3.6/site-packages/torch/nn/modules/module.py", line 489, in __call__
result = self.forward(*input, **kwargs)
File "/home/lgpu0365/.local/lib/python3.6/site-packages/torch/nn/modules/linear.py", line 67, in forward
return F.linear(input, self.weight, self.bias)
File "/home/lgpu0365/.local/lib/python3.6/site-packages/torch/nn/functional.py", line 1352, in linear
ret = torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t())
RuntimeError: Expected object of backend CUDA but got backend CPU for argument #4 'mat1'
Source code:
import argparse
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torchvision import datasets
from torch.nn.functional import binary_cross_entropy
class Generator(nn.Module):
def __init__(self, latent_dim):
super(Generator, self).__init__()
# Construct generator. You are free to experiment with your model,
# but the following is a good start:
# Linear args.latent_dim -> 128
# LeakyReLU(0.2)
# Linear 128 -> 256
# Bnorm
# LeakyReLU(0.2)
# Linear 256 -> 512
# Bnorm
# LeakyReLU(0.2)
# Linear 512 -> 1024
# Bnorm
# LeakyReLU(0.2)
# Linear 1024 -> 784
# Output non-linearity
self.latent_dim = latent_dim
self.model = nn.Sequential(
nn.Linear(latent_dim, 128),
nn.LeakyReLU(0.2),
nn.Linear(128, 256),
nn.BatchNorm1d(256),
nn.LeakyReLU(0.2),
nn.Linear(256, 512),
nn.BatchNorm1d(512),
nn.LeakyReLU(0.2),
nn.Linear(512, 1024),
nn.BatchNorm1d(1024),
nn.LeakyReLU(0.2),
nn.Linear(1024, 784),
nn.Sigmoid()
)
def forward(self, z):
# Generate images from z
out = self.model(z)
return out
class Discriminator(nn.Module):
def __init__(self):
super(Discriminator, self).__init__()
# Construct distriminator. You are free to experiment with your model,
# but the following is a good start:
# Linear 784 -> 512
# LeakyReLU(0.2)
# Linear 512 -> 256
# LeakyReLU(0.2)
# Linear 256 -> 1
# Output non-linearity
self.model = nn.Sequential(
nn.Linear(784, 512),
nn.LeakyReLU(0.2),
nn.Linear(512, 256),
nn.LeakyReLU(0.2),
nn.Linear(256, 1),
nn.Sigmoid()
)
def forward(self, img):
# return discriminator score for img
out = self.model(img)
return out
def train(dataloader, discriminator, generator, optimizer_G, optimizer_D, device):
for epoch in range(args.n_epochs):
for i, (imgs, _) in enumerate(dataloader):
batch_count = epoch * len(dataloader) + i
imgs.to(device)
batch_size = imgs.shape[0]
imgs = imgs.reshape(batch_size, -1)
z = torch.rand(batch_size, generator.latent_dim, device=device)
gen_imgs = generator(z)
discriminator.to(device)
d_x = discriminator(imgs)
d_g_z = discriminator(gen_imgs)
ones = torch.ones(d_g_z.shape, device=device)
# Train Generator
# ---------------
loss_G = binary_cross_entropy(d_g_z, ones)
optimizer_G.zero_grad()
loss_G.backward(retain_graph=True)
optimizer_G.step()
# Train Discriminator
# -------------------
if batch_count % args.d_train_interval == 0:
loss_D = binary_cross_entropy(d_x, 0.9 * ones) + binary_cross_entropy(d_g_z, 0. * ones)
optimizer_D.zero_grad()
loss_D.backward()
optimizer_D.step()
# Save Images
# -----------
if batch_count % args.save_interval == 0:
print(f'epoch: {epoch} batches: {batch_count} L_G: {loss_G.item():0.3f} L_D: {loss_D.item():0.3f}')
# You can use the function save_image(Tensor (shape Bx1x28x28),
# filename, number of rows, normalize) to save the generated
# images, e.g.:
save_image(gen_imgs[:25],
f'images/{batch_count}.png',
nrow=5, normalize=True)
def main(args):
# Create output image directory
os.makedirs('images', exist_ok=True)
# Set device
if torch.cuda.is_available():
device = torch.device('cuda')
else:
device = torch.device('cpu')
# load data
dataloader = torch.utils.data.DataLoader(
datasets.MNIST('./data/mnist', train=True, download=True,
transform=transforms.Compose([
transforms.ToTensor(),
transforms.Normalize( (0.5,), (0.5,) )
])),
batch_size=args.batch_size, shuffle=True)
# Initialize models and optimizers
generator = Generator(args.latent_dim)
generator.to(device)
discriminator = Discriminator()
discriminator.to(device)
optimizer_G = torch.optim.Adam(generator.parameters(), lr=args.lr)
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=args.lr)
# Start training
train(dataloader, discriminator, generator, optimizer_G, optimizer_D, device)
# You can save your generator here to re-use it to generate images for your
# report, e.g.:
torch.save(generator.state_dict(), "mnist_generator.pt")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--n_epochs', type=int, default=200,
help='number of epochs')
parser.add_argument('--batch_size', type=int, default=64,
help='batch size')
parser.add_argument('--lr', type=float, default=0.0002,
help='learning rate')
parser.add_argument('--latent_dim', type=int, default=100,
help='dimensionality of the latent space')
parser.add_argument('--save_interval', type=int, default=500,
help='save every SAVE_INTERVAL iterations')
parser.add_argument('--d_train_interval', type=int, default=25,
help='train discriminator (only) every D_TRAIN_INTERVAL iterations')
args = parser.parse_args()
main(args)
Any ideas on how to figure out what is missing? Thanks!
Solution
Found the solution. Turns out .to(device)
does not work in place for tensors.
# wrong
imgs.to(device)
# correct
imgs = imgs.to(device)
Answered By - David Speck
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.