Issue
I am trying to train a model with PyTorch. The dataset is my own and contains 2 folders: train_data and test_data. Each of those has 2 subfolders: detected and not_detected (these are my classes). Inside each class folder there are many PNG images of different sizes, and I always get an exception about the image sizes. Before this, I ran the same code with the MNIST dataset and it worked. This is my code:
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torchvision import datasets
import torch.optim as optim
import os
from PIL import Image
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
is_gpu = torch.cuda.is_available()
class CustomDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = os.listdir(root_dir)
        self.data = []
        self.targets = []
        for class_idx, class_name in enumerate(self.classes):
            class_dir = os.path.join(root_dir, class_name)
            for filename in os.listdir(class_dir):
                img_path = os.path.join(class_dir, filename)
                self.data.append(img_path)
                self.targets.append(class_idx)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = self.data[idx]
        target = self.targets[idx]
        img = Image.open(img_path).convert('RGB')  # Open the image and convert it to RGB format
        if self.transform is not None:
            img = self.transform(img)
        return img, target

transform = transforms.Compose([
    transforms.Resize((256, 256)),  # Can be used to resize the image to the desired size
    transforms.RandomCrop((224, 224)),
    transforms.ToTensor()
])
train_dataset = CustomDataset(root_dir=r'C:\Users\enis_\Desktop\yololu\train_data', transform=transform)
test_dataset = CustomDataset(root_dir=r'C:\Users\enis_\Desktop\yololu\test_data', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
model = nn.Sequential(nn.Flatten(),
                      nn.Linear(28*28, 512),
                      nn.ReLU(),
                      nn.Linear(512, 256),
                      nn.ReLU(),
                      nn.Linear(256, 10))
if is_gpu:
    model = model.cuda()
print(model)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(),lr=0.7)
epochs = 5
for i in range(epochs):
    train_loss = 0.0
    for data, label in train_loader:
        if is_gpu:
            data, label = data.cuda(), label.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.size(0)
    print(f'Epoch: {i+1} / {epochs} \t\t\t Training Loss:{train_loss/len(train_loader)}')
And this is the output:
Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=512, bias=True)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=256, bias=True)
  (4): ReLU()
  (5): Linear(in_features=256, out_features=10, bias=True)
)
Traceback (most recent call last):
  File "c:\Users\enis_\Desktop\yololu\tempCodeRunnerFile.py", line 80, in <module>
    output = model(data)
             ^^^^^^^^^^^
  File "C:\Users\enis_\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\enis_\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\torch\nn\modules\container.py", line 217, in forward
    input = module(input)
            ^^^^^^^^^^^^^
  File "C:\Users\enis_\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\torch\nn\modules\module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\enis_\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\torch\nn\modules\linear.py", line 114, in forward
    return F.linear(input, self.weight, self.bias)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x150528 and 784x512)
As you can see in my code, I am trying to resize the images with
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop((224, 224)),
    transforms.ToTensor()
])
but it doesn't work.
Solution
In your model:
model = nn.Sequential(nn.Flatten(),
                      nn.Linear(28*28, 512),
                      nn.ReLU(),
                      nn.Linear(512, 256),
                      nn.ReLU(),
                      nn.Linear(256, 10))
you have nn.Flatten(), which flattens each 224x224 image into a 224*224*3 = 150528-dimensional vector. The 3 comes from having 3 colour channels.
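You can verify this with a minimal check, using a dummy batch shaped like the ones your DataLoader produces after RandomCrop((224, 224)):

import torch
import torch.nn as nn

# A batch of 32 RGB crops: (N, C, H, W)
x = torch.randn(32, 3, 224, 224)
print(nn.Flatten()(x).shape)  # torch.Size([32, 150528]) -- 3 * 224 * 224 = 150528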
So the resizing transforms are actually working; the error occurs because the first Linear layer still expects the 28*28 = 784 inputs of an MNIST image. If you want to use a fully connected neural network for these images, you need to set the input size of the first layer to 224*224*3:
model = nn.Sequential(nn.Flatten(),
nn.Linear(224*224*3,512),
nn.ReLU(),
nn.Linear(512,256),
nn.ReLU(),
nn.Linear(256,10))
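As a quick sanity check, the corrected model accepts a dummy batch of the same shape without error (a sketch with random data, assuming the same batch size and crop size as above):

import torch
import torch.nn as nn

model = nn.Sequential(nn.Flatten(),
                      nn.Linear(224*224*3, 512),
                      nn.ReLU(),
                      nn.Linear(512, 256),
                      nn.ReLU(),
                      nn.Linear(256, 10))

x = torch.randn(32, 3, 224, 224)  # dummy batch: 32 RGB images of 224x224
print(model(x).shape)             # torch.Size([32, 10]) -- no shape mismatch

Note that the final layer still has 10 outputs, carried over from the MNIST version. Since you only have two classes (detected and not_detected), nn.Linear(256, 2) would be the natural choice, although CrossEntropyLoss will still run with 10 outputs and labels 0/1.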
Answered By - Nick Antonnopoulos