Issue
I want to train my dataset with training data and validation data. Total data is 2048, train data is 1638, and validation data is 410 (20% of total).
Here are my codes
loading data (org: total training data)
org_x = train_csv.drop(['id', 'digit', 'letter'], axis=1).values org_x = org_x.reshape(-1, 28, 28, 1) org_x = org_x/255 org_x = np.array(org_x) org_x = org_x.reshape(-1, 1, 28, 28) org_x = torch.Tensor(org_x) x_test = test_csv.drop(['id','letter'], axis=1).values x_test = x_test.reshape(-1, 28, 28, 1) x_test = x_test/255 x_test = np.array(x_test) x_test = x_test.reshape(-1, 1, 28, 28) x_test = torch.Tensor(x_test) y = train_csv['digit'] y = list(y) print(len(y)) org_y = np.zeros([len(y), 1]) for i in range(len(y)): org_y[i] = y[i] org1 = np.array(org_y, dtype=object)
splitting data (org: total training data)
from sklearn.model_selection import train_test_split x_train, x_valid, y_train, y_valid = train_test_split( org, org1, test_size=0.2, random_state=42)
transform
transform = transforms.Compose([transforms.ToPILImage(), transforms.ToTensor(), transforms.Normalize((0.5, ), (0.5, )) ])
dataset
class kmnistDataset(data.Dataset): def __init__(self, images, labels=None, transforms=None): self.x = images self.y = labels self.transforms = transforms def __len__(self): return (len(self.x)) def __getitem__(self, idx): data = np.asarray(self.x[idx][0:]).astype(np.uint8) if self.transforms: data = self.transforms(data) if self.y is not None: return (data, self.y[i]) else: return data train_data = kmnistDataset(x_train, y_train, transform) valid_data = kmnistDataset(x_valid, y_valid, transform) train_loader = DataLoader(train_data, batch_size=16, shuffle=True) valid_loader = DataLoader(valid_data, batch_size=16, shuffle = False)
I'll skip the model structure.
training(Here, I got the error message)
n_epochs = 30 valid_loss_min = np.Inf for epoch in range(1, n_epochs+1): train_loss = 0 valid_loss = 0 ################### # train the model # ################### model.train() for data in train_loader: inputs, labels = data[0], data[1] optimizer.zero_grad() output = model(inputs) loss = criterion(output, labels) loss.backward() optimizer.step() train_loss += loss.item()*data.size(0) ##################### # validate the model# ##################### model.eval() for data in valid_loader: inputs, labels = data[0], data[1] output = model(inputs) loss = criterion(output, labels) valid_loss += loss.item()*data.size(0) train_loss = train_loss/ len(train_loader.dataset) valid_loss = valid_loss / len(valid_loader.dataset) print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format( epoch, train_loss, valid_loss))
Although I checked the data size, I got the error message below.
index 2047 is out of bounds for axis 0 with size 1638
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-42-b8783819421f> in <module>
11 ###################
12 model.train()
---> 13 for data in train_loader:
14 inputs, labels = data[0], data[1]
15 optimizer.zero_grad()
/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in __next__(self)
433 if self._sampler_iter is None:
434 self._reset()
--> 435 data = self._next_data()
436 self._num_yielded += 1
437 if self._dataset_kind == _DatasetKind.Iterable and \
/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/dataloader.py in_next_data(self)
473 def _next_data(self):
474 index = self._next_index() # may raise StopIteration
--> 475 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
476 if self._pin_memory:
477 data = _utils.pin_memory.pin_memory(data)
/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in fetch(self,
possibly_batched_index)
42 def fetch(self, possibly_batched_index):
43 if self.auto_collation:
---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
45 else:
46 data = self.dataset[possibly_batched_index]
/opt/anaconda3/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py in <listcomp(.0)
42 def fetch(self, possibly_batched_index):
43 if self.auto_collation:
---> 44 data = [self.dataset[idx] for idx in possibly_batched_index]
45 else:
46 data = self.dataset[possibly_batched_index]
<ipython-input-38-e5c87dd8a7ff> in __getitem__(self, idx)
17
18 if self.y is not None:
---> 19 return (data, self.y[i])
20 else:
21 return data
IndexError: index 2047 is out of bounds for axis 0 with size 1638
Can you explain why and how to solve it?
Solution
At first glance, you are using incorrect shapes: org_x = org_x.reshape(-1, 28, 28, 1)
. The channel axis you be the second one (unlike in TensorFlow), as (batch_size, channels, height, width)
:
org_x = org_x.reshape(-1, 1, 28, 28)
Same with x_test
x_test = x_test.reshape(-1, 1, 28, 28)
Also, you are accessing a list out of bound. You accessed self.y
with i
. Seems to me you should be returning (data, self.y[idx])
instead.
Answered By - Ivan
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.