Issue
I am trying to build a PyTorch model that predicts the rank of a text, where the output is a float between 0 and 1.
My input details are:
- My batch size is 32.
- Max length for the tokenizer is 116
- In addition to the IDs and masks generated by the tokenizer, I am adding 11 values that were generated by preprocessing the input text.
- So the entire input shape would be (32, 127): a batch of 32, each sample being a 127-item array (11 preprocessed values + 116 token IDs), as in the quick sketch below.
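A minimal arithmetic sketch of the per-sample length described above (nothing here comes from the question's code; it just restates the numbers):

n_features = 11                       # preprocessed values per sample
max_len = 116                         # tokenizer max length
sample_len = n_features + max_len     # 127 items per sample
batch_shape = (32, sample_len)        # (32, 127) per batch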
My layers are as follows:
- A DistilBERT uncased transformer; the DistilBERT tokenizer is used on the text.
- The following layer is a CNN that takes the DistilBERT output (127 channels) as input and produces 64 output channels, with kernel size 1.
- After this come 6 CNN layers, each with 64 input channels and 64 output channels, a kernel size of 3, and dilation increasing from 2 to 32. Each one is followed by a ReLU and a max pooling with kernel size 2.
- My last CNN layer (where the issue is happening) has 64 input channels and 32 output channels with a kernel size of 1, followed by a ReLU and an AdaptiveMaxPool1d with an output size of 32.
- A linear layer that takes 32 and outputs 16.
- A linear layer that takes 16 and outputs 1.
Below is my code:
import torch as t
import torch.nn as nn
from torch.utils.data import Dataset
from transformers import DistilBertModel, DistilBertTokenizer


class Dataset(Dataset):
    def __init__(self, df, max_len, bert_model_name, multi=1):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.max_len = max_len
        self.tokenizer = DistilBertTokenizer.from_pretrained(
            bert_model_name,
            do_lower_case=True,
            strip_accents=True,
            wordpieces_prefix=None,
            use_fast=True
        )
        self.multiplier = multi

    def __len__(self):
        # length needed by DataLoader
        return len(self.df)

    def __getitem__(self, index):
        row = self.df.iloc[index]
        inputs = self.tokenizer.encode_plus(
            row.source,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding="max_length",
            return_token_type_ids=True,
            truncation=True
        )
        return (
            t.LongTensor(t.cat([
                t.LongTensor([
                    row.n_total_cells * self.multiplier,
                    row.n_code_cells * self.multiplier,
                    row.n_markdown_cells * self.multiplier,
                    row.word_counts * self.multiplier,
                    row.line_counts * self.multiplier,
                    row.empty_line_counts * self.multiplier,
                    row.full_lines_count * self.multiplier,
                    row.text_lines_count * self.multiplier,
                    row.tag_lines_count * self.multiplier,
                    row.weight * self.multiplier,
                    row.weight_counts * self.multiplier,
                ]),
                t.LongTensor(inputs['input_ids']),
            ], 0)),
            t.LongTensor(t.cat([
                t.ones(11, dtype=t.long),
                t.LongTensor(inputs['attention_mask']),
            ], 0)),
        )
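For reference, a minimal sketch of how this Dataset might be wrapped in a DataLoader with the batch size described above. The DataFrame df and the checkpoint name "distilbert-base-uncased" are assumptions for illustration, not taken from the question:

from torch.utils.data import DataLoader

# Hypothetical setup; df and the checkpoint name are placeholders.
train_ds = Dataset(df, max_len=116, bert_model_name="distilbert-base-uncased")
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)

ids, masks = next(iter(train_loader))
print(ids.shape, masks.shape)  # expected: torch.Size([32, 127]) torch.Size([32, 127])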
class BModel(nn.Module):
    def __init__(self, bert_model_name):
        super(BModel, self).__init__()
        self.distill_bert = DistilBertModel.from_pretrained(bert_model_name)
        self.hidden_size = self.distill_bert.config.hidden_size
        print(self.hidden_size)  # 768
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.3)
        self.cnn_layers()

    def forward(self, inputs):
        dbert = self.cnn_forward(inputs[0], inputs[1])
        return dbert

    def cnn_layers(self):
        self.layers = 4
        kernel_size = 3
        inp = 127
        out = 32
        grades = [2, 4, 8, 16, 32, 64]
        self.convs = nn.ModuleList()
        self.relus = nn.ModuleList()
        self.maxs = nn.ModuleList()
        self.norms = nn.ModuleList()
        self.start_conv = nn.Conv1d(
            in_channels=inp,
            out_channels=64,
            kernel_size=1,
            bias=True
        )
        for i in range(self.layers):
            # dilated convolutions
            self.convs.append(nn.Conv1d(
                in_channels=64,
                out_channels=64,
                kernel_size=kernel_size,
                bias=False,
                dilation=grades[i]
            ))
            self.relus.append(nn.ReLU())
            self.maxs.append(nn.MaxPool1d(
                kernel_size=kernel_size - 1,
            ))
            self.norms.append(nn.BatchNorm1d(
                num_features=64,
            ))
        self.end_conv = nn.Conv1d(
            in_channels=64,
            out_channels=out,
            kernel_size=1,
            bias=True
        )
        self.max_pool = nn.AdaptiveMaxPool1d(out)
        self.top1 = nn.Linear(out, 16)
        self.top2 = nn.Linear(16, 1)

    def cnn_forward(self, ids, masks):
        x = self.distill_bert(ids, masks)[0]
        x = self.relu(x)
        x = self.dropout(x)
        print("X size after BERT:", x.size())
        x = self.start_conv(x)
        print("X size after First Conv:", x.size())
        for i in range(self.layers):
            x = self.norms[i](self.maxs[i](self.relus[i](self.convs[i](x))))
            print(f"X size after {i} CNN dilation:", x.size())
        x = self.max_pool(t.abs(self.end_conv(x)))
        print("X size after AdaptiveMaxPool1d:", x.size())
        x = self.top1(x)
        print("X size after before-last linear:", x.size())
        x = self.top2(x)
        print("X size after last linear:", x.size())
        return x
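As a sketch, feeding one batch from a loader like the one above through this model (the checkpoint name is again a placeholder, not from the question) reproduces the extra dimension discussed below:

# Hypothetical call; the checkpoint name is a placeholder.
model = BModel("distilbert-base-uncased")
batch = next(iter(train_loader))   # [ids, masks], each of shape (32, 127)
preds = model(batch)               # forward unpacks inputs[0] and inputs[1]
print(preds.shape)                 # torch.Size([32, 32, 1]) with the current max_pool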
Printing the output size after each layer gives the following:
X size after First Conv: torch.Size([32, 64, 768])
X size after 0 CNN dilation: torch.Size([32, 64, 382])
X size after 1 CNN dilation: torch.Size([32, 64, 187])
X size after 2 CNN dilation: torch.Size([32, 64, 85])
X size after 3 CNN dilation: torch.Size([32, 64, 26])
X size after AdaptiveMaxPool1d: torch.Size([32, 32, 32])
X size after before-last linear: torch.Size([32, 32, 16])
X size after last linear: torch.Size([32, 32, 1])
The issue I am facing is after the AdaptiveMaxPool1d: the output of this layer is supposed to have 2 dimensions, [32, 32], instead of the 3 dimensions, [32, 32, 32], it actually has.
The output of AdaptiveMaxPool1d still fits into the linear layers, but the extra dimension makes the predictions differ in size from the true labels.
When I check the predicted size against the true size I get
y_pred shape (12480,)
y_val shape (390,)
(12480 = 390 × 32, i.e. every validation sample ends up with 32 predictions instead of one)
and the code blows up with this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [13], in <cell line: 21>()
17 # print(mkdn_train_loader, mkdn_val_loader)
18
19 ########################################################################################################################
File E:\KAGGLE_COMP\pt_model.py:796, in train(model, train_loader, val_loader, epochs, patience, path)
793 print('y_val shape', y_val.shape)
794 print(y_pred[:10])
--> 796 print("Validation MSE:", np.round(mean_squared_error(y_val, y_pred), 4))
797 print()
799 early_stopping(np.round(mean_squared_error(y_val, y_pred), 4), model)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\metrics\_regression.py:438, in mean_squared_error(y_true, y_pred, sample_weight, multioutput, squared)
378 def mean_squared_error(
379 y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", squared=True
380 ):
381 """Mean squared error regression loss.
382
383 Read more in the :ref:`User Guide <mean_squared_error>`.
(...)
436 0.825...
437 """
--> 438 y_type, y_true, y_pred, multioutput = _check_reg_targets(
439 y_true, y_pred, multioutput
440 )
441 check_consistent_length(y_true, y_pred, sample_weight)
442 output_errors = np.average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\metrics\_regression.py:94, in _check_reg_targets(y_true, y_pred, multioutput, dtype)
60 def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"):
61 """Check that y_true and y_pred belong to the same regression task.
62
63 Parameters
(...)
92 the dtype argument passed to check_array.
93 """
---> 94 check_consistent_length(y_true, y_pred)
95 y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
96 y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\utils\validation.py:332, in check_consistent_length(*arrays)
330 uniques = np.unique(lengths)
331 if len(uniques) > 1:
--> 332 raise ValueError(
333 "Found input variables with inconsistent numbers of samples: %r"
334 % [int(l) for l in lengths]
335 )
ValueError: Found input variables with inconsistent numbers of samples: [390, 12480]
I need to know what I must change so that this runs and the output is passed with the correct shape.
Solution
From the AdaptiveMaxPool1d documentation: if the input has shape (N, C, L_in), the output has shape (N, C, L_out).
Since your input to the AdaptiveMaxPool1d has shape (32, 32, 26) and you've set output_size to 32 (the value of the "out" variable), the output shape comes out as (32, 32, 32). I suggest setting output_size to 1 and using squeeze(2) to squash that dimension. Something like this:
# For initialization of the maxpool layer:
self.max_pool = nn.AdaptiveMaxPool1d(1)

# In cnn_forward, add squeeze(2) after max_pool like this:
x = self.max_pool(t.abs(self.end_conv(x))).squeeze(2)
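To illustrate the shape flow after this change, here is a small sketch with dummy tensors, using the sizes from the question's printout (the layers are created inline just for the demonstration):

import torch as t
import torch.nn as nn

x = t.randn(32, 32, 26)            # shape after end_conv, per the question's trace
x = nn.AdaptiveMaxPool1d(1)(x)     # -> (32, 32, 1)
x = x.squeeze(2)                   # -> (32, 32)
x = nn.Linear(32, 16)(x)           # -> (32, 16)
x = nn.Linear(16, 1)(x)            # -> (32, 1): one prediction per sample
print(x.shape)                     # torch.Size([32, 1])

Assuming the training loop flattens each batch's predictions into y_pred, every batch now contributes 32 values instead of 32 × 32, so y_pred ends up with 390 entries and matches y_val.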
Answered By - Mehrdad1373