Issue
I recently hit a roadblock while following a deep learning tutorial on YouTube (the entire code can be found here). I'm having a problem with part 4.4. The goal is to return a dictionary of article summaries for certain stocks (their tickers are in a list, monitered_tickers).
def summarize(articles):
    summaries = []
    for article in articles:
        input_ids = tokenizer.encode(article, return_tensors='pt')
        output = model.generate(input_ids, max_length=55, num_beams=5, early_stopping=True)
        summary = tokenizer.decode(output[0], skip_special_tokens=True)
        summaries.append(summary)
    return summaries

summaries = {ticker:summarize(articles[ticker]) for ticker in monitered_tickers}
summaries
When I run the code above, I get the following error:
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_10688/3329134555.py in <module>
----> 1 summaries = {ticker:summarize(articles[ticker]) for ticker in monitered_tickers}
      2 summaries

~\AppData\Local\Temp/ipykernel_10688/3329134555.py in <dictcomp>(.0)
----> 1 summaries = {ticker:summarize(articles[ticker]) for ticker in monitered_tickers}
      2 summaries

~\AppData\Local\Temp/ipykernel_10688/3177436574.py in summarize(articles)
      3     for article in articles:
      4         input_ids = tokenizer.encode(article, return_tensors='pt')
----> 5         output = model.generate(input_ids, max_length=40, num_beams=5, early_stopping = True)
      6         summary = tokenizer.decode(output[0], skip_special_tokens=True)
      7         summaries.append(summary)

~\anaconda3\lib\site-packages\torch\autograd\grad_mode.py in decorate_context(*args, **kwargs)
     26         def decorate_context(*args, **kwargs):
     27             with self.__class__():
---> 28                 return func(*args, **kwargs)
     29         return cast(F, decorate_context)
     30

~\anaconda3\lib\site-packages\transformers\generation_utils.py in generate(self, inputs, max_length, min_length, do_sample, early_stopping, num_beams, temperature, top_k, top_p, repetition_penalty, bad_words_ids, bos_token_id, pad_token_id, eos_token_id, length_penalty, no_repeat_ngram_size, encoder_no_repeat_ngram_size, num_return_sequences, max_time, max_new_tokens, decoder_start_token_id, use_cache, num_beam_groups, diversity_penalty, prefix_allowed_tokens_fn, logits_processor, stopping_criteria, output_attentions, output_hidden_states, output_scores, return_dict_in_generate, forced_bos_token_id, forced_eos_token_id, remove_invalid_values, synced_gpus, **model_kwargs)
   1022             # if model is encoder decoder encoder_outputs are created
   1023             # and added to `model_kwargs`
-> 1024             model_kwargs = self._prepare_encoder_decoder_kwargs_for_generation(
   1025                 inputs_tensor, model_kwargs, model_input_name
   1026             )

~\anaconda3\lib\site-packages\transformers\generation_utils.py in _prepare_encoder_decoder_kwargs_for_generation(self, inputs_tensor, model_kwargs, model_input_name)
    484             encoder_args = ()
    485
--> 486         model_kwargs["encoder_outputs"]: ModelOutput = encoder(*encoder_args, **encoder_kwargs)
    487
    488         return model_kwargs

~\anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1100         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1101                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102             return forward_call(*input, **kwargs)
   1103         # Do not call functions when jit is used
   1104         full_backward_hooks, non_full_backward_hooks = [], []

~\anaconda3\lib\site-packages\transformers\models\pegasus\modeling_pegasus.py in forward(self, input_ids, attention_mask, head_mask, inputs_embeds, output_attentions, output_hidden_states, return_dict)
    754             inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
    755
--> 756         embed_pos = self.embed_positions(input_shape)
    757
    758         hidden_states = inputs_embeds + embed_pos

~\anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
   1100         if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
   1101                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1102             return forward_call(*input, **kwargs)
   1103         # Do not call functions when jit is used
   1104         full_backward_hooks, non_full_backward_hooks = [], []

~\anaconda3\lib\site-packages\torch\autograd\grad_mode.py in decorate_context(*args, **kwargs)
     26         def decorate_context(*args, **kwargs):
     27             with self.__class__():
---> 28                 return func(*args, **kwargs)
     29         return cast(F, decorate_context)
     30

~\anaconda3\lib\site-packages\transformers\models\pegasus\modeling_pegasus.py in forward(self, input_ids_shape, past_key_values_length)
    138             past_key_values_length, past_key_values_length + seq_len, dtype=torch.long, device=self.weight.device
    139         )
--> 140         return super().forward(positions)
    141
    142

~\anaconda3\lib\site-packages\torch\nn\modules\sparse.py in forward(self, input)
    156
    157     def forward(self, input: Tensor) -> Tensor:
--> 158         return F.embedding(
    159             input, self.weight, self.padding_idx, self.max_norm,
    160             self.norm_type, self.scale_grad_by_freq, self.sparse)

~\anaconda3\lib\site-packages\torch\nn\functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
   2042         # remove once script supports set_grad_enabled
   2043         _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 2044     return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
   2045
   2046

IndexError: index out of range in self
I'm wondering how I can fix this. I'm a bit of a newbie, so any help on this would be greatly appreciated. Thank you.
Solution
Your article is probably longer than the model's maximum input length. PEGASUS stores its positional embeddings in a fixed-size table, so any token position past that limit fails the embedding lookup, which is exactly the IndexError: index out of range in self at the bottom of your traceback. Truncate the input when encoding:

tokenizer.encode(article, return_tensors='pt', max_length=512, truncation=True)
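For context, here is the summarize function from the question with that fix applied. This is a minimal sketch: the 512 limit is an assumption that matches common PEGASUS checkpoints, so check model.config.max_position_embeddings (or tokenizer.model_max_length) to confirm the right value for your model.

def summarize(articles):
    summaries = []
    for article in articles:
        # Truncate so every token position has a row in the model's
        # positional-embedding table (the lookup that raised the IndexError).
        # 512 is an assumed limit; verify it against your checkpoint's config.
        input_ids = tokenizer.encode(article, return_tensors='pt',
                                     max_length=512, truncation=True)
        output = model.generate(input_ids, max_length=55, num_beams=5, early_stopping=True)
        summary = tokenizer.decode(output[0], skip_special_tokens=True)
        summaries.append(summary)
    return summaries

With truncation in place, the dictionary comprehension over monitered_tickers should run without the embedding lookup failing; the trade-off is that only the first 512 tokens of each article are summarized.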
Answered By - joe32140