Issue
My question is essentially a duplicate of this one, where I'm confused as to what to pass into the decoder during the predict() (i.e., call()) phase. I've modified tutorials found here and here in order to create this script. This is being used for the purposes of self-attention on a time series dataset for regression (not NLP).
There's too much boilerplate to post the full model, so I'll include only the pertinent scripts:
Transformer.py
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense
# The following imports are my custom Layers/Functions
from Utilities.MachineLearning.Keras.Layers.Encoder import Encoder
from Utilities.MachineLearning.Keras.Layers.Decoder import Decoder
from Utilities.MachineLearning.Keras.Functions.etc import create_padding_mask, create_look_ahead_mask
def create_masks(input, target):
    """Build the attention masks needed for one encoder/decoder pass.

    Args:
        input: Batch fed to the encoder; padding masks are derived from it.
        target: Batch fed to the decoder; its second dimension is taken as
            the target sequence length for the look-ahead mask.

    Returns:
        A 3-tuple ``(encoder_mask, encoder_decoder_mask, decoder_mask)``:
        the padding mask of ``input`` for the encoder, the element-wise
        maximum of the target padding mask and the look-ahead mask (for the
        decoder's 1st attention block), and a second padding mask of
        ``input`` (for the decoder's 2nd attention block, which attends over
        the encoder outputs).
    """
    # Padding mask applied inside the encoder.
    source_padding = create_padding_mask(input)

    # Same padding pattern, but handed to the decoder's 2nd attention block
    # so that padded encoder outputs are masked there as well.
    memory_padding = create_padding_mask(input)

    # The decoder's 1st attention block must ignore both padded target steps
    # and future target steps, so the two masks are merged with a maximum.
    target_length = tf.shape(target)[1]
    future_mask = create_look_ahead_mask(target_length)
    target_padding = create_padding_mask(target)
    combined_target_mask = tf.maximum(target_padding, future_mask)

    return source_padding, combined_target_mask, memory_padding
class Transformer(Model):
    """Encoder-decoder Transformer followed by a dense output layer.

    The model runs ``inputs`` through the encoder, runs ``targets`` together
    with the encoder output through the decoder, and projects the decoder
    output with a ``Dense`` layer of ``num_outputs`` units.
    """

    def __init__(
        self,
        num_inputs,
        num_outputs=1,
        num_heads=1,
        num_layers=1,
        num_embedding_inputs=None,
        num_ff_inputs=None,
        dropout=0,
        **kwargs,
    ):
        """Create the encoder, decoder and output layer.

        Args:
            num_inputs: Forwarded as the first argument of both ``Encoder``
                and ``Decoder``.
            num_outputs: Number of units of the final ``Dense`` layer.
            num_heads: Forwarded to ``Encoder`` and ``Decoder``.
            num_layers: Forwarded to ``Encoder`` and ``Decoder``.
            num_embedding_inputs: Forwarded to ``Encoder`` and ``Decoder``.
            num_ff_inputs: Forwarded to ``Encoder`` and ``Decoder``.
            dropout: Forwarded to ``Encoder`` and ``Decoder``.
            **kwargs: Standard Keras ``Model`` keyword arguments such as
                ``name``; forwarded to the base class so that
                ``Transformer(..., name="Transformer")`` is accepted.
        """
        super().__init__(**kwargs)
        self.encoder = Encoder(
            num_inputs,
            num_heads,
            num_layers,
            num_embedding_inputs,
            num_ff_inputs,
            dropout,
        )
        self.decoder = Decoder(
            num_inputs,
            num_heads,
            num_layers,
            num_embedding_inputs,
            num_ff_inputs,
            dropout,
        )
        self.output_layer = Dense(num_outputs, name="Output")

    def call(
        self,
        inputs,
        targets,
        training=None,
    ):
        """Run a full encoder/decoder forward pass.

        Args:
            inputs: Batch fed to the encoder.
            targets: Batch fed to the decoder.
            training: Passed through to the encoder and decoder.

        Returns:
            A tuple ``(output, attention_weights)``: the dense projection of
            the decoder output, and the attention weights returned by the
            decoder.
        """
        encoder_mask, encoder_decoder_mask, decoder_mask = create_masks(inputs, targets)
        encoder_output = self.encoder(inputs, encoder_mask, training)
        decoder_output, attention_weights = self.decoder(
            targets, encoder_output, encoder_decoder_mask, decoder_mask, training
        )
        output = self.output_layer(decoder_output)
        return output, attention_weights

    # Kept only so that existing references to this attribute keep working.
    # It is no longer applied to ``train_step``: it lists two tensor specs
    # while ``train_step`` receives a single ``data`` argument.
    train_step_signature = [
        tf.TensorSpec(shape=(None, None), dtype=tf.int64),
        tf.TensorSpec(shape=(None, None), dtype=tf.int64),
    ]

    def train_step(self, data):
        """Run one optimisation step on a batch supplied by ``fit()``.

        Not wrapped in ``tf.function`` here; Keras compiles the training
        step itself unless the model runs eagerly.

        Args:
            data: An ``(x, y)`` pair as passed in by ``fit()``.

        Returns:
            A dict mapping each metric name to its current value.
        """
        x, y = data
        with tf.GradientTape() as tape:
            # ``call`` returns (output, attention_weights); only the output
            # is a prediction, so the attention weights are dropped before
            # computing the loss and metrics.
            # NOTE(review): ``y`` is used both as the decoder input and as
            # the loss target without any shift - confirm this is intended.
            y_pred, _ = self(x, y, training=True)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)
        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}
SelfAttention.py
# Don't worry about what Custom is, it's basically a modified Keras Model
from Utilities.MachineLearning.Keras.Models.Custom import Custom
from Utilities.MachineLearning.Keras.Models.Transformer import Transformer
class SelfAttention(Custom):
    """Model that wraps a ``Transformer`` and delegates its forward pass to it."""

    def initialize(self):
        """Create the wrapped ``Transformer`` from this model's settings."""
        # The size of the last axis of the batch input shape is passed as the
        # Transformer's first positional argument.
        last_axis_size = self.batch_input_shape[-1]
        self.transformer = Transformer(
            last_axis_size,
            num_heads=self.attention_units,
            dropout=self.attention_dropout,
            name="Transformer",
        )

    def call(self, inputs, training=False):
        """Forward ``inputs`` to the wrapped transformer and return its result."""
        # TODO: What about `targets`?
        # NOTE(review): Transformer.call is declared with a required
        # `targets` parameter, which this call does not supply.
        transformer_result = self.transformer(inputs, training=training)
        return transformer_result
Solution
There was no point in using a decoder: for time-series regression, the encoder block already makes use of all the relevant information. With the decoder removed, call() no longer needs `targets` at all.
Answered By - SnakeWasTheNameTheyGaveMe
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.