Issue
I'm trying to use custom loss functions. Here's the code structure:
Libraries:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_probability as tfp
from tensorflow.keras import layers, losses
from tensorflow.keras.models import Model
import string
import random
import math
import pickle
The dataset is in this form:
https://drive.google.com/file/d/1sB8at-hZl-HXeFyFSp1Mm2Bhd3eV8ZxA/view?usp=sharing
Source code:
latent_dim = 64    # number of nodes in the hidden layers
train_size = 10000 # size of the training vocabulary
v_size = 100       # length of each feature vector (assumed to match the model's (100,) input shape below)
# The training dataset has been downloaded from - https://norvig.com/ngrams/
words = pd.read_csv("./enable2.csv") #Opensource data corpus
words = list(words['aa'])
random.shuffle(words)
# Creating the list of all possible characters in the English language
letters = list(string.ascii_letters)
punc = list(string.punctuation)
space = [' ',' ',' ']
letters.extend(punc)
letters.extend(space)
random.shuffle(letters)
def word_to_vector(words):
    # Create features from each word based on the list of characters built in the previous step
    features = list()
    for word in words:
        feature = [0] * v_size
        for i in range(len(word)):
            w = word[i]
            feature[i] = letters.index(w)
        features.append(feature)
    return features
vector_words = word_to_vector(words)
# Splitting the vocab into training and test datasets
x_train = np.array(vector_words[:train_size])
x_train.shape
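As a small illustration (a hypothetical example, assuming v_size = 100 as defined above), each word becomes a fixed-length vector of character indices, zero-padded on the right:

word = "cat"                          # hypothetical example word
feature = [0] * 100                   # zero-padded, fixed-length feature vector
for i, ch in enumerate(word):
    feature[i] = letters.index(ch)    # position i stores the index of the i-th character
# feature[:3] now holds the indices of 'c', 'a' and 't' in the shuffled letters list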
def train_nn_with_custom_loss_function(custom_loss_function, nn, lr, batch_size, nb_epochs, save_to_folder=None, plot_loss=True):
    """
    Train a neural net with a custom loss function, plot the loss
    and return the per-class prediction counts.
    Args:
        custom_loss_function (function): one of the custom loss functions
            developed for this task
    """
    # reset session
    tf.keras.backend.clear_session()
    # It's important to use run_eagerly=True in this example because the custom loss
    # function converts tensors to numpy arrays, which requires eager mode
    opt = tf.keras.optimizers.Adam(learning_rate=lr)
    nn.compile(optimizer=opt, loss=custom_loss_function, run_eagerly=True)
    # nn.build(input_shape=(100,))
    nn.summary()
    history = nn.fit(x_train, x_train,
                     epochs=nb_epochs, batch_size=batch_size, verbose=1,
                     shuffle=True)
    if save_to_folder is not None:
        nn.save(save_to_folder)
    plt.plot(history.history['loss'])
    plt.ylabel('loss')
    plt.xlabel('epoch')
    # plt.set_xticks(np.arange(len(history.history['loss'])))
    plt.show()
    pred = nn.predict(x_train)
    classes = np.argmax(pred, axis=1)
    y, x = np.histogram(classes, bins=np.arange(16))
    x = x[1:]
    plt.bar(x, y)
    plt.ylabel('input')
    plt.xlabel('class')
    plt.xticks(np.arange(1, 16, step=1))
    plt.show()
    return y
Loss function:
from scipy.spatial import distance
# we don't have a real y_true in the unsupervised setting; the inputs themselves are passed in as y_true
def distance_loss_fn(y_true, y_pred):
    # calculate the centroid of every cluster
    # thresh = -1
    # y_pred = tf.gather(y_pred, tf.where(y_pred > thresh))
    pred = tf.argmax(y_pred, axis=1).numpy()
    # centroid coordinates
    centroids = np.zeros((15, 100), dtype=float)
    # centroid frequencies
    centroids_f = np.zeros(15)
    # points holds the coordinates of the points to cluster
    points = y_true.numpy()
    # iterate over the points
    for idx in range(points.shape[0]):
        # accumulate points to calculate the centroids later
        centroids[pred[idx]] += points[idx]
        centroids_f[pred[idx]] += 1
    # average over all points to calculate each centroid
    for idx in range(len(centroids)):
        if centroids_f[idx] == 0:
            continue
        centroids[idx] /= centroids_f[idx]
    # sum the distance between each point and its centroid to get the loss to minimize
    loss = 0
    for idx in range(len(points)):
        # add the distance between each point and the centroid of its predicted class
        # tfa.metrics.hamming.hamming_loss_fn(y, output, threshold=0.5, mode='multilabel')
        # loss += tf.reduce_sum(centroids[pred[idx]] - points[idx])
        loss += distance.hamming(centroids[pred[idx]], points[idx])
        # loss += tfa.metrics.hamming.hamming_loss_fn(centroids[pred[idx]],
        #                                             points[idx],
        #                                             threshold=0.5,
        #                                             mode='multilabel')
        # print('d ', distance.hamming(centroids[pred[idx]], points[idx]))
    l = tf.reduce_sum(y_pred)
    # print(loss)
    ret = tf.constant(loss, dtype=tf.float32)
    return ret
Architecture:
base_nn = tf.keras.Sequential([
    layers.Flatten(input_shape=(100,)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(15, activation='softmax')
])
diff = train_nn_with_custom_loss_function(distance_loss_fn, base_nn, 0.00001, 32, 5)
Error I get:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten (Flatten) (None, 100) 0
_________________________________________________________________
dense (Dense) (None, 128) 12928
_________________________________________________________________
dropout (Dropout) (None, 128) 0
_________________________________________________________________
dense_1 (Dense) (None, 15) 1935
=================================================================
Total params: 14,863
Trainable params: 14,863
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-134-8e360d25be14> in <module>()
7
8
----> 9 diff = train_nn_with_custom_loss_function(distance_loss_fn, base_nn, 0.00001, 32, 5)
12 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/optimizer_v2/utils.py in filter_empty_gradients(grads_and_vars)
74 if not filtered:
75 raise ValueError("No gradients provided for any variable: %s." %
---> 76 ([v.name for _, v in grads_and_vars],))
77 if vars_with_empty_grads:
78 logging.warning(
ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0'].
I have this loss function, but it is not differentiable, so no gradients flow back to the network. Any help on how to make this loss function differentiable would be highly appreciated.
Brief info about this method: this is an unsupervised NN where the input is a string like "get filename" and the output is a class from 1 to 15.
Loss function information: Loss 3: Labels with distance
- Extract the predicted class of each sample in the batch from the last layer.
- Calculate the centroid of each class by averaging the points assigned to that class.
- Sum the distances (Hamming distance is used) between each point and its centroid; this sum is the loss.
Solution
The problem is probably in the loss function, specifically pred = tf.argmax(y_pred, axis=1).numpy(). This operation cannot be differentiated, so gradients won't flow through it, and that is what causes your error.
Answered By - kacpo1
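For illustration only (separate from the answer above), here is a minimal sketch of one way to keep such a clustering loss differentiable: use the softmax probabilities as soft cluster assignments instead of a hard argmax, compute probability-weighted centroids, and replace the Hamming distance with a squared Euclidean distance so everything stays in TensorFlow ops and gradients can flow. The function name soft_distance_loss_fn and the exact weighting are assumptions, not the original method.

def soft_distance_loss_fn(y_true, y_pred):
    # y_true: (batch, 100) input vectors; y_pred: (batch, 15) softmax probabilities
    y_true = tf.cast(y_true, tf.float32)
    # soft "mass" of each cluster: total probability it received over the batch
    cluster_mass = tf.reduce_sum(y_pred, axis=0) + 1e-8            # shape (15,)
    # probability-weighted centroids, shape (15, 100)
    centroids = tf.matmul(y_pred, y_true, transpose_a=True) / cluster_mass[:, None]
    # squared Euclidean distance from every point to every centroid, shape (batch, 15)
    diffs = y_true[:, None, :] - centroids[None, :, :]
    sq_dist = tf.reduce_sum(tf.square(diffs), axis=-1)
    # weight each distance by how strongly the point is assigned to that cluster
    return tf.reduce_mean(tf.reduce_sum(y_pred * sq_dist, axis=1))

Because a loss like this uses only TensorFlow ops (no .numpy() calls), the model can also be compiled without run_eagerly=True.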