Issue
My exercise is to train 10 perceptrons to recognize numbers (0 - 9). Each perceptron should learn a single digit. As training data, I've created 30 images (5x7 bmp). 3 variants per digit.
I've got a perceptron class:
import numpy as np
def unit_step_func(x):
return np.where(x > 0, 1, 0)
def sigmoid(x):
return 1 / (1 + np.exp(-x))
class Perceptron:
def __init__(self, learning_rate=0.01, n_iters=1000):
self.lr = learning_rate
self.n_iters = n_iters
self.activation_func = unit_step_func
self.weights = None
self.bias = None
#self.best_weights = None
#self.best_bias = None
#self.best_error = float('inf')
def fit(self, X, y):
n_samples, n_features = X.shape
self.weights = np.zeros(n_features)
self.bias = 0
#self.best_weights = self.weights.copy()
#self.best_bias = self.bias
for _ in range(self.n_iters):
for x_i, y_i in zip(X, y):
linear_output = np.dot(x_i, self.weights) + self.bias
y_predicted = self.activation_func(linear_output)
update = self.lr * (y_i - y_predicted)
self.weights += update * x_i
self.bias += update
#current_error = np.mean(np.abs(y - self.predict(X)))
#if current_error < self.best_error:
# self.best_weights = self.weights.copy()
# self.best_bias = self.bias
# self.best_error = current_error
def predict(self, X):
linear_output = np.dot(X, self.weights) + self.bias
y_predicted = self.activation_func(linear_output)
return y_predicted
I've tried both, unit_step_func
and sigmoid
, activation functions, and pocketing algorithm to see if there's any difference. I'm a noob, so I'm not sure if this is even implemented correctly.
This is how I train these perceptrons:
import numpy as np
from PIL import Image
from Perceptron import Perceptron
import os
def load_images_from_folder(folder, digit):
images = []
labels = []
for filename in os.listdir(folder):
img = Image.open(os.path.join(folder, filename))
if img is not None:
images.append(np.array(img).flatten())
label = 1 if filename.startswith(f"{digit}_") else 0
labels.append(label)
return np.array(images), np.array(labels)
digits_to_recognize = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
perceptrons = []
for digit_to_recognize in digits_to_recognize:
X, y = load_images_from_folder("data", digit_to_recognize)
p = Perceptron()
p.fit(X, y)
perceptrons.append(p)
in short:
training data filename is in the format digit
_variant
. As I said before, each digit has 3 variants,
so for digit 0
it is 0_0
, 0_1
, 0_2
,
for digit 1
it's: 1_0
, 1_1
, 1_2
,
and so on...
load_images_from_folder
function loads 30 images and checks the name. If digit
part of the name is the same as digit
input then it appends 1
in labels, so that the perceptron knows that it's the desired digit.
I know that it'd be better to load these images once and save them in some array of tuples
, for example, but I don't care about the performance right now (I won't care later either).
for digit 0
labels array is [1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
for digit 1
labels array is [0,0,0, 1, 1, 1, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
and so on...
then I train 10 perceptrons using this data.
This exercise also requires to have some kind of GUI that allows me to draw a number. I've choosen pygame
, I could use pyQT
, it actually does not matter.
This is the code, you can skip it, it's not that important (except for on_rec_button
function, but I'll address on it):
import pygame
import sys
pygame.init()
cols, rows = 5, 7
square_size = 50
width, height = cols * square_size, (rows + 2) * square_size
screen = pygame.display.set_mode((width, height))
pygame.display.set_caption("Zad1")
rec_button_color = (0, 255, 0)
rec_button_rect = pygame.Rect(0, rows * square_size, width, square_size)
clear_button_color = (255, 255, 0)
clear_button_rect = pygame.Rect(0, (rows + 1) * square_size + 1, width, square_size)
mouse_pressed = False
drawing_matrix = np.zeros((rows, cols), dtype=int)
def color_square(x, y):
col = x // square_size
row = y // square_size
if 0 <= row < rows and 0 <= col < cols:
drawing_matrix[row, col] = 1
def draw_button(color, rect):
pygame.draw.rect(screen, color, rect)
def on_rec_button():
np_array_representation = drawing_matrix.flatten()
for digit_to_recognize in digits_to_recognize:
p = perceptrons[digit_to_recognize]
predicted_number = p.predict(np_array_representation)
if predicted_number == digit_to_recognize:
print(f"Image has been recognized as number {digit_to_recognize}")
def on_clear_button():
drawing_matrix.fill(0)
while True:
for event in pygame.event.get():
if event.type == pygame.QUIT:
pygame.quit()
sys.exit()
elif event.type == pygame.MOUSEBUTTONDOWN and event.button == 3:
mouse_pressed = True
elif event.type == pygame.MOUSEBUTTONUP and event.button == 3:
mouse_pressed = False
elif event.type == pygame.MOUSEMOTION:
mouse_x, mouse_y = event.pos
if mouse_pressed:
color_square(mouse_x, mouse_y)
elif event.type == pygame.MOUSEBUTTONDOWN and event.button == 1:
if rec_button_rect.collidepoint(event.pos):
on_rec_button()
if clear_button_rect.collidepoint(event.pos):
on_clear_button()
for i in range(rows):
for j in range(cols):
if drawing_matrix[i, j] == 1:
pygame.draw.rect(screen, (255, 0, 0), (j * square_size, i * square_size, square_size, square_size))
else:
pygame.draw.rect(screen, (0, 0, 0), (j * square_size, i * square_size, square_size, square_size))
draw_button(rec_button_color, rec_button_rect)
draw_button(clear_button_color, clear_button_rect)
pygame.display.flip()
so, now that I run the app, draw the digit 3
, and click the green button that runs on_rec_button
function, I expected to see Image has been recognized as number 3
, but I get Image has been recognized as number 0
.
This is what I draw:
These are training data:
These are very small because of the resolution 5x7
that was required in the exercise.
When I draw the digit 1
then I get 2 results:
Image has been recognized as number 0
Image has been recognized as number 1
What should I do to make it work the way I want? I don't expect this to work 100% accurate but I guess it could be better.
Solution
There seems to be a few issues in the code, I will try to address them:
- It's missing the back progation function derivatives, as metioned in comments! Those are very important because they are the ones that guide the correction to the correct dirrection (based on the gradient).
- simillarly, the bias is not calculated correclty.
Here is a working code:
def sigmoid(x):
return 1 / (1 + np.exp(-x))
def sigmoid_derivative(x):
return x * (1 - x)
class Perceptron:
def __init__(self, learning_rate=0.01, n_iters=1000):
self.lr = learning_rate
self.n_iters = n_iters
self.weights = None
self.bias = None
def fit(self, X, y):
n_samples, n_features = X.shape
self.bias = 0
self.weights = np.zeros(n_features)
for _ in range(self.n_iters):
for x_i, y_i in zip(X, y):
linear_output = np.dot(x_i, self.weights) + self.bias
y_predicted = sigmoid(linear_output)
error = y_i - y_predicted
output_error = error * sigmoid_derivative(y_predicted)
self.weights += x_i.T.dot(output_error) * self.lr
self.bias += np.sum(output_error, axis=0, keepdims=True) * self.lr
def predict(self, X):
linear_output = np.dot(X, self.weights) + self.bias
y_predicted = sigmoid(linear_output)
return y_predicted
As the main question is about the perceptron, I prefered to skip the pygame
code. I used from keras.datasets import mnist
to mock the images. The result correlate, given I didn't changed the Percetron class signature or main functionality. Here is the testing code:
from keras.datasets import mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images_resized = np.zeros((train_images.shape[0], 784))
test_images_resized = np.zeros((test_images.shape[0], 784))
for i in range(train_images.shape[0]):
train_images_resized[i] = np.resize(train_images[i]/np.max(train_images[i]), 784).flatten()
for i in range(test_images.shape[0]):
test_images_resized[i] = np.resize(test_images[i]/np.max(train_images[i]), 784).flatten()
desired_digit = 1
train_labels = [ 1 if label == desired_digit else 0 for label in train_labels]
test_labels = [ 1 if label == desired_digit else 0 for label in test_labels]
digits_to_recognize = [desired_digit]
X, y = train_images_resized,train_labels
p = Perceptron(learning_rate=0.05,n_iters=100000)
p.fit(X, y)
Note that I had to normalize (divide the the max value of each image) the input data so that the sigmoid function don't get saturated, making the derivative function = 0.
Results!
p.predict(test_images_resized)
array([0.004823, 0.531128, 0.94834 , 0.000155, 0.002682, 0.981524,
0.008962, 0.067788, 0.017121, 0.00063 ])
test_labels
[0, 0, 1, 0, 0, 1, 0, 0, 0, 0]
Answered By - Vinícius Matos
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.