
Commit

add activation functions
omoghaoghenemano committed Sep 17, 2024
1 parent b8c0334 commit 5591fb6
Showing 5 changed files with 148 additions and 22 deletions.
6 changes: 6 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,6 @@
{
  "files.associations": {
    "iosfwd": "cpp",
    "vector": "cpp"
  }
}
33 changes: 33 additions & 0 deletions include/activation/activation_functions.h
@@ -0,0 +1,33 @@
#ifndef ACTIVATION_FUNCTIONS_H
#define ACTIVATION_FUNCTIONS_H

#include <vector>
#include <cmath>

namespace activation {

// Sigmoid activation function
double sigmoid(double x);

// Derivative of sigmoid function
double sigmoid_derivative(double x);

// Tanh activation function
double tanh(double x);

// Derivative of tanh function
double tanh_derivative(double x);

// ReLU activation function
double relu(double x);

// Derivative of ReLU function
double relu_derivative(double x);

// Apply an activation function element-wise (defined in the .cpp, which instantiates it for double (*)(double))
template <typename Func>
std::vector<double> apply(const std::vector<double>& inputs, Func func);

} // namespace activation

#endif // ACTIVATION_FUNCTIONS_H
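
For reference, a minimal usage sketch of this interface (not part of the commit; assumes the implementation file later in this commit is linked in):

```cpp
#include <cstdio>
#include <vector>
#include "activation/activation_functions.h"

int main() {
    std::vector<double> v = {-1.0, 0.0, 1.0};
    // Func is deduced as double (*)(double), matching the explicit instantiation.
    std::vector<double> out = activation::apply(v, activation::sigmoid);
    for (double y : out) std::printf("%f\n", y);  // ~0.269, 0.5, ~0.731
    return 0;
}
```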
14 changes: 12 additions & 2 deletions include/algorithms/linear_regression.h
@@ -9,9 +9,9 @@ namespace algorithms {
 class LinearRegression {
 public:
     // Constructor
-    LinearRegression() : m_slope(0.0), m_intercept(0.0) {}
+    LinearRegression() : m_slope(0.0), m_intercept(0.0), m_learning_rate(0.01), m_iterations(1000) {}
 
-    // Fit the model to the training data
+    // Fit the model to the training data using gradient descent
     void fit(const std::vector<double>& x, const std::vector<double>& y);
 
     // Predict the output for a given input
@@ -21,12 +21,22 @@ class LinearRegression {
     double getSlope() const { return m_slope; }
     double getIntercept() const { return m_intercept; }
 
+    // Set learning rate and number of iterations
+    void setLearningRate(double lr) { m_learning_rate = lr; }
+    void setIterations(int it) { m_iterations = it; }
+
 private:
     double m_slope;
     double m_intercept;
+    double m_learning_rate;
+    int m_iterations;
 
     // Helper function to compute the mean of a vector
     double mean(const std::vector<double>& v) const;
 
+    // Helper functions for gradient descent
+    double computeCost(const std::vector<double>& x, const std::vector<double>& y) const;
+    void gradientDescent(const std::vector<double>& x, const std::vector<double>& y);
 };
 
 } // namespace algorithms
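
For context, the update rule these new members parameterize is plain gradient descent on the mean-squared-error cost (notation mine, not from the diff): with slope $m$, intercept $b$, learning rate $\alpha$ (m_learning_rate), and $n$ samples,

$$m \leftarrow m - \alpha \cdot \frac{1}{n}\sum_{i=1}^{n}\left(m x_i + b - y_i\right)x_i, \qquad b \leftarrow b - \alpha \cdot \frac{1}{n}\sum_{i=1}^{n}\left(m x_i + b - y_i\right)$$

repeated m_iterations times.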
51 changes: 51 additions & 0 deletions src/activation/activation_functions.cpp
@@ -0,0 +1,51 @@
#include "activation/activation_functions.h"

namespace activation {

// Sigmoid activation function
double sigmoid(double x) {
    return 1.0 / (1.0 + std::exp(-x));
}

// Derivative of sigmoid function
double sigmoid_derivative(double x) {
    double sig = sigmoid(x);
    return sig * (1.0 - sig);
}

// Tanh activation function
double tanh(double x) {
    return std::tanh(x);
}

// Derivative of tanh function
double tanh_derivative(double x) {
    double tanh_x = tanh(x);
    return 1.0 - tanh_x * tanh_x;
}

// ReLU activation function
double relu(double x) {
    return std::max(0.0, x);
}

// Derivative of ReLU function (0 at x == 0 by convention)
double relu_derivative(double x) {
    return (x > 0) ? 1.0 : 0.0;
}

// Apply an activation function to a vector
template <typename Func>
std::vector<double> apply(const std::vector<double>& inputs, Func func) {
    std::vector<double> result;
    result.reserve(inputs.size());
    for (double input : inputs) {
        result.push_back(func(input));
    }
    return result;
}

// Explicit instantiation for plain function pointers, so calls like
// apply(v, sigmoid) from other translation units link correctly.
template std::vector<double> apply(const std::vector<double>& inputs, double (*func)(double));

} // namespace activation
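
Not part of the commit, but a quick sanity check these definitions should pass: each analytic derivative should agree with a central finite difference.

```cpp
#include <cassert>
#include <cmath>
#include "activation/activation_functions.h"

int main() {
    const double x = 0.5, h = 1e-6;
    // Central difference approximation of d/dx sigmoid at x.
    double numeric = (activation::sigmoid(x + h) - activation::sigmoid(x - h)) / (2.0 * h);
    assert(std::fabs(numeric - activation::sigmoid_derivative(x)) < 1e-8);

    numeric = (activation::tanh(x + h) - activation::tanh(x - h)) / (2.0 * h);
    assert(std::fabs(numeric - activation::tanh_derivative(x)) < 1e-8);
    return 0;
}
```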
66 changes: 46 additions & 20 deletions src/algorithms/linear_regression.cpp
@@ -1,38 +1,65 @@
 #include "algorithms/linear_regression.h"
-#include <numeric> // for std::accumulate
+#include <numeric>
+#include <cmath>
+#include <stdexcept> // for std::invalid_argument
 
 namespace algorithms {
 
-void LinearRegression::fit(const std::vector<double>& x, const std::vector<double>& y) {
-    if (x.size() != y.size() || x.empty()) {
-        throw std::invalid_argument("Input vectors must be of the same size and non-empty.");
-    }
-
-    double x_mean = mean(x);
-    double y_mean = mean(y);
-
-    double numerator = 0.0;
-    double denominator = 0.0;
-
-    for (size_t i = 0; i < x.size(); ++i) {
-        numerator += (x[i] - x_mean) * (y[i] - y_mean);
-        denominator += (x[i] - x_mean) * (x[i] - x_mean);
-    }
-
-    if (denominator == 0.0) {
-        throw std::runtime_error("Denominator in slope calculation is zero.");
-    }
-
-    m_slope = numerator / denominator;
-    m_intercept = y_mean - m_slope * x_mean;
-}
-
-// Predict the output for a given input
-double LinearRegression::predict(double x) const {
-    return m_slope * x + m_intercept;
-}
-
+// Compute the mean of a vector
 double LinearRegression::mean(const std::vector<double>& v) const {
     return std::accumulate(v.begin(), v.end(), 0.0) / v.size();
 }
 
+// Compute the cost (mean squared error)
+double LinearRegression::computeCost(const std::vector<double>& x, const std::vector<double>& y) const {
+    double total_error = 0.0;
+    size_t n = x.size();
+    for (size_t i = 0; i < n; ++i) {
+        double prediction = m_slope * x[i] + m_intercept;
+        double error = prediction - y[i];
+        total_error += error * error;
+    }
+    return total_error / (2 * n);
+}
+
+// Perform gradient descent to optimize slope and intercept
+void LinearRegression::gradientDescent(const std::vector<double>& x, const std::vector<double>& y) {
+    size_t n = x.size();
+    for (int i = 0; i < m_iterations; ++i) {
+        double slope_gradient = 0.0;
+        double intercept_gradient = 0.0;
+        for (size_t j = 0; j < n; ++j) {
+            double prediction = m_slope * x[j] + m_intercept;
+            double error = prediction - y[j];
+            slope_gradient += error * x[j];
+            intercept_gradient += error;
+        }
+        slope_gradient /= n;
+        intercept_gradient /= n;
+
+        m_slope -= m_learning_rate * slope_gradient;
+        m_intercept -= m_learning_rate * intercept_gradient;
+
+        // Optional: report cost every 100 iterations
+        if (i % 100 == 0) {
+            double cost = computeCost(x, y);
+            (void)cost; // uncomment the next line (and add #include <iostream>) to print progress
+            // std::cout << "Iteration " << i << ": Cost " << cost << std::endl;
+        }
+    }
+}
+
+// Fit the model using gradient descent
+void LinearRegression::fit(const std::vector<double>& x, const std::vector<double>& y) {
+    if (x.size() != y.size() || x.empty()) {
+        throw std::invalid_argument("Input vectors must be of the same size and non-empty.");
+    }
+    gradientDescent(x, y);
+}
+
+// Predict the output for a given input
+double LinearRegression::predict(double x) const {
+    return m_slope * x + m_intercept;
+}
+
 } // namespace algorithms
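
Finally, a minimal end-to-end sketch of the new API (not part of the commit; the learning rate and iteration count are illustrative choices):

```cpp
#include <cstdio>
#include <vector>
#include "algorithms/linear_regression.h"

int main() {
    // Points on y = 2x + 1; gradient descent should approach slope 2, intercept 1.
    std::vector<double> x = {0.0, 1.0, 2.0, 3.0, 4.0};
    std::vector<double> y = {1.0, 3.0, 5.0, 7.0, 9.0};

    algorithms::LinearRegression model;
    model.setLearningRate(0.05);  // default 0.01
    model.setIterations(10000);   // default 1000
    model.fit(x, y);

    std::printf("slope=%f intercept=%f predict(5)=%f\n",
                model.getSlope(), model.getIntercept(), model.predict(5.0));
    return 0;
}
```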
