// machine_learning/neural_network.cpp
/*******************************************************
* Copyright (c) 2019, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <arrayfire.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>  // atoi, EXIT_FAILURE
#include <af/util.h>
#include <iostream>  // std::cout, std::cerr
#include <string>
#include <vector>
#include "mnist_common.h"

using namespace af;
using std::vector;
std::string toStr(const dtype dt) {
    switch (dt) {
        case f32: return "f32";
        case f16: return "f16";
        default: return "N/A";
    }
}
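
// Classification accuracy in percent. Both predicted and target hold one
// sample per row as one-hot/score vectors, so the argmax along dim 1
// recovers the label (e.g. a row [0.1 0.7 0.2] decodes to class 1).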
float accuracy(const array &predicted, const array &target) {
    array val, plabels, tlabels;
    max(val, tlabels, target, 1);
    max(val, plabels, predicted, 1);
    return 100 * count<float>(plabels == tlabels) / tlabels.elements();
}
// Derivative of the sigmoid activation, written in terms of its output:
// d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x)) = out * (1 - out)
array deriv(const array &out) { return out * (1 - out); }

// Cost function: L2 norm of the residual between output and prediction
double error(const array &out, const array &pred) {
    array dif = (out - pred);
    return sqrt((double)(sum<float>(dif * dif)));
}
class ann {
   private:
    int num_layers;
    vector<array> weights;
    dtype datatype;

    // Add bias input to the output from previous layer
    array add_bias(const array &in);

    vector<array> forward_propagate(const array &input);

    void back_propagate(const vector<array> &signal, const array &target,
                        const double &alpha);

   public:
    // Create a network with given parameters
    ann(vector<int> layers, double range, dtype dt = f32);

    // Output after single pass of forward propagation
    array predict(const array &input);

    // Method to train the neural net
    double train(const array &input, const array &target, double alpha = 1.0,
                 int max_epochs = 300, int batch_size = 100,
                 double maxerr = 1.0, bool verbose = false);
};
array ann::add_bias(const array &in) {
    // Bias input is added on top of given input
    return join(1, constant(1, in.dims(0), 1, datatype), in);
}
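
// Note: a batch of m samples with n features becomes m x (n + 1) after
// add_bias; the leading column of ones multiplies the bias row that each
// weight matrix carries (hence the layers[i] + 1 rows allocated in the
// constructor below).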
vector<array> ann::forward_propagate(const array &input) {
    // Get activations at each layer
    vector<array> signal(num_layers);

    signal[0] = input;
    for (int i = 0; i < num_layers - 1; i++) {
        array in      = add_bias(signal[i]);
        array out     = matmul(in, weights[i]);
        signal[i + 1] = sigmoid(out);
    }

    return signal;
}
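
// Gradient-descent update derived from the squared-error cost: at each
// layer the local error is delta = sigmoid'(out) .* err, and the weights
// move against the averaged gradient, W_i -= (alpha / m) * in^T * delta,
// where m is the batch size. The error is then pushed back through W_i
// (minus its bias row) to the layer below.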
void ann::back_propagate(const vector<array> &signal, const array &target,
                         const double &alpha) {
    // Get error for output layer
    array out = signal[num_layers - 1];
    array err = (out - target);

    int m = target.dims(0);

    for (int i = num_layers - 2; i >= 0; i--) {
        array in    = add_bias(signal[i]);
        array delta = (deriv(out) * err).T();

        // Adjust weights
        array tg   = alpha * matmul(delta, in);
        array grad = -(tg) / m;
        weights[i] += grad.T();

        // Input to current layer is output of previous
        out = signal[i];
        err = matmulTT(delta, weights[i]);

        // Remove the error of bias and propagate backward
        err = err(span, seq(1, out.dims(1)));
    }
}
ann::ann(vector<int> layers, double range, dtype dt)
    : num_layers(layers.size()), weights(layers.size() - 1), datatype(dt) {
    std::cout << "Initializing weights using a uniform random distribution "
              << "between " << -range / 2 << " and " << range / 2
              << " at precision " << toStr(datatype) << std::endl;
    for (int i = 0; i < num_layers - 1; i++) {
        weights[i] = range * randu(layers[i] + 1, layers[i + 1]) - range / 2;
        if (datatype != f32) weights[i] = weights[i].as(datatype);
    }
}
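
// Weights start in [-range/2, range/2]. A small symmetric range keeps the
// initial pre-activations near zero, where the sigmoid is steepest, so the
// first gradients are neither saturated nor biased in any direction.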
array ann::predict(const array &input) {
    vector<array> signal = forward_propagate(input);
    array out            = signal[num_layers - 1];
    return out;
}
double ann::train(const array &input, const array &target, double alpha,
                  int max_epochs, int batch_size, double maxerr,
                  bool verbose) {
    const int num_samples = input.dims(0);
    const int num_batches = num_samples / batch_size;

    double err = 0;

    // Training the entire network
    for (int i = 0; i < max_epochs; i++) {
        // Train on all but the last batch; the last batch is held out for
        // validation below
        for (int j = 0; j < num_batches - 1; j++) {
            int st = j * batch_size;
            int en = st + batch_size - 1;

            array x = input(seq(st, en), span);
            array y = target(seq(st, en), span);

            // Propagate the inputs forward
            vector<array> signals = forward_propagate(x);
            array out             = signals[num_layers - 1];

            // Propagate the error backward
            back_propagate(signals, y, alpha);
        }

        // Validate with last batch
        int st    = (num_batches - 1) * batch_size;
        int en    = num_samples - 1;
        array out = predict(input(seq(st, en), span));
        err       = error(out, target(seq(st, en), span));

        // Check if the convergence criterion has been met
        if (err < maxerr) {
            printf("Converged on Epoch: %4d\n", i + 1);
            return err;
        }

        if (verbose) {
            if ((i + 1) % 10 == 0)
                printf("Epoch: %4d, Error: %0.4f\n", i + 1, err);
        }
    }

    return err;
}
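
// Note: error() returns an unnormalized L2 norm over the held-out batch, so
// a given maxerr threshold implicitly scales with the validation batch size.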
int ann_demo(bool console, int perc, const dtype dt) {
    printf("** ArrayFire ANN Demo **\n\n");

    array train_images, test_images;
    array train_target, test_target;
    int num_classes, num_train, num_test;

    // Load mnist data
    float frac = (float)(perc) / 100.0;
    setup_mnist<true>(&num_classes, &num_train, &num_test, train_images,
                      test_images, train_target, test_target, frac);

    if (dt != f32) {
        train_images = train_images.as(dt);
        test_images  = test_images.as(dt);
        train_target = train_target.as(dt);
    }

    int feature_size = train_images.elements() / num_train;

    // Reshape images into feature vectors, one sample per row
    array train_feats = moddims(train_images, feature_size, num_train).T();
    array test_feats  = moddims(test_images, feature_size, num_test).T();

    train_target = train_target.T();
    test_target  = test_target.T();

    // Network parameters: input layer, two hidden layers, output layer
    vector<int> layers;
    layers.push_back(train_feats.dims(1));
    layers.push_back(100);
    layers.push_back(50);
    layers.push_back(num_classes);

    // Create network: architecture, range, datatype
    ann network(layers, 0.05, dt);

    // Train network
    timer::start();
    network.train(train_feats, train_target,
                  2.0,    // learning rate / alpha
                  250,    // max epochs
                  100,    // batch size
                  0.5,    // max error
                  true);  // verbose
    double train_time = timer::stop();

    // Run the trained network and test accuracy.
    array train_output = network.predict(train_feats);
    array test_output  = network.predict(test_feats);

    // Benchmark prediction: average over 100 runs
    timer::start();
    for (int i = 0; i < 100; i++) { network.predict(test_feats); }
    double test_time = timer::stop() / 100;

    printf("\nTraining set:\n");
    printf("Accuracy on training data: %2.2f\n",
           accuracy(train_output, train_target));

    printf("\nTest set:\n");
    printf("Accuracy on testing data: %2.2f\n",
           accuracy(test_output, test_target));

    printf("\nTraining time: %4.4lf s\n", train_time);
    printf("Prediction time: %4.4lf s\n\n", test_time);

    if (!console) {
        // Get 20 random test images.
        test_output = test_output.T();
        display_results<true>(test_images, test_output, test_target.T(), 20);
    }

    return 0;
}
int main(int argc, char **argv) {
    // usage: neural_network_xxx (device) (console on/off)
    //        (percentage of training/test set to use) (f32|f16)
    int device   = argc > 1 ? atoi(argv[1]) : 0;
    bool console = argc > 2 ? argv[2][0] == '-' : false;

    int perc = argc > 3 ? atoi(argv[3]) : 60;
    if (perc < 0 || perc > 100) {
        std::cerr << "Bad perc arg: " << perc << std::endl;
        return EXIT_FAILURE;
    }

    std::string dts = argc > 4 ? argv[4] : "f32";
    dtype dt        = f32;
    if (dts == "f16")
        dt = f16;
    else if (dts != "f32") {
        std::cerr << "Unsupported datatype " << dts
                  << ". Supported: f32 or f16" << std::endl;
        return EXIT_FAILURE;
    }

    if (dts == "f16" && !af::isHalfAvailable(device)) {
        std::cerr << "Half not available for device " << device << std::endl;
        return EXIT_FAILURE;
    }

    try {
        af::setDevice(device);
        return ann_demo(console, perc, dt);
    } catch (af::exception &ae) { std::cerr << ae.what() << std::endl; }

    return 0;
}
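
// Example invocations (the binary name depends on your build; ArrayFire's
// example CMake setup typically emits per-backend targets such as
// neural_network_cpu):
//   ./neural_network_cpu 0 on 60 f32   device 0, graphical output, 60% of MNIST
//   ./neural_network_cpu 0 -  100 f16  console-only output, full dataset, half precision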