I am trying to build an OCR neural network trained on the MNIST handwritten-digit image set, but every time I run it the process is killed by the kernel's OOM Killer, which terminates processes that use too much memory. I am not sure whether the cause is my own code or something in the library's backpropagation code. Either way, any help would be appreciated.
Also, note that when I cut the training sample size down to only 250 images the program works, but with 500 images or more it is killed.
The C++ File:
#include "ocr.h"
/**
 * Trains a Fido neural network on the MNIST training set and stores it to a file.
 *
 * Steps: load the raw images and labels, convert each image to a vector of doubles in
 * [0, 1] and each label to a 10-element one-hot vector, train with backpropagation,
 * write the network to the output file, and print the prediction for one sample.
 *
 * @param argc  Argument count.
 * @param argv  argv[1], when present, is the output filename (default "ocr.txt").
 * @return 0 on success, 1 if the output file cannot be opened.
 * @throws std::runtime_error if the image and label counts differ or no samples were
 *         loaded (plus whatever the loader functions throw).
 */
int main(int argc, char const *argv[]) {
    const std::string lbels = "train-labels.idx1-ubyte";
    const std::string imges = "train-images.idx3-ubyte";
    const std::string outputFilename = (argc > 1) ? argv[1] : "ocr.txt";

    // Separate counters: sharing one variable let the label count silently overwrite the
    // image count, so a mismatch would index past the end of the image array.
    int imageCount = 0;
    int labelCount = 0;
    int sizeNum = 0;

    std::cout << "Loading images from files..." << std::endl;
    auto inputArr = read_mnist_images(imges, imageCount, sizeNum);
    auto outputArr = read_mnist_labels(lbels, labelCount);
    if (imageCount != labelCount) {
        throw std::runtime_error("MNIST image and label counts differ");
    }

    // NOTE(review): the fourth argument is sizeNum, so presumably the hidden layer gets one
    // neuron per pixel - confirm against Fido's NeuralNet constructor; a smaller hidden
    // layer would reduce memory use considerably.
    net::NeuralNet neuralNetwork = net::NeuralNet(sizeNum, 10, 1, sizeNum, "sigmoid");

    const std::size_t sampleCount = static_cast<std::size_t>(imageCount);
    const std::size_t pixelCount = static_cast<std::size_t>(sizeNum);

    std::vector< std::vector<double> > input;
    std::vector< std::vector<double> > correctOutput;
    input.reserve(sampleCount);
    correctOutput.reserve(sampleCount);

    std::cout << "Loading into vector...\n";
    for (std::size_t i = 0; i < sampleCount; ++i) {
        // Build the row in place from the raw bytes, then scale 0..255 down to 0..1.
        input.emplace_back(inputArr[i], inputArr[i] + pixelCount);
        for (double& pixel : input.back()) {
            pixel /= 255.0;
        }
        correctOutput.push_back(digits(outputArr[i]));

        // Each image row is its own new[] allocation; release it as soon as it has been
        // converted so the raw and converted copies never coexist for the whole data set.
        delete [] inputArr[i];
    }
    std::cout << "Done with loading.\n";

    std::cout << "Freeing memory..." << std::endl;
    // The rows were released above; only the outer pointer array and the labels remain.
    delete [] inputArr;
    delete [] outputArr;
    std::cout << "Done with freeing memory." << std::endl;

    std::cout << "Supposed # of samples: " << labelCount << std::endl;
    std::cout << "Actual # of samples: " << input.size() << std::endl;
    if (input.empty()) {
        // input[0] below would be undefined behaviour on an empty data set.
        throw std::runtime_error("No MNIST samples were loaded");
    }

    net::Backpropagation backprop = net::Backpropagation(0.01, 0.9, 0.1, 10);
    std::cout << "Inputs: " << neuralNetwork.numberOfInputs() << std::endl;
    std::cout << "Hidden: " << neuralNetwork.numberOfHiddenNeurons() << std::endl;
    std::cout << "Outputs: " << neuralNetwork.numberOfOutputs() << std::endl;
    std::cout << "Input array: " << input[0].size() << std::endl;
    std::cout << "Correct array: " << correctOutput[0].size() << std::endl;
    if (input.size() != correctOutput.size()) {
        throw std::runtime_error("Differing sizes between two of the same thing");
    }

    /* To decrease memory usage
    #define RESIZE_Value 500
    // Works at 100, 250
    // Killed at 500 and above
    std::cout << "Resizing arrays to " << RESIZE_Value << " each..." << std::endl;
    input.resize(RESIZE_Value);
    correctOutput.resize(RESIZE_Value);
    // */

    std::cout << "Beginning training..." << std::endl;
    backprop.train(&neuralNetwork, input, correctOutput);

    std::cout << "Done training. Storing..." << std::endl;
    std::ofstream myfile;
    myfile.open(outputFilename);
    if (!myfile.is_open()) {
        std::cerr << "Cannot open output file '" << outputFilename << "'." << std::endl;
        return 1;
    }
    neuralNetwork.store(&myfile);
    myfile.close();
    std::cout << "Done storing to output file '" << outputFilename << "'. Testing..." << std::endl;

    // Arbitrary sample used as a smoke test; skipped when the data set is too small
    // (for example after enabling the resize block above with a tiny value).
    const std::size_t testIndex = 23;
    if (testIndex < input.size()) {
        std::cout << "Test: " << findTop(neuralNetwork.getOutput(input[testIndex])) << std::endl;
        std::cout << "Correct answer: " << findTop(correctOutput[testIndex]) << std::endl;
    } else {
        std::cout << "Skipping test: fewer than " << (testIndex + 1) << " samples loaded." << std::endl;
    }
    return 0;
}
The header file, which contains the functions for loading the MNIST images and for picking the highest-valued member of an array:
(I copied the MNIST functions from somewhere else.)
#include <cstddef>
#include <fstream>
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>

#include "include/Fido.h"
#ifndef OCR
#define OCR
typedef unsigned char uchar;
/**
 * Reads every image from an MNIST IDX3 image file into heap-allocated buffers.
 *
 * The header consists of four big-endian 32-bit integers (magic number 2051, image
 * count, rows, columns); it is followed by rows * columns bytes per image.
 *
 * @param full_path         Path of the image file.
 * @param number_of_images  Set to the image count from the header (only on success).
 * @param image_size        Set to rows * columns, the bytes per image (only on success).
 * @return Array of number_of_images pointers, each addressing image_size bytes. The
 *         caller owns it: delete[] every row first, then delete[] the outer array.
 *         (The return type is the same type as the file's `uchar**` alias.)
 * @throws std::runtime_error if the file cannot be opened, the header is missing or
 *         invalid, or the pixel data ends before the header says it should.
 */
inline unsigned char** read_mnist_images(std::string full_path, int& number_of_images, int& image_size) {
    // Binary mode: text mode may translate or stop at certain byte values on some platforms.
    std::ifstream file(full_path, std::ios::binary);
    if (!file.is_open()) {
        throw std::runtime_error("Cannot open file `" + full_path + "`!");
    }

    // Assemble each big-endian field byte by byte so the result does not depend on the
    // host byte order and no signed shift can overflow.
    auto readBigEndian = [](std::ifstream& in) -> long long {
        unsigned char raw[4] = {0, 0, 0, 0};
        in.read(reinterpret_cast<char*>(raw), sizeof(raw));
        return (static_cast<long long>(raw[0]) << 24) | (static_cast<long long>(raw[1]) << 16) |
               (static_cast<long long>(raw[2]) << 8) | static_cast<long long>(raw[3]);
    };

    const long long magic = readBigEndian(file);
    const long long count = readBigEndian(file);
    const long long rows = readBigEndian(file);
    const long long cols = readBigEndian(file);
    const long long maxInt = std::numeric_limits<int>::max();
    if (!file || magic != 2051 || count > maxInt || rows > maxInt || cols > maxInt ||
        rows * cols > maxInt) {
        throw std::runtime_error("Invalid MNIST image file!");
    }

    number_of_images = static_cast<int>(count);
    image_size = static_cast<int>(rows * cols);

    // Value-initialise the row pointers so the cleanup path can delete[] all of them.
    unsigned char** dataset = new unsigned char*[number_of_images]();
    try {
        for (int i = 0; i < number_of_images; ++i) {
            dataset[i] = new unsigned char[image_size];
            file.read(reinterpret_cast<char*>(dataset[i]), image_size);
            if (!file) {
                throw std::runtime_error("Truncated MNIST image file!");
            }
        }
    } catch (...) {
        for (int i = 0; i < number_of_images; ++i) {
            delete[] dataset[i];
        }
        delete[] dataset;
        throw;
    }
    return dataset;
}
/**
 * Reads every label from an MNIST IDX1 label file into one heap-allocated buffer.
 *
 * The header consists of two big-endian 32-bit integers (magic number 2049, label
 * count); it is followed by one byte per label.
 *
 * @param full_path         Path of the label file.
 * @param number_of_labels  Set to the label count from the header (only on success).
 * @return Buffer of number_of_labels bytes. The caller owns it and must delete[] it.
 *         (The return type is the same type as the file's `uchar*` alias.)
 * @throws std::runtime_error if the file cannot be opened, the header is missing or
 *         invalid, or the label data ends before the header says it should.
 */
inline unsigned char* read_mnist_labels(std::string full_path, int& number_of_labels) {
    // Binary mode: text mode may translate or stop at certain byte values on some platforms.
    std::ifstream file(full_path, std::ios::binary);
    if (!file.is_open()) {
        throw std::runtime_error("Unable to open file `" + full_path + "`!");
    }

    // Assemble each big-endian field byte by byte so the result does not depend on the
    // host byte order and no signed shift can overflow.
    auto readBigEndian = [](std::ifstream& in) -> long long {
        unsigned char raw[4] = {0, 0, 0, 0};
        in.read(reinterpret_cast<char*>(raw), sizeof(raw));
        return (static_cast<long long>(raw[0]) << 24) | (static_cast<long long>(raw[1]) << 16) |
               (static_cast<long long>(raw[2]) << 8) | static_cast<long long>(raw[3]);
    };

    const long long magic = readBigEndian(file);
    const long long count = readBigEndian(file);
    if (!file || magic != 2049 || count > std::numeric_limits<int>::max()) {
        throw std::runtime_error("Invalid MNIST label file!");
    }

    number_of_labels = static_cast<int>(count);

    unsigned char* labels = new unsigned char[number_of_labels];
    // One bulk read instead of one stream call per label.
    file.read(reinterpret_cast<char*>(labels), number_of_labels);
    if (!file) {
        delete[] labels;
        throw std::runtime_error("Truncated MNIST label file!");
    }
    return labels;
}
/**
 * Builds the 10-element one-hot encoding of a digit label.
 *
 * Declared inline because it is defined in a header; without it, including the header
 * from two translation units produces a multiple-definition link error.
 *
 * @param j  Label value (same type as the file's `uchar` alias).
 * @return Vector of 10 doubles: 1 at index j and 0 elsewhere. If j is 10 or greater,
 *         every element is 0.
 */
inline std::vector<double> digits(unsigned char j) {
    const std::size_t classCount = 10;
    // Allocate once at the final size instead of growing through repeated push_back.
    std::vector<double> oneHot(classCount, 0.0);
    if (j < classCount) {
        oneHot[j] = 1.0;
    }
    return oneHot;
}
/**
 * Returns the index of the largest value among the first 10 elements of v.
 *
 * Only as many elements as v actually holds are examined, so a vector shorter than 10
 * is no longer read out of bounds. Ties resolve to the lowest index.
 *
 * @param v  Values to scan; taken by const reference to avoid copying the vector.
 * @return Index (0-9) of the largest examined value, or -1 if v is empty or no examined
 *         value is greater than -1.0.
 */
inline int findTop(const std::vector<double>& v) {
    const std::size_t limit = (v.size() < 10) ? v.size() : 10;
    int best = -1;
    double top = -1.0;
    for (std::size_t i = 0; i < limit; ++i) {
        if (v[i] > top) {
            best = static_cast<int>(i);
            top = v[i];
        }
    }
    return best;
}
#endif