Create an intermediate representation.

This way the visualiser does not need to know anything about Caffe and can work purely on the intermediate representation, which is a lot easier on the compiler.
2017-10-12 14:21:53 +02:00
parent c725fcb8df
commit 19390f8d4f
6 changed files with 157 additions and 31 deletions
--- a/src/LayerData.cpp
+++ b/src/LayerData.cpp
@@ -0,0 +1,65 @@
 #include <cstring>
 #include <functional>
 #include <iostream>
 #include <numeric>
 #include <glog/logging.h>
 #include "LayerData.hpp"
 using namespace fmri;
 using namespace std;
 /**
  * Build a layer record that owns a private copy of the layer's values.
  *
  * @param name  Name of the layer.
  * @param shape Dimensions of the layer; their product gives the number of values read from data.
  * @param data  Source buffer holding at least numEntries() values; it is copied, not retained.
  * @param type  Kind of layer this record describes.
  */
 LayerData::LayerData(const string& name, const vector<int>& shape, const DType* data, Type type) :
 	name_(name),
 	shape_(shape),
 	// shape_ is declared before data_, so numEntries() is already usable here.
 	data_(new DType[numEntries()]),
 	type_(type)
 {
 	// A raw byte copy is sufficient for the plain value type and is the cheapest way to duplicate the buffer.
 	std::memcpy(data_.get(), data, numEntries() * sizeof(DType));
 }
 /**
  * Compute the total number of values described by the shape.
  *
  * @return The product of all dimensions in shape_ (1 when shape_ is empty).
  */
 size_t LayerData::numEntries() const
 {
 	// accumulate() carries the running product in the type of its seed value.
 	// Seeding with a size_t keeps the multiplication in size_t; an int seed
 	// would multiply in int and overflow for large layers before the result
 	// is converted to the return type.
 	return accumulate(shape_.begin(), shape_.end(), static_cast<size_t>(1), multiplies<>());
 }
 /** @return The dimensions of this layer, as passed to the constructor. */
 const vector<int>& LayerData::shape() const
 {
 	return this->shape_;
 }
 typename LayerData::Type LayerData::type() const
 {
 	return type_;
 }
 /** @return The name of this layer, as passed to the constructor. */
 const string& LayerData::name() const
 {
 	return this->name_;
 }
 /** @return Read-only pointer to the first of the numEntries() values owned by this object. */
 DType const * LayerData::data() const
 {
 	DType const * const values = data_.get();
 	return values;
 }
 /**
  * Translate a layer type name into the corresponding Type value.
  *
  * @param typeName Type name to look up; matching is exact and case-sensitive.
  * @return The matching Type, or Type::Other (after logging the name) when it is not recognised.
  */
 LayerData::Type LayerData::typeFromString(string_view typeName)
 {
 	struct Mapping {
 		string_view label;
 		Type type;
 	};
 	// Every name that maps to a dedicated Type; anything else falls through to Other.
 	static constexpr Mapping known[] = {
 		{"Input", Type::Input},
 		{"Convolution", Type::Convolutional},
 		{"ReLU", Type::ReLU},
 		{"Pooling", Type::Pooling},
 	};
 	for (const auto& candidate : known) {
 		if (candidate.label == typeName) {
 			return candidate.type;
 		}
 	}
 	LOG(INFO) << "Received unknown layer type: " << typeName << endl;
 	return Type::Other;
 }
--- a/src/LayerData.hpp
+++ b/src/LayerData.hpp
@@ -0,0 +1,50 @@
 #pragma once
 #include <memory>
 #include <string>
 #include <string_view>
 #include <vector>
 #include "utils.hpp"
 namespace fmri {
 	using std::string;
 	using std::string_view;
 	using std::unique_ptr;
 	using std::vector;
 	/**
 	 * Intermediate representation of a single network layer's output.
 	 *
 	 * Holds the layer's name, shape, kind and an owned copy of its values.
 	 * Instances can be moved but not copied.
 	 */
 	class LayerData
 	{
 		public:
 			/** Kinds of layer that are distinguished. */
 			enum class Type {
 				Input,
 				Convolutional,
 				ReLU,
 				Pooling,
 				Output, // NOTE(review): never produced by typeFromString() — confirm where this is assigned.
 				Other
 			};
 			/**
 			 * Create a layer record, copying numEntries() values out of data.
 			 *
 			 * @param name  Name of the layer.
 			 * @param shape Dimensions of the layer.
 			 * @param data  Buffer to copy the values from; must hold at least the product of shape values.
 			 * @param type  Kind of layer.
 			 */
 			LayerData(const string& name, const vector<int>& shape, const DType* data, Type type);
 			// Copying is disabled; the value buffer has a single owner.
 			LayerData(const LayerData&) = delete;
 			LayerData(LayerData&&) = default;
 			LayerData& operator=(const LayerData&) = delete;
 			LayerData& operator=(LayerData&&) = default;
 			/** @return The name of the layer. */
 			const string& name() const;
 			/** @return The kind of layer. */
 			Type type() const;
 			/** @return The dimensions of the layer. */
 			const vector<int>& shape() const;
 			/** @return Read-only pointer to the owned values; numEntries() of them are valid. */
 			DType const * data() const;
 			/** @return The product of all dimensions in shape(). */
 			size_t numEntries() const;
 			/**
 			 * Map a layer type name to a Type.
 			 *
 			 * Recognises "Input", "Convolution", "ReLU" and "Pooling"; any other
 			 * name is logged and mapped to Type::Other.
 			 */
 			static Type typeFromString(string_view name);
 		private:
 			string name_;
 			vector<int> shape_;
 			// Declared after shape_: the buffer size is derived from the shape.
 			unique_ptr<DType[]> data_;
 			Type type_;
 	};
 }
--- a/src/Simulator.cpp
+++ b/src/Simulator.cpp
@@ -28,8 +28,10 @@ Simulator::Simulator(const string& model_file, const string& weights_file, const
 }
-vector<Simulator::DType> Simulator::simulate(const string& image_file)
+vector<LayerData> Simulator::simulate(const string& image_file)
 {
 	typedef LayerData::Type LType;
 	cv::Mat im = cv::imread(image_file, -1);
    assert(!im.empty());
@@ -41,10 +43,20 @@ vector<Simulator::DType> Simulator::simulate(const string& image_file)
    net.Forward();
-    Blob<DType> *output_layer = net.output_blobs()[0];
+	vector<LayerData> result;
-    const DType *begin = output_layer->cpu_data();
+
-    const DType *end = begin + output_layer->channels();
+    Blob<DType>* input_layer = net.input_blobs()[0];
-    vector<DType> result(begin, end);
+
 	const auto& names = net.layer_names();
 	const auto& results = net.top_vecs();
 	const auto& layers = net.layers();
 	for (unsigned int i = 0; i < names.size(); ++i) {
 		CHECK_EQ(results[i].size(), 1) << "Multiple outputs per layer are not supported!" << endl;
 		const auto blob = results[i][0];
 		result.emplace_back(names[i], blob->shape(), blob->cpu_data(), LayerData::typeFromString(layers[i]->type()));
 	}
    return result;
 }
@@ -67,10 +79,6 @@ vector<cv::Mat> Simulator::getWrappedInputLayer()
 }
 static cv::Mat fix_channels(const int num_channels, cv::Mat original) {
    if (num_channels == original.channels()) {
        return original;
    }
    cv::Mat converted;
    if (num_channels == 1 && original.channels() == 3) {
@@ -82,8 +90,8 @@ static cv::Mat fix_channels(const int num_channels, cv::Mat original) {
    } else if (num_channels == 3 && original.channels() == 4) {
        cv::cvtColor(original, converted, cv::COLOR_BGRA2BGR);
    } else {
-        // Don't know how to convert.
+		CHECK(num_channels == original.channels()) << "Cannot convert between channel types. ";
-        abort();
+        return original;
    }
    return converted;
--- a/src/Simulator.hpp
+++ b/src/Simulator.hpp
@@ -9,17 +9,18 @@
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
 #include "utils.hpp"
 #include "LayerData.hpp"
 namespace fmri {
    using std::string;
    using std::vector;
    class Simulator {
    public:
        typedef float DType;
        Simulator(const string &model_file, const string &weights_file, const string &means_file = "");
-        vector<DType> simulate(const string &input_file);
+        vector<LayerData> simulate(const string &input_file);
    private:
        caffe::Net<DType> net;
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -8,7 +8,7 @@ using namespace std;
 using namespace fmri;
 int main(int argc, char *const argv[]) {
-    ::google::InitGoogleLogging(argv[0]);
+    google::InitGoogleLogging(argv[0]);
    Options options = Options::parse(argc, argv);
    vector<string> labels;
@@ -18,23 +18,24 @@ int main(int argc, char *const argv[]) {
    Simulator simulator(options.model(), options.weights(), options.means());
-    for (const auto &image : options.inputs()) {
+	for (const auto &image : options.inputs()) {
-        cout << "Result for " << image << ":" << endl;
+		const auto res = simulator.simulate(image);
-        auto res = simulator.simulate(image);
+		LOG(INFO) << "Result for " << image << ":" << endl;
        if (!labels.empty()) {
            auto scores = combine(res, labels);
            sort(scores.begin(), scores.end(), greater<>());
            for (unsigned int i = 0; i < scores.size() && i < 5; ++i) {
                cout << scores[i].first << " " << scores[i].second << endl;
            }
        } else {
            cout << "Best result: " << *(max_element(res.begin(), res.end())) << endl;
        }
-        cout << endl;
+		const auto& resultRow = res[res.size() - 1];
-    }
+		if (!labels.empty()) {
 			// combine() is called with a vector<DType>, so copy the final layer's values out of the raw buffer first.
 			vector<DType> weights(resultRow.data(), resultRow.data() + resultRow.numEntries());
 			auto scores = combine(weights, labels);
 			// Sort descending so the best matches come first; at most five are reported.
 			sort(scores.begin(), scores.end(), greater<>());
 			for (unsigned int i = 0; i < scores.size() && i < 5; ++i) {
 				LOG(INFO) << scores[i].first << " " << scores[i].second << endl;
 			}
 		} else {
 			// Without labels, report the largest value in the final layer.
 			// This must go through max_element: writing `*(first, last)` is the
 			// comma operator, which discards `first` and dereferences the
 			// one-past-the-end pointer instead of searching the range.
 			const DType* first = resultRow.data();
 			const DType* last = first + resultRow.numEntries();
 			LOG(INFO) << "Best result: " << *max_element(first, last) << endl;
 		}
 	}
-    ::google::ShutdownGoogleLogging();
+    google::ShutdownGoogleLogging();
    return 0;
 }
--- a/src/utils.hpp
+++ b/src/utils.hpp
@@ -10,6 +10,7 @@
 namespace fmri
 {
 	typedef float DType;
    template<class T>
    inline T identity(T t) {
@@ -57,4 +58,4 @@ namespace fmri
        return res;
    }
-}
+}