Create an intermediate representation.
This way the visualiser does not need to know all about caffe; it can just work on the intermediate representation, which is a lot easier on the compiler.
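As a rough sketch of the decoupling this enables (not part of the commit): a visualiser-side consumer only needs the LayerData interface introduced below, never caffe::Net or caffe::Blob. The dumpSummary helper is a made-up name for illustration.

#include <iostream>
#include <vector>

#include "LayerData.hpp"

// Hypothetical consumer: it works purely on fmri::LayerData values and
// has no idea how the activations were produced.
static void dumpSummary(const std::vector<fmri::LayerData>& layers)
{
    for (const auto& layer : layers) {
        std::cout << layer.name() << ": " << layer.numEntries() << " activations, shape [";
        for (auto dim : layer.shape()) {
            std::cout << ' ' << dim;
        }
        std::cout << " ]\n";
    }
}

int main()
{
    dumpSummary({});  // in the real tool this would be the result of Simulator::simulate()
}

Such a function could be handed the vector returned by the new Simulator::simulate() signature further down, without caring how the blobs were produced.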
src/LayerData.cpp (new file, 65 lines)
@@ -0,0 +1,65 @@
#include <cstring>
#include <functional>
#include <iostream>
#include <numeric>

#include <glog/logging.h>

#include "LayerData.hpp"

using namespace fmri;
using namespace std;

LayerData::LayerData(const string& name, const vector<int>& shape, const DType* data, Type type) :
        name_(name),
        shape_(shape),
        type_(type)
{
    const auto dataSize = numEntries();
    // Compute the dimension of the data area
    data_.reset(new DType[dataSize]);

    // Copy the data over with memcpy because it's just faster that way
    memcpy(data_.get(), data, sizeof(DType) * dataSize);
}

size_t LayerData::numEntries() const
{
    return accumulate(shape_.begin(), shape_.end(), 1, multiplies<>());
}

const vector<int>& LayerData::shape() const
{
    return shape_;
}

typename LayerData::Type LayerData::type() const
{
    return type_;
}

const string& LayerData::name() const
{
    return name_;
}

DType const * LayerData::data() const
{
    return data_.get();
}

LayerData::Type LayerData::typeFromString(string_view typeName)
{
    if (typeName == "Input") {
        return Type::Input;
    } else if (typeName == "Convolution") {
        return Type::Convolutional;
    } else if (typeName == "ReLU") {
        return Type::ReLU;
    } else if (typeName == "Pooling") {
        return Type::Pooling;
    } else {
        LOG(INFO) << "Received unknown layer type: " << typeName << endl;
        return Type::Other;
    }
}
src/LayerData.hpp (new file, 50 lines)
@@ -0,0 +1,50 @@
#pragma once

#include <memory>
#include <string>
#include <string_view>
#include <vector>

#include "utils.hpp"

namespace fmri {

    using std::string;
    using std::string_view;
    using std::unique_ptr;
    using std::vector;

    class LayerData
    {
    public:
        enum class Type {
            Input,
            Convolutional,
            ReLU,
            Pooling,
            Output,
            Other
        };

        LayerData(const string& name, const vector<int>& shape, const DType* data, Type type);
        LayerData(const LayerData&) = delete;
        LayerData(LayerData&&) = default;

        LayerData& operator=(const LayerData&) = delete;
        LayerData& operator=(LayerData&&) = default;

        const string& name() const;
        Type type() const;
        const vector<int>& shape() const;
        DType const * data() const;
        size_t numEntries() const;

        static Type typeFromString(string_view name);

    private:
        string name_;
        vector<int> shape_;
        unique_ptr<DType[]> data_;
        Type type_;
    };
}
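A quick usage sketch of the class above (again not part of the commit; the layer name, shape, and values are invented): the constructor copies the incoming buffer, so the caller's data can be released afterwards.

#include <iostream>
#include <vector>

#include "LayerData.hpp"

int main()
{
    // Pretend 1x3x2x2 activation volume; LayerData's constructor copies these 12 floats.
    std::vector<fmri::DType> activations(12, 0.5f);
    fmri::LayerData layer("conv1", {1, 3, 2, 2}, activations.data(),
                          fmri::LayerData::typeFromString("Convolution"));

    activations.clear();  // fine: the layer owns its own copy of the data

    std::cout << layer.name() << " holds " << layer.numEntries() << " values\n";
}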
src/Simulator.cpp
@@ -28,8 +28,10 @@ Simulator::Simulator(const string& model_file, const string& weights_file, const
 
 }
 
-vector<Simulator::DType> Simulator::simulate(const string& image_file)
+vector<LayerData> Simulator::simulate(const string& image_file)
 {
+    typedef LayerData::Type LType;
+
     cv::Mat im = cv::imread(image_file, -1);
 
     assert(!im.empty());
@@ -41,10 +43,20 @@ vector<Simulator::DType> Simulator::simulate(const string& image_file)
 
     net.Forward();
 
-    Blob<DType> *output_layer = net.output_blobs()[0];
-    const DType *begin = output_layer->cpu_data();
-    const DType *end = begin + output_layer->channels();
-    vector<DType> result(begin, end);
+    vector<LayerData> result;
+
+    Blob<DType>* input_layer = net.input_blobs()[0];
+
+    const auto& names = net.layer_names();
+    const auto& results = net.top_vecs();
+    const auto& layers = net.layers();
+
+    for (unsigned int i = 0; i < names.size(); ++i) {
+        CHECK_EQ(results[i].size(), 1) << "Multiple outputs per layer are not supported!" << endl;
+        const auto blob = results[i][0];
+
+        result.emplace_back(names[i], blob->shape(), blob->cpu_data(), LayerData::typeFromString(layers[i]->type()));
+    }
 
     return result;
 }
@@ -67,10 +79,6 @@ vector<cv::Mat> Simulator::getWrappedInputLayer()
 }
 
 static cv::Mat fix_channels(const int num_channels, cv::Mat original) {
-    if (num_channels == original.channels()) {
-        return original;
-    }
-
     cv::Mat converted;
 
     if (num_channels == 1 && original.channels() == 3) {
@@ -82,8 +90,8 @@ static cv::Mat fix_channels(const int num_channels, cv::Mat original) {
     } else if (num_channels == 3 && original.channels() == 4) {
         cv::cvtColor(original, converted, cv::COLOR_BGRA2BGR);
     } else {
-        // Don't know how to convert.
-        abort();
+        CHECK(num_channels == original.channels()) << "Cannot convert between channel types. ";
+        return original;
    }
 
     return converted;
src/Simulator.hpp
@@ -9,17 +9,18 @@
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
 
 #include "utils.hpp"
+#include "LayerData.hpp"
 
 namespace fmri {
     using std::string;
     using std::vector;
 
     class Simulator {
     public:
         typedef float DType;
 
         Simulator(const string &model_file, const string &weights_file, const string &means_file = "");
 
-        vector<DType> simulate(const string &input_file);
+        vector<LayerData> simulate(const string &input_file);
 
     private:
         caffe::Net<DType> net;
 
src/main.cpp (33 changed lines)
@@ -8,7 +8,7 @@ using namespace std;
 using namespace fmri;
 
 int main(int argc, char *const argv[]) {
-    ::google::InitGoogleLogging(argv[0]);
+    google::InitGoogleLogging(argv[0]);
 
     Options options = Options::parse(argc, argv);
     vector<string> labels;
@@ -18,23 +18,24 @@ int main(int argc, char *const argv[]) {
 
     Simulator simulator(options.model(), options.weights(), options.means());
 
-    for (const auto &image : options.inputs()) {
-        cout << "Result for " << image << ":" << endl;
-        auto res = simulator.simulate(image);
-        if (!labels.empty()) {
-            auto scores = combine(res, labels);
-            sort(scores.begin(), scores.end(), greater<>());
-            for (unsigned int i = 0; i < scores.size() && i < 5; ++i) {
-                cout << scores[i].first << " " << scores[i].second << endl;
-            }
-        } else {
-            cout << "Best result: " << *(max_element(res.begin(), res.end())) << endl;
-        }
-
-        cout << endl;
-    }
+    for (const auto &image : options.inputs()) {
+        const auto res = simulator.simulate(image);
+        LOG(INFO) << "Result for " << image << ":" << endl;
+
+        const auto& resultRow = res[res.size() - 1];
+        if (!labels.empty()) {
+            vector<DType> weights(resultRow.data(), resultRow.data() + resultRow.numEntries());
+            auto scores = combine(weights, labels);
+            sort(scores.begin(), scores.end(), greater<>());
+            for (unsigned int i = 0; i < scores.size() && i < 5; ++i) {
+                LOG(INFO) << scores[i].first << " " << scores[i].second << endl;
+            }
+        } else {
+            LOG(INFO) << "Best result: " << *(resultRow.data(), resultRow.data() + resultRow.numEntries()) << endl;
+        }
+    }
 
-    ::google::ShutdownGoogleLogging();
+    google::ShutdownGoogleLogging();
 
     return 0;
 }
src/utils.hpp
@@ -10,6 +10,7 @@
 
 namespace fmri
 {
+    typedef float DType;
 
     template<class T>
     inline T identity(T t) {