From e7f543bb264ad8597a3edaf3b938e9c3cc57bf33 Mon Sep 17 00:00:00 2001 From: ih4cku Date: Wed, 17 Jun 2015 12:15:28 +0800 Subject: [PATCH 001/264] register a dummy reducer to prevent mincepie runtime error --- tools/extra/resize_and_crop_images.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/extra/resize_and_crop_images.py b/tools/extra/resize_and_crop_images.py index c844f590c06..fd2c3134edb 100755 --- a/tools/extra/resize_and_crop_images.py +++ b/tools/extra/resize_and_crop_images.py @@ -101,7 +101,7 @@ def map(self, key, value): yield value, FLAGS.output_folder mapreducer.REGISTER_DEFAULT_MAPPER(ResizeCropImagesMapper) - +mapreducer.REGISTER_DEFAULT_REDUCER(mapreducer.NoPassReducer) mapreducer.REGISTER_DEFAULT_READER(mapreducer.FileReader) mapreducer.REGISTER_DEFAULT_WRITER(mapreducer.FileWriter) From ac87850887f064752c2ad815367484c07eaf5449 Mon Sep 17 00:00:00 2001 From: Marco Castelluccio Date: Wed, 26 Aug 2015 19:03:59 -0700 Subject: [PATCH 002/264] No need to squeeze the output of the network --- python/caffe/detector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/caffe/detector.py b/python/caffe/detector.py index 75cd3b1202f..ef1f91730bf 100644 --- a/python/caffe/detector.py +++ b/python/caffe/detector.py @@ -83,7 +83,7 @@ def detect_windows(self, images_windows): for ix, window_in in enumerate(window_inputs): caffe_in[ix] = self.transformer.preprocess(in_, window_in) out = self.forward_all(**{in_: caffe_in}) - predictions = out[self.outputs[0]].squeeze(axis=(2, 3)) + predictions = out[self.outputs[0]] # Package predictions with images and windows. 
detections = [] From 10725393518df14b9b6976686f72fae792c3f393 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Mon, 5 Oct 2015 15:46:54 -0700 Subject: [PATCH 003/264] NetSpec: type-check Function inputs (they must be Top instances) --- python/caffe/net_spec.py | 4 ++++ python/caffe/test/test_net_spec.py | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/python/caffe/net_spec.py b/python/caffe/net_spec.py index 93fc01927db..b6520627a4b 100644 --- a/python/caffe/net_spec.py +++ b/python/caffe/net_spec.py @@ -103,6 +103,10 @@ class Function(object): def __init__(self, type_name, inputs, params): self.type_name = type_name + for index, input in enumerate(inputs): + if not isinstance(input, Top): + raise TypeError('%s input %d is not a Top (type is %s)' % + (type_name, index, type(input))) self.inputs = inputs self.params = params self.ntop = self.params.get('ntop', 1) diff --git a/python/caffe/test/test_net_spec.py b/python/caffe/test/test_net_spec.py index fee3c0aaebe..ffe71bacb08 100644 --- a/python/caffe/test/test_net_spec.py +++ b/python/caffe/test/test_net_spec.py @@ -79,3 +79,11 @@ def test_zero_tops(self): net_proto = silent_net() net = self.load_net(net_proto) self.assertEqual(len(net.forward()), 0) + + def test_type_error(self): + """Test that a TypeError is raised when a Function input isn't a Top.""" + data = L.DummyData(ntop=2) # data is a 2-tuple of Tops + r = r"^Silence input 0 is not a Top \(type is <(type|class) 'tuple'>\)$" + with self.assertRaisesRegexp(TypeError, r): + L.Silence(data, ntop=0) # should raise: data is a tuple, not a Top + L.Silence(*data, ntop=0) # shouldn't raise: each elt of data is a Top From 5395cc66d68df74ff5d0920ed80eabcdd439c660 Mon Sep 17 00:00:00 2001 From: ixartz Date: Mon, 2 Nov 2015 23:07:45 -0500 Subject: [PATCH 004/264] OSX 10.10 (and more) use Accelerate Framework instead of veclib --- cmake/Dependencies.cmake | 6 ++++++ include/caffe/util/mkl_alternate.hpp | 5 +++++ 2 files changed, 11 insertions(+) diff --git 
a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 51a803c1a73..64e6500ed3d 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -102,6 +102,12 @@ elseif(APPLE) find_package(vecLib REQUIRED) include_directories(SYSTEM ${vecLib_INCLUDE_DIR}) list(APPEND Caffe_LINKER_LIBS ${vecLib_LINKER_LIBS}) + + if(VECLIB_FOUND) + if(NOT vecLib_INCLUDE_DIR MATCHES "^/System/Library/Frameworks/vecLib.framework.*") + add_definitions(-DUSE_ACCELERATE) + endif() + endif() endif() # ---[ Python diff --git a/include/caffe/util/mkl_alternate.hpp b/include/caffe/util/mkl_alternate.hpp index 3355b6658a3..95df0f93b5e 100644 --- a/include/caffe/util/mkl_alternate.hpp +++ b/include/caffe/util/mkl_alternate.hpp @@ -7,9 +7,14 @@ #else // If use MKL, simply include the MKL header +#ifdef USE_ACCELERATE +#include +#else extern "C" { #include } +#endif // USE_ACCELERATE + #include // Functions that caffe uses but are not present if MKL is not linked. From 37413f9d0b102950dc6b94e52e367c762974f02a Mon Sep 17 00:00:00 2001 From: Tea Date: Mon, 23 Nov 2015 11:36:46 +0800 Subject: [PATCH 005/264] Scope macros inside switch --- src/caffe/util/hdf5.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/caffe/util/hdf5.cpp b/src/caffe/util/hdf5.cpp index 7730e76ab87..d255877b63b 100644 --- a/src/caffe/util/hdf5.cpp +++ b/src/caffe/util/hdf5.cpp @@ -29,10 +29,10 @@ void hdf5_load_nd_dataset_helper( CHECK_GE(status, 0) << "Failed to get dataset info for " << dataset_name_; switch (class_) { case H5T_FLOAT: - LOG_FIRST_N(INFO, 1) << "Datatype class: H5T_FLOAT"; + { LOG_FIRST_N(INFO, 1) << "Datatype class: H5T_FLOAT"; } break; case H5T_INTEGER: - LOG_FIRST_N(INFO, 1) << "Datatype class: H5T_INTEGER"; + { LOG_FIRST_N(INFO, 1) << "Datatype class: H5T_INTEGER"; } break; case H5T_TIME: LOG(FATAL) << "Unsupported datatype class: H5T_TIME"; From 52dcf4801dddf05df3ddef238895cabbc6c4384a Mon Sep 17 00:00:00 2001 From: Azat Date: Thu, 3 Dec 2015 13:56:48 +0300 
Subject: [PATCH 006/264] sigmoid fix (cu) Previous implementation caused FP overflow for x less than -90 --- src/caffe/layers/sigmoid_layer.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/layers/sigmoid_layer.cu b/src/caffe/layers/sigmoid_layer.cu index 184c61ede83..8a4ea6616e0 100644 --- a/src/caffe/layers/sigmoid_layer.cu +++ b/src/caffe/layers/sigmoid_layer.cu @@ -8,7 +8,7 @@ namespace caffe { template __global__ void SigmoidForward(const int n, const Dtype* in, Dtype* out) { CUDA_KERNEL_LOOP(index, n) { - out[index] = 1. / (1. + exp(-in[index])); + out[index] = 0.5 * tanh(0.5 * in[index]) + 0.5; } } From 0f61cc09467afa35835dc09617f1042e4f77c9fb Mon Sep 17 00:00:00 2001 From: Azat Date: Thu, 3 Dec 2015 14:00:08 +0300 Subject: [PATCH 007/264] sigmoid fix (cpp) Previous implementation caused FP overflow for x less than -90 --- src/caffe/layers/sigmoid_layer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/layers/sigmoid_layer.cpp b/src/caffe/layers/sigmoid_layer.cpp index 85fd9676812..f8aa769a174 100644 --- a/src/caffe/layers/sigmoid_layer.cpp +++ b/src/caffe/layers/sigmoid_layer.cpp @@ -7,7 +7,7 @@ namespace caffe { template inline Dtype sigmoid(Dtype x) { - return 1. / (1. 
+ exp(-x)); + return 0.5 * tanh(0.5 * x) + 0.5; } template From c1c559c2cb98d6de955f1d469c6104cb265f5dc5 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Mon, 29 Feb 2016 12:28:15 -0800 Subject: [PATCH 008/264] Don't force datum.label=0 in array_to_datum --- python/caffe/io.py | 5 +++-- python/caffe/test/test_io.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/python/caffe/io.py b/python/caffe/io.py index 75310589cec..cee5ace2e88 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -63,7 +63,7 @@ def blobprotovector_str_to_arraylist(str): return [blobproto_to_array(blob) for blob in vec.blobs] -def array_to_datum(arr, label=0): +def array_to_datum(arr, label=None): """Converts a 3-dimensional array to datum. If the array has dtype uint8, the output data will be encoded as a string. Otherwise, the output data will be stored in float format. @@ -76,7 +76,8 @@ def array_to_datum(arr, label=0): datum.data = arr.tostring() else: datum.float_data.extend(arr.flat) - datum.label = label + if label is not None: + datum.label = label return datum diff --git a/python/caffe/test/test_io.py b/python/caffe/test/test_io.py index 8c86ef75fb2..4a16b5b9128 100644 --- a/python/caffe/test/test_io.py +++ b/python/caffe/test/test_io.py @@ -39,3 +39,18 @@ def test_scalar(self): arr = caffe.io.blobproto_to_array(blob) self.assertEqual(arr, 123) + + +class TestArrayToDatum(unittest.TestCase): + + def test_label_none_size(self): + # Set label + d1 = caffe.io.array_to_datum( + np.ones((10,10,3)), label=1) + # Don't set label + d2 = caffe.io.array_to_datum( + np.ones((10,10,3))) + # Not setting the label should result in a smaller object + self.assertGreater( + len(d1.SerializeToString()), + len(d2.SerializeToString())) From 542d216bb28343111e6b7df2c24824c3f90e435a Mon Sep 17 00:00:00 2001 From: JacekR Date: Tue, 15 Mar 2016 10:43:34 +0100 Subject: [PATCH 009/264] Update Makefile: Changed MKL_DIR to MKLROOT MKLROOT variable is set by MKL scripts, 
so it also should be used in Makefile. --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 2f81aca84e7..5424c3a1858 100644 --- a/Makefile +++ b/Makefile @@ -364,9 +364,9 @@ ifeq ($(BLAS), mkl) # MKL LIBRARIES += mkl_rt COMMON_FLAGS += -DUSE_MKL - MKL_DIR ?= /opt/intel/mkl - BLAS_INCLUDE ?= $(MKL_DIR)/include - BLAS_LIB ?= $(MKL_DIR)/lib $(MKL_DIR)/lib/intel64 + MKLROOT ?= /opt/intel/mkl + BLAS_INCLUDE ?= $(MKLROOT)/include + BLAS_LIB ?= $(MKLROOT)/lib $(MKLROOT)/lib/intel64 else ifeq ($(BLAS), open) # OpenBLAS LIBRARIES += openblas From 337b07589f4e44761bdb9ef4c242f83ca40c9da5 Mon Sep 17 00:00:00 2001 From: shai Date: Mon, 21 Mar 2016 09:08:02 +0200 Subject: [PATCH 010/264] upgrading InfogainLoss layer: (1) incorporating Softmax layer to make the gradeint computation robust, much like SoftmaxWithLoss layer (see: http://stackoverflow.com/a/34917052/1714410 for more information). (2) supporting loss along axis --- include/caffe/layers/infogain_loss_layer.hpp | 35 ++++ src/caffe/layers/infogain_loss_layer.cpp | 172 ++++++++++++++++--- src/caffe/proto/caffe.proto | 1 + src/caffe/test/test_infogain_loss_layer.cpp | 83 ++++++++- 4 files changed, 257 insertions(+), 34 deletions(-) diff --git a/include/caffe/layers/infogain_loss_layer.hpp b/include/caffe/layers/infogain_loss_layer.hpp index 633f339a28e..edecde829ad 100644 --- a/include/caffe/layers/infogain_loss_layer.hpp +++ b/include/caffe/layers/infogain_loss_layer.hpp @@ -8,6 +8,7 @@ #include "caffe/proto/caffe.pb.h" #include "caffe/layers/loss_layer.hpp" +#include "caffe/layers/softmax_layer.hpp" namespace caffe { @@ -60,6 +61,12 @@ class InfogainLossLayer : public LossLayer { virtual inline int MinBottomBlobs() const { return 2; } virtual inline int MaxBottomBlobs() const { return 3; } + // InfogainLossLayer computes softmax prob internally. 
+ // optional second "top" outputs the softmax prob + virtual inline int ExactNumTopBlobs() const { return -1; } + virtual inline int MinTopBlobs() const { return 1; } + virtual inline int MaxTopBlobs() const { return 2; } + virtual inline const char* type() const { return "InfogainLoss"; } protected: @@ -102,7 +109,35 @@ class InfogainLossLayer : public LossLayer { virtual void Backward_cpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); + /// Read the normalization mode parameter and compute the normalizer based + /// on the blob size. If normalization_mode is VALID, the count of valid + /// outputs will be read from valid_count, unless it is -1 in which case + /// all outputs are assumed to be valid. + virtual Dtype get_normalizer( + LossParameter_NormalizationMode normalization_mode, int valid_count); + /// fill sum_rows_H_ according to matrix H + virtual void sum_rows_of_H(const Blob* H); + + /// The internal SoftmaxLayer used to map predictions to a distribution. + shared_ptr > softmax_layer_; + /// prob stores the output probability predictions from the SoftmaxLayer. + Blob prob_; + /// bottom vector holder used in call to the underlying SoftmaxLayer::Forward + vector*> softmax_bottom_vec_; + /// top vector holder used in call to the underlying SoftmaxLayer::Forward + vector*> softmax_top_vec_; + Blob infogain_; + Blob sum_rows_H_; // cache the row sums of H. + + /// Whether to ignore instances with a certain label. + bool has_ignore_label_; + /// The label indicating that an instance should be ignored. + int ignore_label_; + /// How to normalize the output loss. 
+ LossParameter_NormalizationMode normalization_; + + int infogain_axis_, outer_num_, inner_num_, num_labels_; }; } // namespace caffe diff --git a/src/caffe/layers/infogain_loss_layer.cpp b/src/caffe/layers/infogain_loss_layer.cpp index 624d3118124..3c3f460ec34 100644 --- a/src/caffe/layers/infogain_loss_layer.cpp +++ b/src/caffe/layers/infogain_loss_layer.cpp @@ -3,7 +3,8 @@ #include #include "caffe/layers/infogain_loss_layer.hpp" -#include "caffe/util/io.hpp" +#include "caffe/util/io.hpp" // for bolb reading of matrix H +#include "caffe/util/math_functions.hpp" namespace caffe { @@ -11,6 +12,31 @@ template void InfogainLossLayer::LayerSetUp( const vector*>& bottom, const vector*>& top) { LossLayer::LayerSetUp(bottom, top); + // internal softmax layer + LayerParameter softmax_layer_param(this->layer_param_); + SoftmaxParameter* softmax_param = softmax_layer_param.mutable_softmax_param(); + softmax_param->set_axis(this->layer_param_.infogain_loss_param().axis()); + softmax_layer_param.set_type("Softmax"); + softmax_layer_param.clear_loss_weight(); + softmax_layer_param.add_loss_weight(1); + softmax_layer_ = LayerRegistry::CreateLayer(softmax_layer_param); + softmax_bottom_vec_.clear(); + softmax_bottom_vec_.push_back(bottom[0]); + softmax_top_vec_.clear(); + softmax_top_vec_.push_back(&prob_); + softmax_layer_->SetUp(softmax_bottom_vec_, softmax_top_vec_); + + // ignore label + has_ignore_label_ = + this->layer_param_.loss_param().has_ignore_label(); + if (has_ignore_label_) { + ignore_label_ = this->layer_param_.loss_param().ignore_label(); + } + // normalization + CHECK(!this->layer_param_.loss_param().has_normalize()) + << "normalize is deprecated. 
use \"normalization\""; + normalization_ = this->layer_param_.loss_param().normalization(); + // matrix H if (bottom.size() < 3) { CHECK(this->layer_param_.infogain_loss_param().has_source()) << "Infogain matrix source must be specified."; @@ -25,28 +51,86 @@ template void InfogainLossLayer::Reshape( const vector*>& bottom, const vector*>& top) { LossLayer::Reshape(bottom, top); + softmax_layer_->Reshape(softmax_bottom_vec_, softmax_top_vec_); + infogain_axis_ = + bottom[0]->CanonicalAxisIndex( + this->layer_param_.infogain_loss_param().axis()); + outer_num_ = bottom[0]->count(0, infogain_axis_); + inner_num_ = bottom[0]->count(infogain_axis_ + 1); + CHECK_EQ(outer_num_ * inner_num_, bottom[1]->count()) + << "Number of labels must match number of predictions; " + << "e.g., if infogain axis == 1 and prediction shape is (N, C, H, W), " + << "label count (number of labels) must be N*H*W, " + << "with integer values in {0, 1, ..., C-1}."; + num_labels_ = bottom[0]->shape(infogain_axis_); Blob* infogain = NULL; if (bottom.size() < 3) { infogain = &infogain_; } else { infogain = bottom[2]; } - CHECK_EQ(bottom[1]->channels(), 1); - CHECK_EQ(bottom[1]->height(), 1); - CHECK_EQ(bottom[1]->width(), 1); - const int num = bottom[0]->num(); - const int dim = bottom[0]->count() / num; - CHECK_EQ(infogain->num(), 1); - CHECK_EQ(infogain->channels(), 1); - CHECK_EQ(infogain->height(), dim); - CHECK_EQ(infogain->width(), dim); + CHECK_EQ(infogain->count(), num_labels_*num_labels_); + sum_rows_H_.Reshape(vector(1, num_labels_)); + if (bottom.size() == 2) { + // H is provided as a parameter and will not change. 
sum rows once + sum_rows_of_H(infogain); + } + if (top.size() >= 2) { + // softmax output + top[1]->ReshapeLike(*bottom[0]); + } +} + +template +Dtype InfogainLossLayer::get_normalizer( + LossParameter_NormalizationMode normalization_mode, int valid_count) { + Dtype normalizer; + switch (normalization_mode) { + case LossParameter_NormalizationMode_FULL: + normalizer = Dtype(outer_num_ * inner_num_); + break; + case LossParameter_NormalizationMode_VALID: + if (valid_count == -1) { + normalizer = Dtype(outer_num_ * inner_num_); + } else { + normalizer = Dtype(valid_count); + } + break; + case LossParameter_NormalizationMode_BATCH_SIZE: + normalizer = Dtype(outer_num_); + break; + case LossParameter_NormalizationMode_NONE: + normalizer = Dtype(1); + break; + default: + LOG(FATAL) << "Unknown normalization mode: " + << LossParameter_NormalizationMode_Name(normalization_mode); + } + // Some users will have no labels for some examples in order to 'turn off' a + // particular loss in a multi-task setup. The max prevents NaNs in that case. + return std::max(Dtype(1.0), normalizer); } +template +void InfogainLossLayer::sum_rows_of_H(const Blob* H) { + CHECK_EQ(H->count(), num_labels_*num_labels_) + << "H must be " << num_labels_ << "x" << num_labels_; + const Dtype* infogain_mat = H->cpu_data(); + Dtype* sum = sum_rows_H_.mutable_cpu_data(); + for ( int row = 0; row < num_labels_ ; row++ ) { + sum[row] = 0; + for ( int col = 0; col < num_labels_ ; col++ ) { + sum[row] += infogain_mat[row*num_labels_+col]; + } + } +} template void InfogainLossLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); + // The forward pass computes the softmax prob values. 
+ softmax_layer_->Forward(softmax_bottom_vec_, softmax_top_vec_); + const Dtype* prob_data = prob_.cpu_data(); const Dtype* bottom_label = bottom[1]->cpu_data(); const Dtype* infogain_mat = NULL; if (bottom.size() < 3) { @@ -54,17 +138,30 @@ void InfogainLossLayer::Forward_cpu(const vector*>& bottom, } else { infogain_mat = bottom[2]->cpu_data(); } - int num = bottom[0]->num(); - int dim = bottom[0]->count() / bottom[0]->num(); + int count = 0; Dtype loss = 0; - for (int i = 0; i < num; ++i) { - int label = static_cast(bottom_label[i]); - for (int j = 0; j < dim; ++j) { - Dtype prob = std::max(bottom_data[i * dim + j], Dtype(kLOG_THRESHOLD)); - loss -= infogain_mat[label * dim + j] * log(prob); + for (int i = 0; i < outer_num_; ++i) { + for (int j = 0; j < inner_num_; j++) { + const int label_value = + static_cast(bottom_label[i * inner_num_ + j]); + if (has_ignore_label_ && label_value == ignore_label_) { + continue; + } + DCHECK_GE(label_value, 0); + DCHECK_LT(label_value, num_labels_); + for (int l = 0; l < num_labels_; l++) { + loss -= infogain_mat[label_value * num_labels_ + l] * + log(std::max( + prob_data[i * inner_num_*num_labels_ + l * inner_num_ + j], + Dtype(kLOG_THRESHOLD))); + } + ++count; } } - top[0]->mutable_cpu_data()[0] = loss / num; + top[0]->mutable_cpu_data()[0] = loss / get_normalizer(normalization_, count); + if (top.size() == 2) { + top[1]->ShareData(prob_); + } } template @@ -80,25 +177,44 @@ void InfogainLossLayer::Backward_cpu(const vector*>& top, << " Layer cannot backpropagate to infogain inputs."; } if (propagate_down[0]) { - const Dtype* bottom_data = bottom[0]->cpu_data(); + const Dtype* prob_data = prob_.cpu_data(); const Dtype* bottom_label = bottom[1]->cpu_data(); const Dtype* infogain_mat = NULL; if (bottom.size() < 3) { infogain_mat = infogain_.cpu_data(); } else { infogain_mat = bottom[2]->cpu_data(); + // H is provided as a "bottom" and might change. sum rows every time. 
+ sum_rows_of_H(bottom[2]); } + const Dtype* sum_rows_H = sum_rows_H_.cpu_data(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - int num = bottom[0]->num(); - int dim = bottom[0]->count() / bottom[0]->num(); - const Dtype scale = - top[0]->cpu_diff()[0] / num; - for (int i = 0; i < num; ++i) { - const int label = static_cast(bottom_label[i]); - for (int j = 0; j < dim; ++j) { - Dtype prob = std::max(bottom_data[i * dim + j], Dtype(kLOG_THRESHOLD)); - bottom_diff[i * dim + j] = scale * infogain_mat[label * dim + j] / prob; + const int dim = bottom[0]->count() / outer_num_; + int count = 0; + for (int i = 0; i < outer_num_; ++i) { + for (int j = 0; j < inner_num_; ++j) { + const int label_value = + static_cast(bottom_label[i * inner_num_ + j]); + DCHECK_GE(label_value, 0); + DCHECK_LT(label_value, num_labels_); + if (has_ignore_label_ && label_value == ignore_label_) { + for (int l = 0; l < num_labels_; ++l) { + bottom_diff[i * dim + l * inner_num_ + j] = 0; + } + } else { + for (int l = 0; l < num_labels_; ++l) { + bottom_diff[i * dim + l * inner_num_ + j] = + prob_data[i*dim + l*inner_num_ + j]*sum_rows_H[label_value] + - infogain_mat[label_value * num_labels_ + l]; + } + ++count; + } } } + // Scale gradient + Dtype loss_weight = top[0]->cpu_diff()[0] / + get_normalizer(normalization_, count); + caffe_scal(bottom[0]->count(), loss_weight, bottom_diff); } } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 6900bb71482..591e9647258 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -794,6 +794,7 @@ message ImageDataParameter { message InfogainLossParameter { // Specify the infogain matrix source. 
optional string source = 1; + optional int32 axis = 2 [default = 1]; // axis of prob } message InnerProductParameter { diff --git a/src/caffe/test/test_infogain_loss_layer.cpp b/src/caffe/test/test_infogain_loss_layer.cpp index a24ac683dc5..34f21271a62 100644 --- a/src/caffe/test/test_infogain_loss_layer.cpp +++ b/src/caffe/test/test_infogain_loss_layer.cpp @@ -1,3 +1,4 @@ +#include #include #include "gtest/gtest.h" @@ -18,17 +19,22 @@ class InfogainLossLayerTest : public MultiDeviceTest { protected: InfogainLossLayerTest() - : blob_bottom_data_(new Blob(10, 5, 1, 1)), - blob_bottom_label_(new Blob(10, 1, 1, 1)), + : blob_bottom_data_(new Blob(4, 2, 5, 2)), + blob_bottom_label_(new Blob(4, 2, 1, 2)), blob_bottom_infogain_(new Blob(1, 1, 5, 5)), - blob_top_loss_(new Blob()) { + blob_top_loss_(new Blob()), + blob_top_prob_(new Blob()), + inner_(2), outer_(4*2), num_labels_(5) { Caffe::set_random_seed(1701); FillerParameter filler_param; - PositiveUnitballFiller filler(filler_param); + filler_param.set_min(-0.5); + filler_param.set_max(2.0); + UniformFiller filler(filler_param); filler.Fill(this->blob_bottom_data_); blob_bottom_vec_.push_back(blob_bottom_data_); for (int i = 0; i < blob_bottom_label_->count(); ++i) { - blob_bottom_label_->mutable_cpu_data()[i] = caffe_rng_rand() % 5; + blob_bottom_label_->mutable_cpu_data()[i] = + caffe_rng_rand() % num_labels_; } blob_bottom_vec_.push_back(blob_bottom_label_); filler_param.set_min(0.1); @@ -37,29 +43,94 @@ class InfogainLossLayerTest : public MultiDeviceTest { infogain_filler.Fill(this->blob_bottom_infogain_); blob_bottom_vec_.push_back(blob_bottom_infogain_); blob_top_vec_.push_back(blob_top_loss_); + blob_top_vec_.push_back(blob_top_prob_); } virtual ~InfogainLossLayerTest() { delete blob_bottom_data_; delete blob_bottom_label_; delete blob_bottom_infogain_; delete blob_top_loss_; + delete blob_top_prob_; } Blob* const blob_bottom_data_; Blob* const blob_bottom_label_; Blob* const blob_bottom_infogain_; Blob* const 
blob_top_loss_; + Blob* const blob_top_prob_; vector*> blob_bottom_vec_; vector*> blob_top_vec_; + int inner_, outer_, num_labels_; }; TYPED_TEST_CASE(InfogainLossLayerTest, TestDtypesAndDevices); +TYPED_TEST(InfogainLossLayerTest, TestInfogainLoss) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + layer_param.mutable_infogain_loss_param()->set_axis(2); + layer_param.clear_loss_weight(); + layer_param.add_loss_weight(1); + layer_param.add_loss_weight(0); + /*vector* lw = layer_param.mutable_loss_weight(); + lw->clear(); + lw->push_back(1); + lw->push_back(1);*/ + InfogainLossLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + // Now, check values + const Dtype* data = this->blob_bottom_vec_[0]->cpu_data(); + const Dtype* prob = this->blob_top_vec_[1]->cpu_data(); + const Dtype* labels = this->blob_bottom_vec_[1]->cpu_data(); + const Dtype* H = this->blob_bottom_vec_[2]->cpu_data(); + // first. 
test the prob top + CHECK_EQ(this->blob_bottom_vec_[0]->num_axes(), + this->blob_top_vec_[1]->num_axes()) + << "prob top shape not match bottom data"; + for (int ai = 0 ; ai < this->blob_bottom_vec_[0]->num_axes(); ai++) { + CHECK_EQ(this->blob_bottom_vec_[0]->shape(ai), + this->blob_top_vec_[1]->shape(ai)) + << "prob top shape not match bottom data"; + } + vector est_prob(this->num_labels_, 0); + for ( int i = 0 ; i < this->outer_; i++ ) { + for ( int j = 0; j < this->inner_; j++ ) { + Dtype den = 0; + for ( int l = 0; l < this->num_labels_; l++ ) { + est_prob[l] = std::exp( + data[i*this->num_labels_*this->inner_ + l*this->inner_ + j]); + den += est_prob[l]; + } + for ( int l = 0; l < this->num_labels_; l++ ) { + EXPECT_NEAR(prob[i*this->num_labels_*this->inner_ + l*this->inner_ + j], + est_prob[l]/den, 1e-6); + } + } + } + Dtype loss = 0; // loss from prob top + for ( int i = 0 ; i < this->outer_; i++ ) { + for ( int j = 0; j < this->inner_; j++ ) { + int gt = static_cast(labels[i*this->inner_+j]); + for ( int l = 0; l < this->num_labels_; l++ ) { + loss -= H[gt*this->num_labels_ + l] * + log(std::max( + prob[i*this->num_labels_*this->inner_ + l*this->inner_ + j], + Dtype(kLOG_THRESHOLD))); + } + } + } + EXPECT_NEAR(this->blob_top_loss_->cpu_data()[0], + loss/(this->outer_*this->inner_), 1e-6); +} TYPED_TEST(InfogainLossLayerTest, TestGradient) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; + layer_param.mutable_infogain_loss_param()->set_axis(2); InfogainLossLayer layer(layer_param); - GradientChecker checker(1e-4, 2e-2, 1701, 1, 0.01); + this->blob_top_vec_.clear(); // ignore prob top. 
+ this->blob_top_vec_.push_back(this->blob_top_loss_); + GradientChecker checker(1e-4, 2e-2, 1701); // no "kink" checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, this->blob_top_vec_, 0); } From 8c041a7cf3e571b175cfd8859f1af5f067f8cd7a Mon Sep 17 00:00:00 2001 From: rscohn2 Date: Sat, 26 Mar 2016 10:00:26 -0400 Subject: [PATCH 011/264] Update info about MKL licensing The instructions say that MKL is free for students, but as of 8/2015, MKL is free for everyone with community licensing. --- docs/installation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/installation.md b/docs/installation.md index 893164584d9..e273034fe08 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -52,7 +52,7 @@ Caffe requires BLAS as the backend of its matrix and vector computations. There are several implementations of this library. The choice is yours: * [ATLAS](http://math-atlas.sourceforge.net/): free, open source, and so the default for Caffe. -* [Intel MKL](http://software.intel.com/en-us/intel-mkl): commercial and optimized for Intel CPUs, with a free trial and [student](http://software.intel.com/en-us/intel-education-offerings) licenses. +* [Intel MKL](http://software.intel.com/en-us/intel-mkl): commercial and optimized for Intel CPUs, with [free](https://registrationcenter.intel.com/en/forms/?productid=2558) licenses. 1. Install MKL. 2. Set up MKL environment (Details: [Linux](https://software.intel.com/en-us/node/528499), [OS X](https://software.intel.com/en-us/node/528659)). Example: *source /opt/intel/mkl/bin/mklvars.sh intel64* 3. 
Set `BLAS := mkl` in `Makefile.config` From a66bea30d6c0706f106b355c7cafc9e7ffae7bb5 Mon Sep 17 00:00:00 2001 From: An Tran Date: Wed, 30 Mar 2016 17:32:10 +0800 Subject: [PATCH 012/264] small bug in pooling_layer.cu --- src/caffe/layers/pooling_layer.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/layers/pooling_layer.cu b/src/caffe/layers/pooling_layer.cu index 1ea46cc81b1..81ead1e8686 100644 --- a/src/caffe/layers/pooling_layer.cu +++ b/src/caffe/layers/pooling_layer.cu @@ -138,7 +138,7 @@ __global__ void StoPoolForwardTest(const int nthreads, const int wstart = pw * stride_w; const int wend = min(wstart + kernel_w, width); // We set cumsum to be 0 to avoid divide-by-zero problems - Dtype cumsum = FLT_MIN; + Dtype cumsum = 0.; Dtype cumvalues = 0.; const Dtype* const bottom_slice = bottom_data + (n * channels + c) * height * width; From 7a8183642cb1a12945d0a9ad2bddf8304428b4c8 Mon Sep 17 00:00:00 2001 From: Daniel Gordon Date: Wed, 30 Mar 2016 14:27:19 -0700 Subject: [PATCH 013/264] Use lazy initialization to reuse orderd dict/list creations to save time on repeated calls. 
--- python/caffe/pycaffe.py | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index c5c0b824a77..ca6d050e2bd 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -27,7 +27,9 @@ def _Net_blobs(self): An OrderedDict (bottom to top, i.e., input to output) of network blobs indexed by name """ - return OrderedDict(zip(self._blob_names, self._blobs)) + if not hasattr(self, '_blobs_dict'): + self._blobs_dict = OrderedDict(zip(self._blob_names, self._blobs)) + return self._blobs_dict @property @@ -36,7 +38,10 @@ def _Net_blob_loss_weights(self): An OrderedDict (bottom to top, i.e., input to output) of network blob loss weights indexed by name """ - return OrderedDict(zip(self._blob_names, self._blob_loss_weights)) + if not hasattr(self, '_blobs_loss_weights_dict'): + self._blob_loss_weights_dict = OrderedDict(zip(self._blob_names, + self._blob_loss_weights)) + return self._blob_loss_weights_dict @property @@ -46,19 +51,28 @@ def _Net_params(self): parameters indexed by name; each is a list of multiple blobs (e.g., weights and biases) """ - return OrderedDict([(name, lr.blobs) - for name, lr in zip(self._layer_names, self.layers) - if len(lr.blobs) > 0]) + if not hasattr(self, '_params_dict'): + self._params_dict = OrderedDict([(name, lr.blobs) + for name, lr in zip( + self._layer_names, self.layers) + if len(lr.blobs) > 0]) + return self._params_dict @property def _Net_inputs(self): - return [list(self.blobs.keys())[i] for i in self._inputs] + if not hasattr(self, '_input_list'): + keys = list(self.blobs.keys()) + self._input_list = [keys[i] for i in self._inputs] + return self._input_list @property def _Net_outputs(self): - return [list(self.blobs.keys())[i] for i in self._outputs] + if not hasattr(self, '_output_list'): + keys = list(self.blobs.keys()) + self._output_list = [keys[i] for i in self._outputs] + return self._output_list def _Net_forward(self, 
blobs=None, start=None, end=None, **kwargs): From d17fbea6aad122c3818d5ef3593487869948b4b7 Mon Sep 17 00:00:00 2001 From: An Tran Date: Thu, 31 Mar 2016 10:27:31 +0800 Subject: [PATCH 014/264] avoid divide by zeros, suggested by SeanBell --- src/caffe/layers/pooling_layer.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/layers/pooling_layer.cu b/src/caffe/layers/pooling_layer.cu index 81ead1e8686..46eddb94924 100644 --- a/src/caffe/layers/pooling_layer.cu +++ b/src/caffe/layers/pooling_layer.cu @@ -149,7 +149,7 @@ __global__ void StoPoolForwardTest(const int nthreads, cumvalues += bottom_slice[h * width + w] * bottom_slice[h * width + w]; } } - top_data[index] = cumvalues / cumsum; + top_data[index] = (cumsum > 0.) ? cumvalues / cumsum : 0.; } } From dee01c8b5f90a69fd3e73ee455f89aab56e2dbb7 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Mon, 4 Apr 2016 11:36:15 -0700 Subject: [PATCH 015/264] test_net.cpp: add TestForcePropagateDown --- src/caffe/test/test_net.cpp | 102 ++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index 1e0788ec127..92fd317fee8 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -716,6 +716,61 @@ class NetTest : public MultiDeviceTest { InitNetFromProtoString(proto); } + virtual void InitForcePropNet(bool test_force_true) { + string proto = + "name: 'ForcePropTestNetwork' " + "layer { " + " name: 'data' " + " type: 'DummyData' " + " dummy_data_param { " + " shape { " + " dim: 5 " + " dim: 2 " + " dim: 3 " + " dim: 4 " + " } " + " data_filler { " + " type: 'gaussian' " + " std: 0.01 " + " } " + " shape { " + " dim: 5 " + " } " + " data_filler { " + " type: 'constant' " + " value: 0 " + " } " + " } " + " top: 'data' " + " top: 'label' " + "} " + "layer { " + " name: 'innerproduct' " + " type: 'InnerProduct' " + " inner_product_param { " + " num_output: 1 " + " weight_filler { " + " type: 'gaussian' 
" + " std: 0.01 " + " } " + " } " + " bottom: 'data' " + " top: 'innerproduct' "; + if (test_force_true) { + proto += " propagate_down: true "; + } + proto += + "} " + "layer { " + " name: 'loss' " + " bottom: 'innerproduct' " + " bottom: 'label' " + " top: 'cross_entropy_loss' " + " type: 'SigmoidCrossEntropyLoss' " + "} "; + InitNetFromProtoString(proto); + } + int seed_; shared_ptr > net_; }; @@ -2371,4 +2426,51 @@ TYPED_TEST(NetTest, TestSkipPropagateDown) { } } +TYPED_TEST(NetTest, TestForcePropagateDown) { + this->InitForcePropNet(false); + vector layer_need_backward = this->net_->layer_need_backward(); + for (int layer_id = 0; layer_id < this->net_->layers().size(); ++layer_id) { + const string& layer_name = this->net_->layer_names()[layer_id]; + const vector need_backward = + this->net_->bottom_need_backward()[layer_id]; + if (layer_name == "data") { + ASSERT_EQ(need_backward.size(), 0); + EXPECT_FALSE(layer_need_backward[layer_id]); + } else if (layer_name == "innerproduct") { + ASSERT_EQ(need_backward.size(), 1); + EXPECT_FALSE(need_backward[0]); // data + EXPECT_TRUE(layer_need_backward[layer_id]); + } else if (layer_name == "loss") { + ASSERT_EQ(need_backward.size(), 2); + EXPECT_TRUE(need_backward[0]); // innerproduct + EXPECT_FALSE(need_backward[1]); // label + EXPECT_TRUE(layer_need_backward[layer_id]); + } else { + LOG(FATAL) << "Unknown layer: " << layer_name; + } + } + this->InitForcePropNet(true); + layer_need_backward = this->net_->layer_need_backward(); + for (int layer_id = 0; layer_id < this->net_->layers().size(); ++layer_id) { + const string& layer_name = this->net_->layer_names()[layer_id]; + const vector need_backward = + this->net_->bottom_need_backward()[layer_id]; + if (layer_name == "data") { + ASSERT_EQ(need_backward.size(), 0); + EXPECT_FALSE(layer_need_backward[layer_id]); + } else if (layer_name == "innerproduct") { + ASSERT_EQ(need_backward.size(), 1); + EXPECT_TRUE(need_backward[0]); // data + 
EXPECT_TRUE(layer_need_backward[layer_id]); + } else if (layer_name == "loss") { + ASSERT_EQ(need_backward.size(), 2); + EXPECT_TRUE(need_backward[0]); // innerproduct + EXPECT_FALSE(need_backward[1]); // label + EXPECT_TRUE(layer_need_backward[layer_id]); + } else { + LOG(FATAL) << "Unknown layer: " << layer_name; + } + } +} + } // namespace caffe From 77cde9c84126cb108f59e2673c2e6f59b33180fa Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Wed, 27 Jan 2016 12:55:41 -0800 Subject: [PATCH 016/264] Net: setting `propagate_down: true` forces backprop --- src/caffe/net.cpp | 9 ++++----- src/caffe/proto/caffe.proto | 7 ++++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 23d94c97c07..f0bf594936c 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -427,12 +427,11 @@ int Net::AppendBottom(const NetParameter& param, const int layer_id, bottom_vecs_[layer_id].push_back(blobs_[blob_id].get()); bottom_id_vecs_[layer_id].push_back(blob_id); available_blobs->erase(blob_name); - bool propagate_down = true; + bool need_backward = blob_need_backward_[blob_id]; // Check if the backpropagation on bottom_id should be skipped - if (layer_param.propagate_down_size() > 0) - propagate_down = layer_param.propagate_down(bottom_id); - const bool need_backward = blob_need_backward_[blob_id] && - propagate_down; + if (layer_param.propagate_down_size() > 0) { + need_backward = layer_param.propagate_down(bottom_id); + } bottom_need_backward_[layer_id].push_back(need_backward); return blob_id; } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 6900bb71482..650c87ae3a6 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -328,7 +328,12 @@ message LayerParameter { // The blobs containing the numeric parameters of the layer. repeated BlobProto blobs = 7; - // Specifies on which bottoms the backpropagation should be skipped. 
+ // Specifies whether to backpropagate to each bottom. If unspecified, + // Caffe will automatically infer whether each input needs backpropagation + // to compute parameter gradients. If set to true for some inputs, + // backpropagation to those inputs is forced; if set false for some inputs, + // backpropagation to those inputs is skipped. + // // The size must be either 0 or equal to the number of bottoms. repeated bool propagate_down = 11; From 3c3dc95766c8caa374c643b51bd92a27f787b8b5 Mon Sep 17 00:00:00 2001 From: emmanuel maggiori Date: Fri, 8 Apr 2016 10:25:12 +0200 Subject: [PATCH 017/264] Solving issue with exp layer with base e --- src/caffe/layers/exp_layer.cpp | 3 ++- src/caffe/test/test_neuron_layer.cpp | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/caffe/layers/exp_layer.cpp b/src/caffe/layers/exp_layer.cpp index 1f4a309fe25..0c1b463ae12 100644 --- a/src/caffe/layers/exp_layer.cpp +++ b/src/caffe/layers/exp_layer.cpp @@ -23,7 +23,8 @@ void ExpLayer::LayerSetUp(const vector*>& bottom, const Dtype input_scale = this->layer_param_.exp_param().scale(); const Dtype input_shift = this->layer_param_.exp_param().shift(); inner_scale_ = log_base * input_scale; - outer_scale_ = (input_shift == Dtype(0)) ? Dtype(1) : pow(base, input_shift); + outer_scale_ = (input_shift == Dtype(0)) ? Dtype(1) : + ( (base != Dtype(-1)) ? 
pow(base, input_shift) : exp(input_shift) ); } template diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp index dd591f7d204..342f825cec3 100644 --- a/src/caffe/test/test_neuron_layer.cpp +++ b/src/caffe/test/test_neuron_layer.cpp @@ -394,6 +394,26 @@ TYPED_TEST(NeuronLayerTest, TestExpGradient) { this->TestExpGradient(kBase, kScale, kShift); } +TYPED_TEST(NeuronLayerTest, TestExpLayerWithShift) { + typedef typename TypeParam::Dtype Dtype; + // Test default base of "-1" -- should actually set base := e, + // with a non-zero shift + const Dtype kBase = -1; + const Dtype kScale = 1; + const Dtype kShift = 1; + this->TestExpForward(kBase, kScale, kShift); +} + +TYPED_TEST(NeuronLayerTest, TestExpGradientWithShift) { + typedef typename TypeParam::Dtype Dtype; + // Test default base of "-1" -- should actually set base := e, + // with a non-zero shift + const Dtype kBase = -1; + const Dtype kScale = 1; + const Dtype kShift = 1; + this->TestExpGradient(kBase, kScale, kShift); +} + TYPED_TEST(NeuronLayerTest, TestExpLayerBase2) { typedef typename TypeParam::Dtype Dtype; const Dtype kBase = 2; From 09130ce35604a991cee41c942ff8845468cacfa7 Mon Sep 17 00:00:00 2001 From: Thomas Date: Mon, 11 Apr 2016 12:52:34 -0500 Subject: [PATCH 018/264] Fix protobuf message generation The latest versions of protobuf do not reveal empty message fields with dir(). This uses the documented way of determining all of a message's fields and so is compatible with past and future versions of protobuf. 
--- python/caffe/net_spec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/caffe/net_spec.py b/python/caffe/net_spec.py index 63de4cce4b2..5fb1f0b3fb1 100644 --- a/python/caffe/net_spec.py +++ b/python/caffe/net_spec.py @@ -32,7 +32,7 @@ def param_name_dict(): # get all parameter names (typically underscore case) and corresponding # type names (typically camel case), which contain the layer names # (note that not all parameters correspond to layers, but we'll ignore that) - param_names = [s for s in dir(layer) if s.endswith('_param')] + param_names = [f.name for f in layer.DESCRIPTOR.fields if f.name.endswith('_param')] param_type_names = [type(getattr(layer, s)).__name__ for s in param_names] # strip the final '_param' or 'Parameter' param_names = [s[:-len('_param')] for s in param_names] From 219532f5552fb48931776f5236b5ec3d99eccb2a Mon Sep 17 00:00:00 2001 From: Muneyuki Noguchi Date: Tue, 12 Apr 2016 23:19:27 +0900 Subject: [PATCH 019/264] Fix typo in help text for "-model" option --- tools/caffe.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/caffe.cpp b/tools/caffe.cpp index 305cfc3635d..d121fefc9d7 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -32,7 +32,7 @@ DEFINE_string(gpu, "", DEFINE_string(solver, "", "The solver definition protocol buffer text file."); DEFINE_string(model, "", - "The model definition protocol buffer text file.."); + "The model definition protocol buffer text file."); DEFINE_string(snapshot, "", "Optional; the snapshot solver state to resume training."); DEFINE_string(weights, "", From b265134710d78db4007471ccbe376c2c4221441a Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 13 Apr 2016 16:40:30 -0700 Subject: [PATCH 020/264] [docs] install: CUDA 7+ and cuDNN v4 compatible Latest CUDA versions are all compatible, and Caffe has been compatible with cuDNN v4 since PR #3439 --- docs/installation.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/docs/installation.md b/docs/installation.md index 893164584d9..e6c6886df52 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -20,7 +20,7 @@ When updating Caffe, it's best to `make clean` before re-compiling. Caffe has several dependencies: * [CUDA](https://developer.nvidia.com/cuda-zone) is required for GPU mode. - * library version 7.0 and the latest driver version are recommended, but 6.* is fine too + * library version 7+ and the latest driver version are recommended, but 6.* is fine too * 5.5, and 5.0 are compatible but considered legacy * [BLAS](http://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms) via ATLAS, MKL, or OpenBLAS. * [Boost](http://www.boost.org/) >= 1.55 @@ -30,14 +30,14 @@ Optional dependencies: * [OpenCV](http://opencv.org/) >= 2.4 including 3.0 * IO libraries: `lmdb`, `leveldb` (note: leveldb requires `snappy`) -* cuDNN for GPU acceleration (v3) +* cuDNN for GPU acceleration (v4) Pycaffe and Matcaffe interfaces have their own natural needs. * For Python Caffe: `Python 2.7` or `Python 3.3+`, `numpy (>= 1.7)`, boost-provided `boost.python` * For MATLAB Caffe: MATLAB with the `mex` compiler. -**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v3; older versions are supported in older Caffe. +**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v4; older versions are supported in older Caffe. 
**CPU-only Caffe**: for cold-brewed CPU-only Caffe uncomment the `CPU_ONLY := 1` flag in `Makefile.config` to configure and build Caffe without CUDA. This is helpful for cloud or cluster deployment. From 462a688fb8997f87b19c3c51860eb32d5458b246 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 13 Apr 2016 16:43:39 -0700 Subject: [PATCH 021/264] [docs] install: include latest versions and platforms, highlight guides Caffe runs on Ubuntu, OS X, and RHEL (+ company) in master with branches for OpenCL and Windows. Docker is a nice route to out-of-the-box brewing. --- docs/installation.md | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index e6c6886df52..9aa83527fd5 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -5,13 +5,23 @@ title: Installation # Installation Prior to installing, have a glance through this guide and take note of the details for your platform. -We install and run Caffe on Ubuntu 14.04 and 12.04, OS X 10.10 / 10.9 / 10.8, and AWS. -The official Makefile and `Makefile.config` build are complemented by an automatic CMake build from the community. +We install and run Caffe on Ubuntu 16.04–12.04, OS X 10.11–10.8, and through Docker and AWS. +The official Makefile and `Makefile.config` build are complemented by a [community CMake build](#cmake-build). 
+ +**Step-by-step Instructions**: + +- [Docker setup](https://github.com/BVLC/caffe/tree/master/docker) *out-of-the-box brewing* +- [Ubuntu installation](install_apt.html) *the standard platform* +- [OS X installation](install_osx.html) +- [RHEL / CentOS / Fedora installation](install_yum.html) +- [Windows](https://github.com/BVLC/caffe/tree/windows) *see the Windows branch led by Microsoft* +- [OpenCL](https://github.com/BVLC/caffe/tree/opencl) *see the OpenCL branch led by Fabian Tschopp* + +**Overview**: - [Prerequisites](#prerequisites) - [Compilation](#compilation) - [Hardware](#hardware) -- Platforms: [Ubuntu guide](install_apt.html), [OS X guide](install_osx.html), and [RHEL / CentOS / Fedora guide](install_yum.html) When updating Caffe, it's best to `make clean` before re-compiling. @@ -82,10 +92,6 @@ Install MATLAB, and make sure that its `mex` is in your `$PATH`. *Caffe's MATLAB interface works with versions 2015a, 2014a/b, 2013a/b, and 2012b.* -#### Windows - -There is an unofficial Windows port of Caffe at [niuzhiheng/caffe:windows](https://github.com/niuzhiheng/caffe). Thanks [@niuzhiheng](https://github.com/niuzhiheng)! - ## Compilation Caffe can be compiled with either Make or CMake. Make is officially supported while CMake is supported by the community. @@ -113,7 +119,7 @@ Be sure to set your MATLAB and Python paths in `Makefile.config` first! Now that you have installed Caffe, check out the [MNIST tutorial](gathered/examples/mnist.html) and the [reference ImageNet model tutorial](gathered/examples/imagenet.html). -### Compilation with CMake +### CMake Build In lieu of manually editing `Makefile.config` to configure the build, Caffe offers an unofficial CMake build thanks to @Nerei, @akosiorek, and other members of the community. It requires CMake version >= 2.8.7. 
The basic steps are as follows: From 0ef5918bbb7cb6e6d733ef91acff5349febc2bc7 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 13 Apr 2016 18:52:50 -0700 Subject: [PATCH 022/264] [docs] install: be more firm about compute capability >= 3.0 --- docs/installation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/installation.md b/docs/installation.md index 9aa83527fd5..95a57fdffda 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -137,7 +137,7 @@ See [PR #1667](https://github.com/BVLC/caffe/pull/1667) for options and details. **Laboratory Tested Hardware**: Berkeley Vision runs Caffe with K40s, K20s, and Titans including models at ImageNet/ILSVRC scale. We also run on GTX series cards (980s and 770s) and GPU-equipped MacBook Pros. We have not encountered any trouble in-house with devices with CUDA capability >= 3.0. All reported hardware issues thus-far have been due to GPU configuration, overheating, and the like. -**CUDA compute capability**: devices with compute capability <= 2.0 may have to reduce CUDA thread numbers and batch sizes due to hardware constraints. Your mileage may vary. +**CUDA compute capability**: devices with compute capability <= 2.0 may have to reduce CUDA thread numbers and batch sizes due to hardware constraints. Brew with caution; we recommend compute capbility >= 3.0. Once installed, check your times against our [reference performance numbers](performance_hardware.html) to make sure everything is configured properly. 
From b9164503ff51e8167cac9feb3f9a3d99778f13a8 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 13 Apr 2016 18:53:28 -0700 Subject: [PATCH 023/264] [docs] install: include more lab tested hardware --- docs/installation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/installation.md b/docs/installation.md index 95a57fdffda..aa946911c63 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -135,7 +135,7 @@ See [PR #1667](https://github.com/BVLC/caffe/pull/1667) for options and details. ## Hardware -**Laboratory Tested Hardware**: Berkeley Vision runs Caffe with K40s, K20s, and Titans including models at ImageNet/ILSVRC scale. We also run on GTX series cards (980s and 770s) and GPU-equipped MacBook Pros. We have not encountered any trouble in-house with devices with CUDA capability >= 3.0. All reported hardware issues thus-far have been due to GPU configuration, overheating, and the like. +**Laboratory Tested Hardware**: Berkeley Vision runs Caffe with Titan Xs, K80s, GTX 980s, K40s, K20s, Titans, and GTX 770s including models at ImageNet/ILSVRC scale. We have not encountered any trouble in-house with devices with CUDA capability >= 3.0. All reported hardware issues thus-far have been due to GPU configuration, overheating, and the like. **CUDA compute capability**: devices with compute capability <= 2.0 may have to reduce CUDA thread numbers and batch sizes due to hardware constraints. Brew with caution; we recommend compute capbility >= 3.0. 
From e867e60fa24985b112af9885ec553d5dd62f49bf Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 14 Apr 2016 22:56:37 -0700 Subject: [PATCH 024/264] [test] CropLayer: test dimensions check to reveal bounds checking bug --- src/caffe/test/test_crop_layer.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/caffe/test/test_crop_layer.cpp b/src/caffe/test/test_crop_layer.cpp index 45f24e2ee8d..ce2c736f644 100644 --- a/src/caffe/test/test_crop_layer.cpp +++ b/src/caffe/test/test_crop_layer.cpp @@ -91,6 +91,24 @@ TYPED_TEST(CropLayerTest, TestSetupShapeNegativeIndexing) { } } +TYPED_TEST(CropLayerTest, TestDimensionsCheck) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + // Reshape size blob to have incompatible sizes for uncropped dimensions: + // the size blob has more channels than the data blob, but this is fine + // since the channels dimension is not cropped in this configuration. + this->blob_bottom_1_->Reshape(2, 5, 4, 2); + CropLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < this->blob_top_->num_axes(); ++i) { + if (i < 2) { + EXPECT_EQ(this->blob_bottom_0_->shape(i), this->blob_top_->shape(i)); + } else { + EXPECT_EQ(this->blob_bottom_1_->shape(i), this->blob_top_->shape(i)); + } + } +} + TYPED_TEST(CropLayerTest, TestCropAll) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; From 75b0d40a856dda87f2e0de77b2c6626753e1e231 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 14 Apr 2016 22:16:07 -0700 Subject: [PATCH 025/264] [fix] CropLayer: check dimension bounds only for cropped dimensions check only the dimensions to be cropped for compatible sizes and offsets --- src/caffe/layers/crop_layer.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/caffe/layers/crop_layer.cpp b/src/caffe/layers/crop_layer.cpp index e81bdd732f3..849208f5666 100644 --- a/src/caffe/layers/crop_layer.cpp +++ 
b/src/caffe/layers/crop_layer.cpp @@ -61,12 +61,11 @@ void CropLayer::Reshape(const vector*>& bottom, // following axis crop_offset = param.offset(i - start_axis); } + // check that the crop and offset are within the dimension bounds + CHECK_GE(bottom[0]->shape(i) - crop_offset, bottom[1]->shape(i)) + << "the crop for dimension " << i << " is out-of-bounds with " + << "size " << bottom[1]->shape(i) << " and offset " << crop_offset; } - // Check that the image we are cropping minus the margin is bigger - // than the destination image. - CHECK_GE(bottom[0]->shape(i) - crop_offset, - bottom[1]->shape(i)) - << "invalid crop parameters in dimension: " << i; // Now set new size and offsets new_shape[i] = new_size; offsets[i] = crop_offset; From 00dc3d1ced4467be00ccc82b8509e4a25d54808d Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 14 Apr 2016 22:31:38 -0700 Subject: [PATCH 026/264] CropLayer: groom comments --- include/caffe/layers/crop_layer.hpp | 9 +++++++++ src/caffe/layers/crop_layer.cpp | 22 ++++++++-------------- src/caffe/layers/crop_layer.cu | 9 --------- 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/include/caffe/layers/crop_layer.hpp b/include/caffe/layers/crop_layer.hpp index 5c605b2ae9e..c4fda1220c3 100644 --- a/include/caffe/layers/crop_layer.hpp +++ b/include/caffe/layers/crop_layer.hpp @@ -44,6 +44,7 @@ class CropLayer : public Layer { vector offsets; private: + // Recursive copy function. void crop_copy(const vector*>& bottom, const vector*>& top, const vector& offsets, @@ -53,6 +54,14 @@ class CropLayer : public Layer { Dtype* dest_data, bool is_forward); + // Recursive copy function: this is similar to crop_copy() but loops over all + // but the last two dimensions to allow for ND cropping while still relying on + // a CUDA kernel for the innermost two dimensions for performance reasons. 
An + // alterantive implementation could rely on the kernel more by passing + // offsets, but this is problematic because of its variable length. + // Since in the standard (N,C,W,H) case N,C are usually not cropped a speedup + // could be achieved by not looping the application of the copy_kernel around + // these dimensions. void crop_copy_gpu(const vector*>& bottom, const vector*>& top, const vector& offsets, diff --git a/src/caffe/layers/crop_layer.cpp b/src/caffe/layers/crop_layer.cpp index 849208f5666..aecdcd63194 100644 --- a/src/caffe/layers/crop_layer.cpp +++ b/src/caffe/layers/crop_layer.cpp @@ -15,8 +15,7 @@ namespace caffe { template void CropLayer::LayerSetUp(const vector*>& bottom, const vector*>& top) { - // All logic that depends only on the number of dimensions is here, - // the rest is in Reshape because it depends on Blob size. + // LayerSetup() handles the number of dimensions; Reshape() handles the sizes. // bottom[0] supplies the data // bottom[1] supplies the size const CropParameter& param = this->layer_param_.crop_param(); @@ -40,40 +39,35 @@ void CropLayer::Reshape(const vector*>& bottom, int input_dim = bottom[0]->num_axes(); const int start_axis = bottom[0]->CanonicalAxisIndex(param.axis()); - // initialize all offsets to 0 + // Initialize offsets to 0 and the new shape to the current shape of the data. offsets = vector(input_dim, 0); - // initialize new shape to bottom[0] vector new_shape(bottom[0]->shape()); - // apply crops + // Determine crop offsets and the new shape post-crop. for (int i = 0; i < input_dim; ++i) { int crop_offset = 0; - int new_size = bottom[0]->shape(i); + int new_size = bottom[0]->shape(i); if (i >= start_axis) { new_size = bottom[1]->shape(i); - if (param.offset_size() == 1) { - // if only one crop value is supplied, crop all dimensions after axis - // by this crop value + // If only one offset is given, all crops have the same offset. 
crop_offset = param.offset(0); } else if (param.offset_size() > 1) { - // crop values specified must be equal to the number of dimensions - // following axis + // For several offsets, the number of offsets must be equal to the + // number of dimensions to crop, that is dimensions after the axis. crop_offset = param.offset(i - start_axis); } - // check that the crop and offset are within the dimension bounds + // Check that the crop and offset are within the dimension's bounds. CHECK_GE(bottom[0]->shape(i) - crop_offset, bottom[1]->shape(i)) << "the crop for dimension " << i << " is out-of-bounds with " << "size " << bottom[1]->shape(i) << " and offset " << crop_offset; } - // Now set new size and offsets new_shape[i] = new_size; offsets[i] = crop_offset; } top[0]->Reshape(new_shape); } -// recursive copy function template void CropLayer::crop_copy(const vector*>& bottom, const vector*>& top, diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu index 9ed8f7cce57..f78cecbbeee 100644 --- a/src/caffe/layers/crop_layer.cu +++ b/src/caffe/layers/crop_layer.cu @@ -22,15 +22,6 @@ __global__ void copy_kernel(const int n, const int height, const int width, } } -// recursive copy function, this function is similar to crop_copy but loops -// over all but the last two dimensions. It is implemented this way to allow -// for ND cropping while still relying on a CUDA kernel for the innermost -// two dimensions for performance reasons. -// An alternative way to implement ND cropping relying more on the kernel -// would require passing offsets to the kernel, which is a bit problematic -// because it is of variable length. Since in the standard (N,C,W,H) case -// N,C are usually not cropped a speedup could be achieved by not looping -// the application of the copy_kernel around these dimensions. 
template void CropLayer::crop_copy_gpu(const vector*>& bottom, const vector*>& top, From 1c49130c33ebdec042ff6da18d03b7c5f6ad8c93 Mon Sep 17 00:00:00 2001 From: ZhouYzzz Date: Fri, 15 Apr 2016 22:51:49 +0800 Subject: [PATCH 027/264] Allow the python layer have attribute "phase" --- include/caffe/layers/python_layer.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/caffe/layers/python_layer.hpp b/include/caffe/layers/python_layer.hpp index b839d52684e..66dbbdf13b8 100644 --- a/include/caffe/layers/python_layer.hpp +++ b/include/caffe/layers/python_layer.hpp @@ -26,6 +26,7 @@ class PythonLayer : public Layer { } self_.attr("param_str") = bp::str( this->layer_param_.python_param().param_str()); + self_.attr("phase") = static_cast(this->phase_); self_.attr("setup")(bottom, top); } virtual void Reshape(const vector*>& bottom, From 458928a3bc1ee94e5f12bb254a5de819c449fc0a Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Mon, 18 Apr 2016 08:54:21 -0700 Subject: [PATCH 028/264] Typo in docs/installation.md --- docs/installation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/installation.md b/docs/installation.md index aa946911c63..1e29a49d82d 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -137,7 +137,7 @@ See [PR #1667](https://github.com/BVLC/caffe/pull/1667) for options and details. **Laboratory Tested Hardware**: Berkeley Vision runs Caffe with Titan Xs, K80s, GTX 980s, K40s, K20s, Titans, and GTX 770s including models at ImageNet/ILSVRC scale. We have not encountered any trouble in-house with devices with CUDA capability >= 3.0. All reported hardware issues thus-far have been due to GPU configuration, overheating, and the like. -**CUDA compute capability**: devices with compute capability <= 2.0 may have to reduce CUDA thread numbers and batch sizes due to hardware constraints. Brew with caution; we recommend compute capbility >= 3.0. 
+**CUDA compute capability**: devices with compute capability <= 2.0 may have to reduce CUDA thread numbers and batch sizes due to hardware constraints. Brew with caution; we recommend compute capability >= 3.0. Once installed, check your times against our [reference performance numbers](performance_hardware.html) to make sure everything is configured properly. From bd762101dba321146d2d9cb747c79c4c678cbfdb Mon Sep 17 00:00:00 2001 From: Achal Dave Date: Wed, 20 Apr 2016 17:34:29 -0400 Subject: [PATCH 029/264] Explicitly point out -weights flag in tutorial The -weights flag is somewhat easy to miss as it's only in one command, but is the crucial thing that anyone searching for 'how to finetune' is looking for. Hopefully this more clearly points out the '-weights' flag, which might otherwise be overlooked in this tutorial. --- examples/finetune_flickr_style/readme.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/finetune_flickr_style/readme.md b/examples/finetune_flickr_style/readme.md index 9ba4c9217ff..188dedf1b9a 100644 --- a/examples/finetune_flickr_style/readme.md +++ b/examples/finetune_flickr_style/readme.md @@ -57,7 +57,11 @@ The prototxts in this example assume this, and also assume the presence of the I We'll also need the ImageNet-trained model, which you can obtain by running `./scripts/download_model_binary.py models/bvlc_reference_caffenet`. -Now we can train! (You can fine-tune in CPU mode by leaving out the `-gpu` flag.) +Now we can train! The key to fine-tuning is the `-weights` argument in the +command below, which tells Caffe that we want to load weights from a pre-trained +Caffe model. + +(You can fine-tune in CPU mode by leaving out the `-gpu` flag.) 
caffe % ./build/tools/caffe train -solver models/finetune_flickr_style/solver.prototxt -weights models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel -gpu 0 From 90426645c36ad71c778c4ac3688ec164242a50a1 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Thu, 25 Feb 2016 19:58:01 -0800 Subject: [PATCH 030/264] Don't set map_size=1TB in util/db_lmdb Instead, double the map size on the MDB_MAP_FULL exception. --- include/caffe/util/db_lmdb.hpp | 13 ++++--- src/caffe/util/db_lmdb.cpp | 65 +++++++++++++++++++++++++++------- 2 files changed, 60 insertions(+), 18 deletions(-) diff --git a/include/caffe/util/db_lmdb.hpp b/include/caffe/util/db_lmdb.hpp index 4e1568ace50..ee370322383 100644 --- a/include/caffe/util/db_lmdb.hpp +++ b/include/caffe/util/db_lmdb.hpp @@ -3,6 +3,7 @@ #define CAFFE_UTIL_DB_LMDB_HPP #include +#include #include "lmdb.h" @@ -54,14 +55,16 @@ class LMDBCursor : public Cursor { class LMDBTransaction : public Transaction { public: - explicit LMDBTransaction(MDB_dbi* mdb_dbi, MDB_txn* mdb_txn) - : mdb_dbi_(mdb_dbi), mdb_txn_(mdb_txn) { } + explicit LMDBTransaction(MDB_env* mdb_env) + : mdb_env_(mdb_env) { } virtual void Put(const string& key, const string& value); - virtual void Commit() { MDB_CHECK(mdb_txn_commit(mdb_txn_)); } + virtual void Commit(); private: - MDB_dbi* mdb_dbi_; - MDB_txn* mdb_txn_; + MDB_env* mdb_env_; + vector keys, values; + + void DoubleMapSize(); DISABLE_COPY_AND_ASSIGN(LMDBTransaction); }; diff --git a/src/caffe/util/db_lmdb.cpp b/src/caffe/util/db_lmdb.cpp index 0bc82b53e2b..df83a52a633 100644 --- a/src/caffe/util/db_lmdb.cpp +++ b/src/caffe/util/db_lmdb.cpp @@ -7,11 +7,8 @@ namespace caffe { namespace db { -const size_t LMDB_MAP_SIZE = 1099511627776; // 1 TB - void LMDB::Open(const string& source, Mode mode) { MDB_CHECK(mdb_env_create(&mdb_env_)); - MDB_CHECK(mdb_env_set_mapsize(mdb_env_, LMDB_MAP_SIZE)); if (mode == NEW) { CHECK_EQ(mkdir(source.c_str(), 0744), 0) << "mkdir " << source << "failed"; } @@ -48,19 +45,61 
@@ LMDBCursor* LMDB::NewCursor() { } LMDBTransaction* LMDB::NewTransaction() { - MDB_txn* mdb_txn; - MDB_CHECK(mdb_txn_begin(mdb_env_, NULL, 0, &mdb_txn)); - MDB_CHECK(mdb_dbi_open(mdb_txn, NULL, 0, &mdb_dbi_)); - return new LMDBTransaction(&mdb_dbi_, mdb_txn); + return new LMDBTransaction(mdb_env_); } void LMDBTransaction::Put(const string& key, const string& value) { - MDB_val mdb_key, mdb_value; - mdb_key.mv_data = const_cast(key.data()); - mdb_key.mv_size = key.size(); - mdb_value.mv_data = const_cast(value.data()); - mdb_value.mv_size = value.size(); - MDB_CHECK(mdb_put(mdb_txn_, *mdb_dbi_, &mdb_key, &mdb_value, 0)); + keys.push_back(key); + values.push_back(value); +} + +void LMDBTransaction::Commit() { + MDB_dbi mdb_dbi; + MDB_val mdb_key, mdb_data; + MDB_txn *mdb_txn; + + // Initialize MDB variables + MDB_CHECK(mdb_txn_begin(mdb_env_, NULL, 0, &mdb_txn)); + MDB_CHECK(mdb_dbi_open(mdb_txn, NULL, 0, &mdb_dbi)); + + bool out_of_memory = false; + for (int i = 0; i < keys.size(); i++) { + mdb_key.mv_size = keys[i].size(); + mdb_key.mv_data = const_cast(keys[i].data()); + mdb_data.mv_size = values[i].size(); + mdb_data.mv_data = const_cast(values[i].data()); + + int put_rc = mdb_put(mdb_txn, mdb_dbi, &mdb_key, &mdb_data, 0); + if (put_rc == MDB_MAP_FULL) { + out_of_memory = true; + break; + } else { + // Failed for some other reason + MDB_CHECK(put_rc); + } + } + + if (!out_of_memory) { + // Commit the transaction + MDB_CHECK(mdb_txn_commit(mdb_txn)); + mdb_dbi_close(mdb_env_, mdb_dbi); + keys.clear(); + values.clear(); + } else { + // Double the map size and retry + mdb_txn_abort(mdb_txn); + mdb_dbi_close(mdb_env_, mdb_dbi); + DoubleMapSize(); + Commit(); + } +} + +void LMDBTransaction::DoubleMapSize() { + struct MDB_envinfo current_info; + MDB_CHECK(mdb_env_info(mdb_env_, ¤t_info)); + size_t new_size = current_info.me_mapsize * 2; + DLOG(INFO) << "Doubling LMDB map size to " << (new_size>>20) << "MB ..."; + MDB_CHECK(mdb_env_set_mapsize(mdb_env_, new_size)); } 
} // namespace db From f30c61cfdfc0d254ec233b972ff4b6b0aa2f5d4c Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Thu, 25 Feb 2016 20:02:25 -0800 Subject: [PATCH 031/264] Print to stderr for example LMDB code --- examples/cifar10/convert_cifar_data.cpp | 2 ++ examples/mnist/convert_mnist_data.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/examples/cifar10/convert_cifar_data.cpp b/examples/cifar10/convert_cifar_data.cpp index e1b89f42fb6..7385a74a679 100644 --- a/examples/cifar10/convert_cifar_data.cpp +++ b/examples/cifar10/convert_cifar_data.cpp @@ -91,6 +91,8 @@ void convert_dataset(const string& input_folder, const string& output_folder, } int main(int argc, char** argv) { + FLAGS_alsologtostderr = 1; + if (argc != 4) { printf("This script converts the CIFAR dataset to the leveldb format used\n" "by caffe to perform classification.\n" diff --git a/examples/mnist/convert_mnist_data.cpp b/examples/mnist/convert_mnist_data.cpp index 16d28093dd5..32bee5269ba 100644 --- a/examples/mnist/convert_mnist_data.cpp +++ b/examples/mnist/convert_mnist_data.cpp @@ -178,6 +178,8 @@ int main(int argc, char** argv) { namespace gflags = google; #endif + FLAGS_alsologtostderr = 1; + gflags::SetUsageMessage("This script converts the MNIST dataset to\n" "the lmdb/leveldb format used by Caffe to load data.\n" "Usage:\n" From 74040cb2ed9d46a267a16870e9878f3b6911d644 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Thu, 25 Feb 2016 20:14:02 -0800 Subject: [PATCH 032/264] Update MNIST example to use new DB classes --- examples/mnist/convert_mnist_data.cpp | 87 ++++----------------------- 1 file changed, 12 insertions(+), 75 deletions(-) diff --git a/examples/mnist/convert_mnist_data.cpp b/examples/mnist/convert_mnist_data.cpp index 32bee5269ba..57ddef77074 100644 --- a/examples/mnist/convert_mnist_data.cpp +++ b/examples/mnist/convert_mnist_data.cpp @@ -22,12 +22,15 @@ #include // NOLINT(readability/streams) #include +#include "boost/scoped_ptr.hpp" #include 
"caffe/proto/caffe.pb.h" +#include "caffe/util/db.hpp" #include "caffe/util/format.hpp" #if defined(USE_LEVELDB) && defined(USE_LMDB) using namespace caffe; // NOLINT(build/namespaces) +using boost::scoped_ptr; using std::string; DEFINE_string(backend, "lmdb", "The backend for storing the result"); @@ -67,43 +70,10 @@ void convert_dataset(const char* image_filename, const char* label_filename, image_file.read(reinterpret_cast(&cols), 4); cols = swap_endian(cols); - // lmdb - MDB_env *mdb_env; - MDB_dbi mdb_dbi; - MDB_val mdb_key, mdb_data; - MDB_txn *mdb_txn; - // leveldb - leveldb::DB* db; - leveldb::Options options; - options.error_if_exists = true; - options.create_if_missing = true; - options.write_buffer_size = 268435456; - leveldb::WriteBatch* batch = NULL; - - // Open db - if (db_backend == "leveldb") { // leveldb - LOG(INFO) << "Opening leveldb " << db_path; - leveldb::Status status = leveldb::DB::Open( - options, db_path, &db); - CHECK(status.ok()) << "Failed to open leveldb " << db_path - << ". Is it already existing?"; - batch = new leveldb::WriteBatch(); - } else if (db_backend == "lmdb") { // lmdb - LOG(INFO) << "Opening lmdb " << db_path; - CHECK_EQ(mkdir(db_path, 0744), 0) - << "mkdir " << db_path << "failed"; - CHECK_EQ(mdb_env_create(&mdb_env), MDB_SUCCESS) << "mdb_env_create failed"; - CHECK_EQ(mdb_env_set_mapsize(mdb_env, 1099511627776), MDB_SUCCESS) // 1TB - << "mdb_env_set_mapsize failed"; - CHECK_EQ(mdb_env_open(mdb_env, db_path, 0, 0664), MDB_SUCCESS) - << "mdb_env_open failed"; - CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS) - << "mdb_txn_begin failed"; - CHECK_EQ(mdb_open(mdb_txn, NULL, 0, &mdb_dbi), MDB_SUCCESS) - << "mdb_open failed. Does the lmdb already exist? 
"; - } else { - LOG(FATAL) << "Unknown db backend " << db_backend; - } + + scoped_ptr db(db::GetDB(db_backend)); + db->Open(db_path, db::NEW); + scoped_ptr txn(db->NewTransaction()); // Storing to db char label; @@ -125,52 +95,19 @@ void convert_dataset(const char* image_filename, const char* label_filename, string key_str = caffe::format_int(item_id, 8); datum.SerializeToString(&value); - // Put in db - if (db_backend == "leveldb") { // leveldb - batch->Put(key_str, value); - } else if (db_backend == "lmdb") { // lmdb - mdb_data.mv_size = value.size(); - mdb_data.mv_data = reinterpret_cast(&value[0]); - mdb_key.mv_size = key_str.size(); - mdb_key.mv_data = reinterpret_cast(&key_str[0]); - CHECK_EQ(mdb_put(mdb_txn, mdb_dbi, &mdb_key, &mdb_data, 0), MDB_SUCCESS) - << "mdb_put failed"; - } else { - LOG(FATAL) << "Unknown db backend " << db_backend; - } + txn->Put(key_str, value); if (++count % 1000 == 0) { - // Commit txn - if (db_backend == "leveldb") { // leveldb - db->Write(leveldb::WriteOptions(), batch); - delete batch; - batch = new leveldb::WriteBatch(); - } else if (db_backend == "lmdb") { // lmdb - CHECK_EQ(mdb_txn_commit(mdb_txn), MDB_SUCCESS) - << "mdb_txn_commit failed"; - CHECK_EQ(mdb_txn_begin(mdb_env, NULL, 0, &mdb_txn), MDB_SUCCESS) - << "mdb_txn_begin failed"; - } else { - LOG(FATAL) << "Unknown db backend " << db_backend; - } + txn->Commit(); } } // write the last batch if (count % 1000 != 0) { - if (db_backend == "leveldb") { // leveldb - db->Write(leveldb::WriteOptions(), batch); - delete batch; - delete db; - } else if (db_backend == "lmdb") { // lmdb - CHECK_EQ(mdb_txn_commit(mdb_txn), MDB_SUCCESS) << "mdb_txn_commit failed"; - mdb_close(mdb_env, mdb_dbi); - mdb_env_close(mdb_env); - } else { - LOG(FATAL) << "Unknown db backend " << db_backend; - } - LOG(ERROR) << "Processed " << count << " files."; + txn->Commit(); } + LOG(INFO) << "Processed " << count << " files."; delete[] pixels; + db->Close(); } int main(int argc, char** argv) { From 
bff14b47c58cffa28a71b9e3caba93da2354ab07 Mon Sep 17 00:00:00 2001 From: HeGaoYuan <273230305@qq.com> Date: Sat, 23 Apr 2016 14:48:41 +0800 Subject: [PATCH 033/264] Fixed #4029: test the network every 500 iterations, not 1000 iterations --- examples/mnist/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/mnist/readme.md b/examples/mnist/readme.md index b87a0f53c7a..35952155a30 100644 --- a/examples/mnist/readme.md +++ b/examples/mnist/readme.md @@ -248,7 +248,7 @@ These messages tell you the details about each layer, its connections and its ou I1203 solver.cpp:36] Solver scaffolding done. I1203 solver.cpp:44] Solving LeNet -Based on the solver setting, we will print the training loss function every 100 iterations, and test the network every 1000 iterations. You will see messages like this: +Based on the solver setting, we will print the training loss function every 100 iterations, and test the network every 500 iterations. You will see messages like this: I1203 solver.cpp:204] Iteration 100, lr = 0.00992565 I1203 solver.cpp:66] Iteration 100, loss = 0.26044 From 0e145c5af91bf42e20cf8c8a295816b06905ee4e Mon Sep 17 00:00:00 2001 From: ebadawy Date: Sun, 24 Apr 2016 20:24:41 +0200 Subject: [PATCH 034/264] Read the data as a binary Appending 'b' in the file mode as hashlib functions require to pass in bytes --- scripts/download_model_binary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/download_model_binary.py b/scripts/download_model_binary.py index 66f72f2477e..fcdbb5a91a2 100755 --- a/scripts/download_model_binary.py +++ b/scripts/download_model_binary.py @@ -60,7 +60,7 @@ def valid_dirname(dirname): # Closure-d function for checking SHA1. def model_checks_out(filename=model_filename, sha1=frontmatter['sha1']): - with open(filename, 'r') as f: + with open(filename, 'rb') as f: return hashlib.sha1(f.read()).hexdigest() == sha1 # Check if model exists. 
From 8619fbb90f2b5546ea8cb7c4021216d978d4cbc4 Mon Sep 17 00:00:00 2001 From: Sammy Sidhu Date: Wed, 27 Apr 2016 03:05:30 -0700 Subject: [PATCH 035/264] fixed typo in download script command cpp_classification --- examples/cpp_classification/readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/cpp_classification/readme.md b/examples/cpp_classification/readme.md index a086db1a035..0de2885b53c 100644 --- a/examples/cpp_classification/readme.md +++ b/examples/cpp_classification/readme.md @@ -42,7 +42,7 @@ script: The ImageNet labels file (also called the *synset file*) is also required in order to map a prediction to the name of the class: ``` -./data/ilsvrc12/get_ilsvrc_aux.sh. +./data/ilsvrc12/get_ilsvrc_aux.sh ``` Using the files that were downloaded, we can classify the provided cat image (`examples/images/cat.jpg`) using this command: From 859cf6e1c3f965b4029b7940b861038031014ed7 Mon Sep 17 00:00:00 2001 From: Kun Wang Date: Wed, 27 Apr 2016 21:09:31 +0800 Subject: [PATCH 036/264] Fix an error in the example of ReshapeParameter. * this small mistake may confuse newcomers. 
--- src/caffe/proto/caffe.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 650c87ae3a6..ea40e60aa34 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -987,7 +987,7 @@ message ReshapeParameter { // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } - // reshape_param { shape { dim: -1 dim: 0 dim: 2 } } + // reshape_param { shape { dim: 0 dim:-1 dim: 4 } } // optional BlobShape shape = 1; From 8714b53719165e42f7844126f671f32ecc9b2e2f Mon Sep 17 00:00:00 2001 From: Drew Abbot Date: Wed, 27 Apr 2016 23:25:09 -0700 Subject: [PATCH 037/264] avoid non-integer array indices --- python/caffe/classifier.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/caffe/classifier.py b/python/caffe/classifier.py index 537193db8f8..ea29fed86f9 100644 --- a/python/caffe/classifier.py +++ b/python/caffe/classifier.py @@ -79,6 +79,7 @@ def predict(self, inputs, oversample=True): -self.crop_dims / 2.0, self.crop_dims / 2.0 ]) + crop = crop.astype(int) input_ = input_[:, crop[0]:crop[2], crop[1]:crop[3], :] # Classify From 673e8cfc0b8f05f9fa3ebbad7cc6202822e5d9c5 Mon Sep 17 00:00:00 2001 From: Sean Bell Date: Thu, 28 Apr 2016 13:06:51 -0400 Subject: [PATCH 038/264] Suppress boost registration warnings in pycaffe (Based on #3960) --- python/caffe/_caffe.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index a2c46a123aa..32b5d921094 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -26,6 +26,19 @@ #define PyArray_SetBaseObject(arr, x) (PyArray_BASE(arr) = (x)) #endif +/* Fix to avoid registration warnings in pycaffe (#3960) */ +#define BP_REGISTER_SHARED_PTR_TO_PYTHON(PTR) do { \ + const boost::python::type_info info = \ + boost::python::type_id >(); \ + 
const boost::python::converter::registration* reg = \ + boost::python::converter::registry::query(info); \ + if (reg == NULL) { \ + bp::register_ptr_to_python >(); \ + } else if ((*reg).m_to_python == NULL) { \ + bp::register_ptr_to_python >(); \ + } \ +} while (0) + namespace bp = boost::python; namespace caffe { @@ -255,7 +268,7 @@ BOOST_PYTHON_MODULE(_caffe) { .def("_set_input_arrays", &Net_SetInputArrays, bp::with_custodian_and_ward<1, 2, bp::with_custodian_and_ward<1, 3> >()) .def("save", &Net_Save); - bp::register_ptr_to_python > >(); + BP_REGISTER_SHARED_PTR_TO_PYTHON(Net); bp::class_, shared_ptr >, boost::noncopyable>( "Blob", bp::no_init) @@ -275,7 +288,7 @@ BOOST_PYTHON_MODULE(_caffe) { NdarrayCallPolicies())) .add_property("diff", bp::make_function(&Blob::mutable_cpu_diff, NdarrayCallPolicies())); - bp::register_ptr_to_python > >(); + BP_REGISTER_SHARED_PTR_TO_PYTHON(Blob); bp::class_, shared_ptr >, boost::noncopyable>("Layer", bp::init()) @@ -284,7 +297,7 @@ BOOST_PYTHON_MODULE(_caffe) { .def("setup", &Layer::LayerSetUp) .def("reshape", &Layer::Reshape) .add_property("type", bp::make_function(&Layer::type)); - bp::register_ptr_to_python > >(); + BP_REGISTER_SHARED_PTR_TO_PYTHON(Layer); bp::class_("LayerParameter", bp::no_init); @@ -299,7 +312,7 @@ BOOST_PYTHON_MODULE(_caffe) { .def("step", &Solver::Step) .def("restore", &Solver::Restore) .def("snapshot", &Solver::Snapshot); - bp::register_ptr_to_python > >(); + BP_REGISTER_SHARED_PTR_TO_PYTHON(Solver); bp::class_, bp::bases >, shared_ptr >, boost::noncopyable>( From 2da8600acdc922d03b667ef691279cb52c7226ed Mon Sep 17 00:00:00 2001 From: Muneyuki Noguchi Date: Fri, 29 Apr 2016 02:04:02 +0000 Subject: [PATCH 039/264] draw_net: accept prototxt without name Fixes #3819 --- python/caffe/draw.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/caffe/draw.py b/python/caffe/draw.py index cfa3fc5b1fb..61205ca9f37 100644 --- a/python/caffe/draw.py +++ b/python/caffe/draw.py @@ -142,7 
+142,7 @@ def get_pydot_graph(caffe_net, rankdir, label_edges=True): ------- pydot graph object """ - pydot_graph = pydot.Dot(caffe_net.name, + pydot_graph = pydot.Dot(caffe_net.name if caffe_net.name else 'Net', graph_type='digraph', rankdir=rankdir) pydot_nodes = {} From cb3c992a2ae00ec634313a394361214d868f9bd2 Mon Sep 17 00:00:00 2001 From: Sheng Zha Date: Sat, 30 Apr 2016 16:40:05 -0700 Subject: [PATCH 040/264] fix grep in CUDA version detection to accommodate OSX's grep (and other grep that doesn't support \d extension) --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5424c3a1858..568d9c2774d 100644 --- a/Makefile +++ b/Makefile @@ -272,7 +272,7 @@ endif ifeq ($(OSX), 1) CXX := /usr/bin/clang++ ifneq ($(CPU_ONLY), 1) - CUDA_VERSION := $(shell $(CUDA_DIR)/bin/nvcc -V | grep -o 'release \d' | grep -o '\d') + CUDA_VERSION := $(shell $(CUDA_DIR)/bin/nvcc -V | grep -o 'release [0-9.]*' | grep -o '[0-9.]*') ifeq ($(shell echo | awk '{exit $(CUDA_VERSION) < 7.0;}'), 1) CXXFLAGS += -stdlib=libstdc++ LINKFLAGS += -stdlib=libstdc++ From 5d423b7a63718decf04bad93a481ebd56291ec7b Mon Sep 17 00:00:00 2001 From: Felix Abecassis Date: Mon, 2 May 2016 16:20:00 -0700 Subject: [PATCH 041/264] Pin the base image version for the GPU Dockerfile The previous Dockerfile can break if image nvidia/cuda:cudnn is updated to any of the following: - Ubuntu 16.04 LTS (already released) - cuDNN v5 (soon) - CUDA 8.0 (soon) --- docker/Makefile | 2 +- docker/standalone/gpu/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Makefile b/docker/Makefile index 725208c6b2b..0de887d0e19 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -22,7 +22,7 @@ docker_files: standalone_files standalone_files: standalone/cpu/Dockerfile standalone/gpu/Dockerfile -FROM_GPU = "nvidia/cuda:cudnn" +FROM_GPU = "nvidia/cuda:7.5-cudnn4-devel-ubuntu14.04" FROM_CPU = "ubuntu:14.04" GPU_CMAKE_ARGS = -DUSE_CUDNN=1 CPU_CMAKE_ARGS = 
-DCPU_ONLY=1 diff --git a/docker/standalone/gpu/Dockerfile b/docker/standalone/gpu/Dockerfile index 1ddc6560d16..371aad5b1e9 100644 --- a/docker/standalone/gpu/Dockerfile +++ b/docker/standalone/gpu/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:cudnn +FROM nvidia/cuda:7.5-cudnn4-devel-ubuntu14.04 MAINTAINER caffe-maint@googlegroups.com RUN apt-get update && apt-get install -y --no-install-recommends \ From c2dba923b82c669f2998a3174310fbbb5c64c39f Mon Sep 17 00:00:00 2001 From: ZhouYzzz Date: Wed, 4 May 2016 18:00:12 +0800 Subject: [PATCH 042/264] Add test for attribute "phase" in python layer --- python/caffe/test/test_python_layer.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/python/caffe/test/test_python_layer.py b/python/caffe/test/test_python_layer.py index e46b7118014..899514e90f1 100644 --- a/python/caffe/test/test_python_layer.py +++ b/python/caffe/test/test_python_layer.py @@ -44,6 +44,18 @@ def forward(self, bottom, top): def backward(self, top, propagate_down, bottom): self.blobs[0].diff[0] = 1 +class PhaseLayer(caffe.Layer): + """A layer for checking attribute `phase`""" + + def setup(self, bottom, top): + pass + + def reshape(self, bootom, top): + top[0].reshape() + + def forward(self, bottom, top): + top[0].data[()] = self.phase + def python_net_file(): with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f: f.write("""name: 'pythonnet' force_backward: true @@ -76,6 +88,14 @@ def parameter_net_file(): """) return f.name +def phase_net_file(): + with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f: + f.write("""name: 'pythonnet' force_backward: true + layer { type: 'Python' name: 'layer' top: 'phase' + python_param { module: 'test_python_layer' layer: 'PhaseLayer' } } + """) + return f.name + @unittest.skipIf('Python' not in caffe.layer_type_list(), 'Caffe built without Python layer support') @@ -140,3 +160,9 @@ def test_parameter(self): self.assertEqual(layer.blobs[0].data[0], 1) os.remove(net_file) + 
+ def test_phase(self): + net_file = phase_net_file() + for phase in caffe.TRAIN, caffe.TEST: + net = caffe.Net(net_file, phase) + self.assertEqual(net.forward()['phase'], phase) From 5acc17a5bfe010d92cc20766f88eff70d4ae92cc Mon Sep 17 00:00:00 2001 From: Achal Dave Date: Wed, 4 May 2016 11:51:00 -0400 Subject: [PATCH 043/264] Exit on error and report argument error details. The statement 'exit' has no effect in Python scripts. Use 'sys.exit()' instead. --- tools/extra/plot_training_log.py.example | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/extra/plot_training_log.py.example b/tools/extra/plot_training_log.py.example index 4d3ed0d15a9..d98c52d33d5 100755 --- a/tools/extra/plot_training_log.py.example +++ b/tools/extra/plot_training_log.py.example @@ -160,7 +160,7 @@ Supported chart types:""" % (len(get_supported_chart_types()) - 1, num = len(supported_chart_types) for i in xrange(num): print ' %d: %s' % (i, supported_chart_types[i]) - exit + sys.exit() def is_valid_chart_type(chart_type): return chart_type >= 0 and chart_type < len(get_supported_chart_types()) @@ -171,17 +171,19 @@ if __name__ == '__main__': else: chart_type = int(sys.argv[1]) if not is_valid_chart_type(chart_type): + print '%s is not a valid chart type.' % chart_type print_help() path_to_png = sys.argv[2] if not path_to_png.endswith('.png'): print 'Path must ends with png' % path_to_png - exit + sys.exit() path_to_logs = sys.argv[3:] for path_to_log in path_to_logs: if not os.path.exists(path_to_log): print 'Path does not exist: %s' % path_to_log - exit + sys.exit() if not path_to_log.endswith(get_log_file_suffix()): + print 'Log file must end in %s.' 
% get_log_file_suffix() print_help() ## plot_chart accpets multiple path_to_logs plot_chart(chart_type, path_to_png, path_to_logs) From 4f22fceda92a0370f21f64d45d71ef3e354a0312 Mon Sep 17 00:00:00 2001 From: Achal Dave Date: Wed, 4 May 2016 11:52:06 -0400 Subject: [PATCH 044/264] Remove trailing spaces --- tools/extra/plot_training_log.py.example | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/extra/plot_training_log.py.example b/tools/extra/plot_training_log.py.example index d98c52d33d5..c3b47a81664 100755 --- a/tools/extra/plot_training_log.py.example +++ b/tools/extra/plot_training_log.py.example @@ -68,9 +68,9 @@ def get_field_descriptions(chart_type): get_chart_type_description_separator()) y_axis_field = description[0] x_axis_field = description[1] - return x_axis_field, y_axis_field + return x_axis_field, y_axis_field -def get_field_indecies(x_axis_field, y_axis_field): +def get_field_indecies(x_axis_field, y_axis_field): data_file_type = get_data_file_type(chart_type) fields = create_field_index()[0][data_file_type] return fields[x_axis_field], fields[y_axis_field] @@ -138,8 +138,8 @@ def plot_chart(chart_type, path_to_png, path_to_log_list): plt.legend(loc = legend_loc, ncol = 1) # ajust ncol to fit the space plt.title(get_chart_type_description(chart_type)) plt.xlabel(x_axis_field) - plt.ylabel(y_axis_field) - plt.savefig(path_to_png) + plt.ylabel(y_axis_field) + plt.savefig(path_to_png) plt.show() def print_help(): @@ -164,7 +164,7 @@ Supported chart types:""" % (len(get_supported_chart_types()) - 1, def is_valid_chart_type(chart_type): return chart_type >= 0 and chart_type < len(get_supported_chart_types()) - + if __name__ == '__main__': if len(sys.argv) < 4: print_help() From 938918c3f5d0a1a738d2229a337774cea92be95a Mon Sep 17 00:00:00 2001 From: Achal Dave Date: Wed, 4 May 2016 11:55:43 -0400 Subject: [PATCH 045/264] Reformat to fit in 79 columns --- tools/extra/plot_training_log.py.example | 6 ++++-- 1 file changed, 
4 insertions(+), 2 deletions(-) diff --git a/tools/extra/plot_training_log.py.example b/tools/extra/plot_training_log.py.example index c3b47a81664..3ea66e38075 100755 --- a/tools/extra/plot_training_log.py.example +++ b/tools/extra/plot_training_log.py.example @@ -10,7 +10,8 @@ import matplotlib.legend as lgd import matplotlib.markers as mks def get_log_parsing_script(): - dirname = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) + dirname = os.path.dirname(os.path.abspath(inspect.getfile( + inspect.currentframe()))) return dirname + '/parse_log.sh' def get_log_file_suffix(): @@ -61,7 +62,8 @@ def get_data_file_type(chart_type): return data_file_type def get_data_file(chart_type, path_to_log): - return os.path.basename(path_to_log) + '.' + get_data_file_type(chart_type).lower() + return (os.path.basename(path_to_log) + '.' + + get_data_file_type(chart_type).lower()) def get_field_descriptions(chart_type): description = get_chart_type_description(chart_type).split( From c2656f0bc7e1f51b4a82a79e7a5516f0f1fb012f Mon Sep 17 00:00:00 2001 From: Achal Dave Date: Wed, 4 May 2016 11:56:05 -0400 Subject: [PATCH 046/264] Fix typo (indecies->indices) --- tools/extra/plot_training_log.py.example | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/extra/plot_training_log.py.example b/tools/extra/plot_training_log.py.example index 3ea66e38075..79924ae5a5a 100755 --- a/tools/extra/plot_training_log.py.example +++ b/tools/extra/plot_training_log.py.example @@ -72,7 +72,7 @@ def get_field_descriptions(chart_type): x_axis_field = description[1] return x_axis_field, y_axis_field -def get_field_indecies(x_axis_field, y_axis_field): +def get_field_indices(x_axis_field, y_axis_field): data_file_type = get_data_file_type(chart_type) fields = create_field_index()[0][data_file_type] return fields[x_axis_field], fields[y_axis_field] @@ -113,7 +113,7 @@ def plot_chart(chart_type, path_to_png, path_to_log_list): os.system('%s %s' % 
(get_log_parsing_script(), path_to_log)) data_file = get_data_file(chart_type, path_to_log) x_axis_field, y_axis_field = get_field_descriptions(chart_type) - x, y = get_field_indecies(x_axis_field, y_axis_field) + x, y = get_field_indices(x_axis_field, y_axis_field) data = load_data(data_file, x, y) ## TODO: more systematic color cycle for lines color = [random.random(), random.random(), random.random()] From e6fc797f3be59a12f26d247e2f1f79bf7d8086c4 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 4 May 2016 13:31:35 -0700 Subject: [PATCH 047/264] [build] note that `make clean` clears build and distribute dirs --- Makefile.config.example | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.config.example b/Makefile.config.example index 8fd49c9c1a7..07bed63ae40 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -98,6 +98,7 @@ LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib # (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.) # USE_PKG_CONFIG := 1 +# N.B. 
both build and distribute dirs are cleared on `make clean` BUILD_DIR := build DISTRIBUTE_DIR := distribute From c419f8517b1e1b3d7a07fe212fc6c90a70b519ea Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Thu, 9 Jul 2015 15:49:48 -0700 Subject: [PATCH 048/264] add parameter layer for learning any bottom --- include/caffe/layers/parameter_layer.hpp | 45 ++++++++++++++++++++++++ src/caffe/layers/parameter_layer.cpp | 8 +++++ src/caffe/proto/caffe.proto | 7 +++- 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 include/caffe/layers/parameter_layer.hpp create mode 100644 src/caffe/layers/parameter_layer.cpp diff --git a/include/caffe/layers/parameter_layer.hpp b/include/caffe/layers/parameter_layer.hpp new file mode 100644 index 00000000000..188b92acbe2 --- /dev/null +++ b/include/caffe/layers/parameter_layer.hpp @@ -0,0 +1,45 @@ +#ifndef CAFFE_PARAMETER_LAYER_HPP_ +#define CAFFE_PARAMETER_LAYER_HPP_ + +#include + +#include "caffe/layer.hpp" + +namespace caffe { + +template +class ParameterLayer : public Layer { + public: + explicit ParameterLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top) { + if (this->blobs_.size() > 0) { + LOG(INFO) << "Skipping parameter initialization"; + } else { + this->blobs_.resize(1); + this->blobs_[0].reset(new Blob()); + this->blobs_[0]->Reshape(this->layer_param_.parameter_param().shape()); + } + top[0]->Reshape(this->layer_param_.parameter_param().shape()); + } + virtual void Reshape(const vector*>& bottom, + const vector*>& top) { } + virtual inline const char* type() const { return "Parameter"; } + virtual inline int ExactNumBottomBlobs() const { return 0; } + virtual inline int ExactNumTopBlobs() const { return 1; } + + protected: + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top) { + top[0]->ShareData(*(this->blobs_[0])); + top[0]->ShareDiff(*(this->blobs_[0])); + } + virtual void Backward_cpu(const vector*>& top, + 
const vector& propagate_down, const vector*>& bottom) + { } +}; + +} // namespace caffe + +#endif diff --git a/src/caffe/layers/parameter_layer.cpp b/src/caffe/layers/parameter_layer.cpp new file mode 100644 index 00000000000..fbd326f8469 --- /dev/null +++ b/src/caffe/layers/parameter_layer.cpp @@ -0,0 +1,8 @@ +#include "caffe/layers/parameter_layer.hpp" + +namespace caffe { + +INSTANTIATE_CLASS(ParameterLayer); +REGISTER_LAYER_CLASS(Parameter); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index ea40e60aa34..15810718631 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -306,7 +306,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 145 (last added: crop_param) +// LayerParameter next available layer-specific ID: 146 (last added: parameter_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -385,6 +385,7 @@ message LayerParameter { optional LRNParameter lrn_param = 118; optional MemoryDataParameter memory_data_param = 119; optional MVNParameter mvn_param = 120; + optional ParameterParameter parameter_param = 145; optional PoolingParameter pooling_param = 121; optional PowerParameter power_param = 122; optional PReLUParameter prelu_param = 131; @@ -873,6 +874,10 @@ message MVNParameter { optional float eps = 3 [default = 1e-9]; } +message ParameterParameter { + optional BlobShape shape = 1; +} + message PoolingParameter { enum PoolMethod { MAX = 0; From 4e690b22ae30b0d483ccbe971007f2c6732cceb0 Mon Sep 17 00:00:00 2001 From: crazytan Date: Thu, 28 Apr 2016 18:45:13 -0400 Subject: [PATCH 049/264] fix problems in net_surgery.ipynb --- examples/net_surgery.ipynb | 45 +++++++++++++++----------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/examples/net_surgery.ipynb 
b/examples/net_surgery.ipynb index a6092db0c40..d50d503bfe0 100644 --- a/examples/net_surgery.ipynb +++ b/examples/net_surgery.ipynb @@ -22,7 +22,6 @@ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", - "import Image\n", "\n", "# Make sure that caffe is on the python path:\n", "caffe_root = '../' # this file is expected to be in {caffe_root}/examples\n", @@ -3511,7 +3510,7 @@ "print(\"blobs {}\\nparams {}\".format(net.blobs.keys(), net.params.keys()))\n", "\n", "# load image and prepare as a single input batch for Caffe\n", - "im = np.array(Image.open('images/cat_gray.jpg'))\n", + "im = np.array(caffe.io.load_image('images/cat_gray.jpg', color=False)).squeeze()\n", "plt.title(\"original image\")\n", "plt.imshow(im)\n", "plt.axis('off')\n", @@ -4480,8 +4479,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "pre-surgery output mean -12.93\n", - "post-surgery output mean -11.93\n" + "pre-surgery output mean -0.02\n", + "post-surgery output mean 0.98\n" ] } ], @@ -4489,7 +4488,7 @@ "# pick first filter output\n", "conv0 = net.blobs['conv'].data[0, 0]\n", "print(\"pre-surgery output mean {:.2f}\".format(conv0.mean()))\n", - "# set first filter bias to 10\n", + "# set first filter bias to 1\n", "net.params['conv'][1].data[0] = 1.\n", "net.forward()\n", "print(\"post-surgery output mean {:.2f}\".format(conv0.mean()))" @@ -5494,13 +5493,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "1,2c1,2\r\n", + "1,2c1\r\n", "< # Fully convolutional network version of CaffeNet.\r\n", "< name: \"CaffeNetConv\"\r\n", "---\r\n", "> name: \"CaffeNet\"\r\n", - "> input: \"data\"\r\n", - "7,11c7\r\n", + "7,11c6\r\n", "< input_param {\r\n", "< # initial shape for a fully convolutional network:\r\n", "< # the shape can be set for each input by reshape.\r\n", @@ -5508,33 +5506,33 @@ "< }\r\n", "---\r\n", "> input_param { shape: { dim: 10 dim: 3 dim: 227 dim: 227 } }\r\n", - "157,158c153,154\r\n", + "157,158c152,153\r\n", "< name: 
\"fc6-conv\"\r\n", "< type: \"Convolution\"\r\n", "---\r\n", "> name: \"fc6\"\r\n", "> type: \"InnerProduct\"\r\n", - "160,161c156,157\r\n", + "160,161c155,156\r\n", "< top: \"fc6-conv\"\r\n", "< convolution_param {\r\n", "---\r\n", "> top: \"fc6\"\r\n", "> inner_product_param {\r\n", - "163d158\r\n", + "163d157\r\n", "< kernel_size: 6\r\n", - "169,170c164,165\r\n", + "169,170c163,164\r\n", "< bottom: \"fc6-conv\"\r\n", "< top: \"fc6-conv\"\r\n", "---\r\n", "> bottom: \"fc6\"\r\n", "> top: \"fc6\"\r\n", - "175,176c170,171\r\n", + "175,176c169,170\r\n", "< bottom: \"fc6-conv\"\r\n", "< top: \"fc6-conv\"\r\n", "---\r\n", "> bottom: \"fc6\"\r\n", "> top: \"fc6\"\r\n", - "182,186c177,181\r\n", + "182,186c176,180\r\n", "< name: \"fc7-conv\"\r\n", "< type: \"Convolution\"\r\n", "< bottom: \"fc6-conv\"\r\n", @@ -5546,21 +5544,21 @@ "> bottom: \"fc6\"\r\n", "> top: \"fc7\"\r\n", "> inner_product_param {\r\n", - "188d182\r\n", + "188d181\r\n", "< kernel_size: 1\r\n", - "194,195c188,189\r\n", + "194,195c187,188\r\n", "< bottom: \"fc7-conv\"\r\n", "< top: \"fc7-conv\"\r\n", "---\r\n", "> bottom: \"fc7\"\r\n", "> top: \"fc7\"\r\n", - "200,201c194,195\r\n", + "200,201c193,194\r\n", "< bottom: \"fc7-conv\"\r\n", "< top: \"fc7-conv\"\r\n", "---\r\n", "> bottom: \"fc7\"\r\n", "> top: \"fc7\"\r\n", - "207,211c201,205\r\n", + "207,211c200,204\r\n", "< name: \"fc8-conv\"\r\n", "< type: \"Convolution\"\r\n", "< bottom: \"fc7-conv\"\r\n", @@ -5572,9 +5570,9 @@ "> bottom: \"fc7\"\r\n", "> top: \"fc8\"\r\n", "> inner_product_param {\r\n", - "213d206\r\n", + "213d205\r\n", "< kernel_size: 1\r\n", - "219c212\r\n", + "219c211\r\n", "< bottom: \"fc8-conv\"\r\n", "---\r\n", "> bottom: \"fc8\"\r\n" @@ -5610,13 +5608,6 @@ } ], "source": [ - "# Make sure that caffe is on the python path:\n", - "caffe_root = '../' # this file is expected to be in {caffe_root}/examples\n", - "import sys\n", - "sys.path.insert(0, caffe_root + 'python')\n", - "\n", - "import caffe\n", - "\n", "# Load the original 
network and extract the fully connected layers' parameters.\n", "net = caffe.Net('../models/bvlc_reference_caffenet/deploy.prototxt', \n", " '../models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel', \n", From da004d7c4e5d52b701762ecc8e20b4a4544a3457 Mon Sep 17 00:00:00 2001 From: Eric Tzeng Date: Thu, 5 May 2016 18:29:30 -0700 Subject: [PATCH 050/264] Allow reshaping blobs to size 0. Also add a test that reshapes a blob to shape (0, 5). --- src/caffe/blob.cpp | 4 +++- src/caffe/test/test_blob.cpp | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index c86fd5d1d94..4a34e4c5856 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -30,7 +30,9 @@ void Blob::Reshape(const vector& shape) { int* shape_data = static_cast(shape_data_->mutable_cpu_data()); for (int i = 0; i < shape.size(); ++i) { CHECK_GE(shape[i], 0); - CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX"; + if (count_ != 0) { + CHECK_LE(shape[i], INT_MAX / count_) << "blob size exceeds INT_MAX"; + } count_ *= shape[i]; shape_[i] = shape[i]; shape_data[i] = shape[i]; diff --git a/src/caffe/test/test_blob.cpp b/src/caffe/test/test_blob.cpp index a9d7d519e45..b88562223d0 100644 --- a/src/caffe/test/test_blob.cpp +++ b/src/caffe/test/test_blob.cpp @@ -51,6 +51,14 @@ TYPED_TEST(BlobSimpleTest, TestReshape) { EXPECT_EQ(this->blob_->count(), 120); } +TYPED_TEST(BlobSimpleTest, TestReshapeZero) { + vector shape(2); + shape[0] = 0; + shape[1] = 5; + this->blob_->Reshape(shape); + EXPECT_EQ(this->blob_->count(), 0); +} + TYPED_TEST(BlobSimpleTest, TestLegacyBlobProtoShapeEquals) { BlobProto blob_proto; From 42642936c2c29e539022e33bc0c691564d7e522d Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Mon, 9 May 2016 11:21:26 -0700 Subject: [PATCH 051/264] Catch MDB_MAP_FULL errors from mdb_txn_commit --- src/caffe/util/db_lmdb.cpp | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) 
diff --git a/src/caffe/util/db_lmdb.cpp b/src/caffe/util/db_lmdb.cpp index df83a52a633..4567cd7b93a 100644 --- a/src/caffe/util/db_lmdb.cpp +++ b/src/caffe/util/db_lmdb.cpp @@ -62,36 +62,42 @@ void LMDBTransaction::Commit() { MDB_CHECK(mdb_txn_begin(mdb_env_, NULL, 0, &mdb_txn)); MDB_CHECK(mdb_dbi_open(mdb_txn, NULL, 0, &mdb_dbi)); - bool out_of_memory = false; for (int i = 0; i < keys.size(); i++) { mdb_key.mv_size = keys[i].size(); mdb_key.mv_data = const_cast(keys[i].data()); mdb_data.mv_size = values[i].size(); mdb_data.mv_data = const_cast(values[i].data()); + // Add data to the transaction int put_rc = mdb_put(mdb_txn, mdb_dbi, &mdb_key, &mdb_data, 0); if (put_rc == MDB_MAP_FULL) { - out_of_memory = true; - break; - } else { - // Failed for some other reason - MDB_CHECK(put_rc); + // Out of memory - double the map size and retry + mdb_txn_abort(mdb_txn); + mdb_dbi_close(mdb_env_, mdb_dbi); + DoubleMapSize(); + Commit(); + return; } + // May have failed for some other reason + MDB_CHECK(put_rc); } - if (!out_of_memory) { - // Commit the transaction - MDB_CHECK(mdb_txn_commit(mdb_txn)); - mdb_dbi_close(mdb_env_, mdb_dbi); - keys.clear(); - values.clear(); - } else { - // Double the map size and retry - mdb_txn_abort(mdb_txn); + // Commit the transaction + int commit_rc = mdb_txn_commit(mdb_txn); + if (commit_rc == MDB_MAP_FULL) { + // Out of memory - double the map size and retry mdb_dbi_close(mdb_env_, mdb_dbi); DoubleMapSize(); Commit(); + return; } + // May have failed for some other reason + MDB_CHECK(commit_rc); + + // Cleanup after successful commit + mdb_dbi_close(mdb_env_, mdb_dbi); + keys.clear(); + values.clear(); } void LMDBTransaction::DoubleMapSize() { From a934ca54f3633479ea0573346c510df4f757df6c Mon Sep 17 00:00:00 2001 From: ray glover Date: Tue, 10 May 2016 15:44:47 +0100 Subject: [PATCH 052/264] [build] (CMake) customisable Caffe version/soversion --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/CMakeLists.txt b/CMakeLists.txt index c5d99cef9dd..da7142c9b3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,8 +10,8 @@ endif() project(Caffe C CXX) # ---[ Caffe version -set(CAFFE_TARGET_VERSION "1.0.0-rc3") -set(CAFFE_TARGET_SOVERSION "1.0.0-rc3") +set(CAFFE_TARGET_VERSION "1.0.0-rc3" CACHE STRING "Caffe logical version") +set(CAFFE_TARGET_SOVERSION "1.0.0-rc3" CACHE STRING "Caffe soname version") add_definitions(-DCAFFE_VERSION=${CAFFE_TARGET_VERSION}) # ---[ Using cmake scripts and modules From bb6ca4720ea41b8e9bdf162f63eb2757571a2e17 Mon Sep 17 00:00:00 2001 From: gdh1995 Date: Wed, 11 May 2016 20:51:07 +0800 Subject: [PATCH 053/264] a comment misses a space char --- src/caffe/util/db_lmdb.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/util/db_lmdb.cpp b/src/caffe/util/db_lmdb.cpp index 4567cd7b93a..fb1d4956aa1 100644 --- a/src/caffe/util/db_lmdb.cpp +++ b/src/caffe/util/db_lmdb.cpp @@ -10,7 +10,7 @@ namespace caffe { namespace db { void LMDB::Open(const string& source, Mode mode) { MDB_CHECK(mdb_env_create(&mdb_env_)); if (mode == NEW) { - CHECK_EQ(mkdir(source.c_str(), 0744), 0) << "mkdir " << source << "failed"; + CHECK_EQ(mkdir(source.c_str(), 0744), 0) << "mkdir " << source << " failed"; } int flags = 0; if (mode == READ) { From 078d9981a2c64b19834decdef3ce3dd032b667c0 Mon Sep 17 00:00:00 2001 From: Kyle Mills Date: Fri, 13 May 2016 11:15:33 -0400 Subject: [PATCH 054/264] fixed typo in io.py --- python/caffe/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/caffe/io.py b/python/caffe/io.py index cee5ace2e88..e1759beb587 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -46,7 +46,7 @@ def array_to_blobproto(arr, diff=None): return blob -def arraylist_to_blobprotovecor_str(arraylist): +def arraylist_to_blobprotovector_str(arraylist): """Converts a list of arrays to a serialized blobprotovec, which could be then passed to a network for processing. 
""" From 87c9dc397081248dd3d40e0dabce191557bcfc15 Mon Sep 17 00:00:00 2001 From: Yale Song Date: Fri, 13 May 2016 16:06:59 -0400 Subject: [PATCH 055/264] Fix Makefile CUDA_VERSION extraction on OSX Yosemite --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 568d9c2774d..403e00a38a1 100644 --- a/Makefile +++ b/Makefile @@ -272,7 +272,7 @@ endif ifeq ($(OSX), 1) CXX := /usr/bin/clang++ ifneq ($(CPU_ONLY), 1) - CUDA_VERSION := $(shell $(CUDA_DIR)/bin/nvcc -V | grep -o 'release [0-9.]*' | grep -o '[0-9.]*') + CUDA_VERSION := $(shell $(CUDA_DIR)/bin/nvcc -V | grep -o 'release [0-9.]*' | tr -d '[a-z ]') ifeq ($(shell echo | awk '{exit $(CUDA_VERSION) < 7.0;}'), 1) CXXFLAGS += -stdlib=libstdc++ LINKFLAGS += -stdlib=libstdc++ From e8ec9f806bd0051f2ee8d1d2737afdafe314f9e4 Mon Sep 17 00:00:00 2001 From: Bob Poekert Date: Fri, 13 May 2016 22:06:33 -0700 Subject: [PATCH 056/264] add check for background and foreground window size > 0 in WindowData layer --- src/caffe/layers/window_data_layer.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index 4ca8315d791..103dd4b6af8 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -265,6 +265,9 @@ void WindowDataLayer::load_batch(Batch* batch) { const int num_samples[2] = { batch_size - num_fg, num_fg }; int item_id = 0; + CHECK_GT(fg_windows_.size(), 0); + CHECK_GT(bg_windows_.size(), 0); + // sample from bg set then fg set for (int is_fg = 0; is_fg < 2; ++is_fg) { for (int dummy = 0; dummy < num_samples[is_fg]; ++dummy) { From b43c8e43a95608a00033f8f8867d32a201e5eed4 Mon Sep 17 00:00:00 2001 From: Felix Abecassis Date: Mon, 16 May 2016 14:03:38 -0700 Subject: [PATCH 057/264] Add cuDNN v5 support, drop cuDNN v3 support cuDNN v4 is still supported. 
--- include/caffe/layers/cudnn_relu_layer.hpp | 1 + include/caffe/layers/cudnn_sigmoid_layer.hpp | 1 + include/caffe/layers/cudnn_tanh_layer.hpp | 1 + include/caffe/util/cudnn.hpp | 24 +++++++++++++++++--- src/caffe/layers/cudnn_conv_layer.cu | 12 ++-------- src/caffe/layers/cudnn_relu_layer.cpp | 1 + src/caffe/layers/cudnn_relu_layer.cu | 23 +++++++++++++++++-- src/caffe/layers/cudnn_sigmoid_layer.cpp | 2 ++ src/caffe/layers/cudnn_sigmoid_layer.cu | 23 +++++++++++++++++-- src/caffe/layers/cudnn_tanh_layer.cpp | 1 + src/caffe/layers/cudnn_tanh_layer.cu | 23 +++++++++++++++++-- 11 files changed, 93 insertions(+), 19 deletions(-) diff --git a/include/caffe/layers/cudnn_relu_layer.hpp b/include/caffe/layers/cudnn_relu_layer.hpp index e01f568abc9..a1cb29e7c5f 100644 --- a/include/caffe/layers/cudnn_relu_layer.hpp +++ b/include/caffe/layers/cudnn_relu_layer.hpp @@ -37,6 +37,7 @@ class CuDNNReLULayer : public ReLULayer { cudnnHandle_t handle_; cudnnTensorDescriptor_t bottom_desc_; cudnnTensorDescriptor_t top_desc_; + cudnnActivationDescriptor_t activ_desc_; }; #endif diff --git a/include/caffe/layers/cudnn_sigmoid_layer.hpp b/include/caffe/layers/cudnn_sigmoid_layer.hpp index 9c597958b0b..7b3486f8a7e 100644 --- a/include/caffe/layers/cudnn_sigmoid_layer.hpp +++ b/include/caffe/layers/cudnn_sigmoid_layer.hpp @@ -37,6 +37,7 @@ class CuDNNSigmoidLayer : public SigmoidLayer { cudnnHandle_t handle_; cudnnTensorDescriptor_t bottom_desc_; cudnnTensorDescriptor_t top_desc_; + cudnnActivationDescriptor_t activ_desc_; }; #endif diff --git a/include/caffe/layers/cudnn_tanh_layer.hpp b/include/caffe/layers/cudnn_tanh_layer.hpp index c0f0053f71e..59e758d7031 100644 --- a/include/caffe/layers/cudnn_tanh_layer.hpp +++ b/include/caffe/layers/cudnn_tanh_layer.hpp @@ -37,6 +37,7 @@ class CuDNNTanHLayer : public TanHLayer { cudnnHandle_t handle_; cudnnTensorDescriptor_t bottom_desc_; cudnnTensorDescriptor_t top_desc_; + cudnnActivationDescriptor_t activ_desc_; }; #endif diff --git 
a/include/caffe/util/cudnn.hpp b/include/caffe/util/cudnn.hpp index 8a7e17c6cd4..a7d8dbbad4c 100644 --- a/include/caffe/util/cudnn.hpp +++ b/include/caffe/util/cudnn.hpp @@ -91,8 +91,13 @@ template inline void createFilterDesc(cudnnFilterDescriptor_t* desc, int n, int c, int h, int w) { CUDNN_CHECK(cudnnCreateFilterDescriptor(desc)); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnSetFilter4dDescriptor(*desc, dataType::type, - n, c, h, w)); + CUDNN_TENSOR_NCHW, n, c, h, w)); +#else + CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(*desc, dataType::type, + CUDNN_TENSOR_NCHW, n, c, h, w)); +#endif } template @@ -123,8 +128,21 @@ inline void createPoolingDesc(cudnnPoolingDescriptor_t* pool_desc, LOG(FATAL) << "Unknown pooling method."; } CUDNN_CHECK(cudnnCreatePoolingDescriptor(pool_desc)); - CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode, h, w, - pad_h, pad_w, stride_h, stride_w)); +#if CUDNN_VERSION_MIN(5, 0, 0) + CUDNN_CHECK(cudnnSetPooling2dDescriptor(*pool_desc, *mode, + CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w)); +#else + CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4(*pool_desc, *mode, + CUDNN_PROPAGATE_NAN, h, w, pad_h, pad_w, stride_h, stride_w)); +#endif +} + +template +inline void createActivationDescriptor(cudnnActivationDescriptor_t* activ_desc, + cudnnActivationMode_t mode) { + CUDNN_CHECK(cudnnCreateActivationDescriptor(activ_desc)); + CUDNN_CHECK(cudnnSetActivationDescriptor(*activ_desc, mode, + CUDNN_PROPAGATE_NAN, Dtype(0))); } } // namespace cudnn diff --git a/src/caffe/layers/cudnn_conv_layer.cu b/src/caffe/layers/cudnn_conv_layer.cu index 42c4fd0260c..8bc5346248c 100644 --- a/src/caffe/layers/cudnn_conv_layer.cu +++ b/src/caffe/layers/cudnn_conv_layer.cu @@ -30,19 +30,11 @@ void CuDNNConvolutionLayer::Forward_gpu( // Bias. 
if (this->bias_term_) { const Dtype* bias_data = this->blobs_[1]->gpu_data(); -#if CUDNN_VERSION_MIN(4, 0, 0) CUDNN_CHECK(cudnnAddTensor(handle_[g], cudnn::dataType::one, bias_desc_, bias_data + bias_offset_ * g, cudnn::dataType::one, top_descs_[i], top_data + top_offset_ * g)); -#else - CUDNN_CHECK(cudnnAddTensor(handle_[g], CUDNN_ADD_SAME_C, - cudnn::dataType::one, - bias_desc_, bias_data + bias_offset_ * g, - cudnn::dataType::one, - top_descs_[i], top_data + top_offset_ * g)); -#endif } } @@ -82,7 +74,7 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, // Gradient w.r.t. weights. if (this->param_propagate_down_[0]) { const Dtype* bottom_data = bottom[i]->gpu_data(); - CUDNN_CHECK(cudnnConvolutionBackwardFilter_v3( + CUDNN_CHECK(cudnnConvolutionBackwardFilter( handle_[1*this->group_ + g], cudnn::dataType::one, bottom_descs_[i], bottom_data + bottom_offset_ * g, @@ -100,7 +92,7 @@ void CuDNNConvolutionLayer::Backward_gpu(const vector*>& top, weight = this->blobs_[0]->gpu_data(); } Dtype* bottom_diff = bottom[i]->mutable_gpu_diff(); - CUDNN_CHECK(cudnnConvolutionBackwardData_v3( + CUDNN_CHECK(cudnnConvolutionBackwardData( handle_[2*this->group_ + g], cudnn::dataType::one, filter_desc_, weight + this->weight_offset_ * g, diff --git a/src/caffe/layers/cudnn_relu_layer.cpp b/src/caffe/layers/cudnn_relu_layer.cpp index c86c6907113..795e0a9efb0 100644 --- a/src/caffe/layers/cudnn_relu_layer.cpp +++ b/src/caffe/layers/cudnn_relu_layer.cpp @@ -13,6 +13,7 @@ void CuDNNReLULayer::LayerSetUp(const vector*>& bottom, CUDNN_CHECK(cudnnCreate(&handle_)); cudnn::createTensor4dDesc(&bottom_desc_); cudnn::createTensor4dDesc(&top_desc_); + cudnn::createActivationDescriptor(&activ_desc_, CUDNN_ACTIVATION_RELU); handles_setup_ = true; } diff --git a/src/caffe/layers/cudnn_relu_layer.cu b/src/caffe/layers/cudnn_relu_layer.cu index 9f617183baa..e7928bbd6e0 100644 --- a/src/caffe/layers/cudnn_relu_layer.cu +++ b/src/caffe/layers/cudnn_relu_layer.cu @@ -15,12 +15,21 @@ void 
CuDNNReLULayer::Forward_gpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationForward(this->handle_, - CUDNN_ACTIVATION_RELU, + activ_desc_, cudnn::dataType::one, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->top_desc_, top_data)); +#else + CUDNN_CHECK(cudnnActivationForward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->top_desc_, top_data)); +#endif } template @@ -40,13 +49,23 @@ void CuDNNReLULayer::Backward_gpu(const vector*>& top, const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationBackward(this->handle_, - CUDNN_ACTIVATION_RELU, + activ_desc_, cudnn::dataType::one, this->top_desc_, top_data, this->top_desc_, top_diff, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->bottom_desc_, bottom_diff)); +#else + CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->top_desc_, top_data, this->top_desc_, top_diff, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->bottom_desc_, bottom_diff)); +#endif } INSTANTIATE_LAYER_GPU_FUNCS(CuDNNReLULayer); diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cpp b/src/caffe/layers/cudnn_sigmoid_layer.cpp index ccb955cdaff..3ce6aef1764 100644 --- a/src/caffe/layers/cudnn_sigmoid_layer.cpp +++ b/src/caffe/layers/cudnn_sigmoid_layer.cpp @@ -13,6 +13,8 @@ void CuDNNSigmoidLayer::LayerSetUp(const vector*>& bottom, CUDNN_CHECK(cudnnCreate(&handle_)); cudnn::createTensor4dDesc(&bottom_desc_); cudnn::createTensor4dDesc(&top_desc_); + cudnn::createActivationDescriptor(&activ_desc_, + CUDNN_ACTIVATION_SIGMOID); handles_setup_ = true; } diff --git a/src/caffe/layers/cudnn_sigmoid_layer.cu 
b/src/caffe/layers/cudnn_sigmoid_layer.cu index e2a4b460c6c..48d6cbab6de 100644 --- a/src/caffe/layers/cudnn_sigmoid_layer.cu +++ b/src/caffe/layers/cudnn_sigmoid_layer.cu @@ -10,12 +10,21 @@ void CuDNNSigmoidLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationForward(this->handle_, - CUDNN_ACTIVATION_SIGMOID, + activ_desc_, cudnn::dataType::one, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->top_desc_, top_data)); +#else + CUDNN_CHECK(cudnnActivationForward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->top_desc_, top_data)); +#endif } template @@ -30,13 +39,23 @@ void CuDNNSigmoidLayer::Backward_gpu(const vector*>& top, const Dtype* top_diff = top[0]->gpu_diff(); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationBackward(this->handle_, - CUDNN_ACTIVATION_SIGMOID, + activ_desc_, cudnn::dataType::one, this->top_desc_, top_data, this->top_desc_, top_diff, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->bottom_desc_, bottom_diff)); +#else + CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->top_desc_, top_data, this->top_desc_, top_diff, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->bottom_desc_, bottom_diff)); +#endif } INSTANTIATE_LAYER_GPU_FUNCS(CuDNNSigmoidLayer); diff --git a/src/caffe/layers/cudnn_tanh_layer.cpp b/src/caffe/layers/cudnn_tanh_layer.cpp index 1a56418227c..e87dd9de0ab 100644 --- a/src/caffe/layers/cudnn_tanh_layer.cpp +++ b/src/caffe/layers/cudnn_tanh_layer.cpp @@ -13,6 +13,7 @@ void CuDNNTanHLayer::LayerSetUp(const vector*>& bottom, CUDNN_CHECK(cudnnCreate(&handle_)); 
cudnn::createTensor4dDesc(&bottom_desc_); cudnn::createTensor4dDesc(&top_desc_); + cudnn::createActivationDescriptor(&activ_desc_, CUDNN_ACTIVATION_TANH); handles_setup_ = true; } diff --git a/src/caffe/layers/cudnn_tanh_layer.cu b/src/caffe/layers/cudnn_tanh_layer.cu index 89df28a3e8b..6b5d7ae7ea7 100644 --- a/src/caffe/layers/cudnn_tanh_layer.cu +++ b/src/caffe/layers/cudnn_tanh_layer.cu @@ -10,12 +10,21 @@ void CuDNNTanHLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationForward(this->handle_, - CUDNN_ACTIVATION_TANH, + activ_desc_, cudnn::dataType::one, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->top_desc_, top_data)); +#else + CUDNN_CHECK(cudnnActivationForward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->top_desc_, top_data)); +#endif } template @@ -31,13 +40,23 @@ void CuDNNTanHLayer::Backward_gpu(const vector*>& top, const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); +#if CUDNN_VERSION_MIN(5, 0, 0) CUDNN_CHECK(cudnnActivationBackward(this->handle_, - CUDNN_ACTIVATION_TANH, + activ_desc_, cudnn::dataType::one, this->top_desc_, top_data, this->top_desc_, top_diff, this->bottom_desc_, bottom_data, cudnn::dataType::zero, this->bottom_desc_, bottom_diff)); +#else + CUDNN_CHECK(cudnnActivationBackward_v4(this->handle_, + activ_desc_, + cudnn::dataType::one, + this->top_desc_, top_data, this->top_desc_, top_diff, + this->bottom_desc_, bottom_data, + cudnn::dataType::zero, + this->bottom_desc_, bottom_diff)); +#endif } INSTANTIATE_LAYER_GPU_FUNCS(CuDNNTanHLayer); From 8730b146b7e19af189b9086e59fd1d5bc4214698 Mon Sep 17 00:00:00 2001 From: Felix Abecassis Date: Mon, 16 May 2016 14:32:34 -0700 Subject: [PATCH 058/264] Update Dockerfile to cuDNN v5 --- 
docker/Makefile | 2 +- docker/standalone/gpu/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Makefile b/docker/Makefile index 0de887d0e19..3a6575b0c43 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -22,7 +22,7 @@ docker_files: standalone_files standalone_files: standalone/cpu/Dockerfile standalone/gpu/Dockerfile -FROM_GPU = "nvidia/cuda:7.5-cudnn4-devel-ubuntu14.04" +FROM_GPU = "nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04" FROM_CPU = "ubuntu:14.04" GPU_CMAKE_ARGS = -DUSE_CUDNN=1 CPU_CMAKE_ARGS = -DCPU_ONLY=1 diff --git a/docker/standalone/gpu/Dockerfile b/docker/standalone/gpu/Dockerfile index 371aad5b1e9..daf6a7223ff 100644 --- a/docker/standalone/gpu/Dockerfile +++ b/docker/standalone/gpu/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:7.5-cudnn4-devel-ubuntu14.04 +FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04 MAINTAINER caffe-maint@googlegroups.com RUN apt-get update && apt-get install -y --no-install-recommends \ From 1c3af7078b64ef71a5bb0c2cef6fee528917adac Mon Sep 17 00:00:00 2001 From: Felix Abecassis Date: Mon, 16 May 2016 14:35:40 -0700 Subject: [PATCH 059/264] Update supported cuDNN version in the documentation --- docs/installation.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index 1e29a49d82d..4aac7c42d27 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -40,14 +40,14 @@ Optional dependencies: * [OpenCV](http://opencv.org/) >= 2.4 including 3.0 * IO libraries: `lmdb`, `leveldb` (note: leveldb requires `snappy`) -* cuDNN for GPU acceleration (v4) +* cuDNN for GPU acceleration (v5) Pycaffe and Matcaffe interfaces have their own natural needs. * For Python Caffe: `Python 2.7` or `Python 3.3+`, `numpy (>= 1.7)`, boost-provided `boost.python` * For MATLAB Caffe: MATLAB with the `mex` compiler. -**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). 
To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v4; older versions are supported in older Caffe. +**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v5; older versions are supported in older Caffe. **CPU-only Caffe**: for cold-brewed CPU-only Caffe uncomment the `CPU_ONLY := 1` flag in `Makefile.config` to configure and build Caffe without CUDA. This is helpful for cloud or cluster deployment. From a8cc860d6bef79edcdfa07d5da4195ba67714991 Mon Sep 17 00:00:00 2001 From: crazytan Date: Wed, 27 Apr 2016 01:01:30 -0400 Subject: [PATCH 060/264] handle image names with spaces --- examples/images/cat gray.jpg | Bin 0 -> 92726 bytes src/caffe/layers/image_data_layer.cpp | 9 +++-- src/caffe/test/test_image_data_layer.cpp | 44 +++++++++++++++++++++-- tools/convert_imageset.cpp | 9 +++-- 4 files changed, 53 insertions(+), 9 deletions(-) create mode 100644 examples/images/cat gray.jpg diff --git a/examples/images/cat gray.jpg b/examples/images/cat gray.jpg new file mode 100644 index 0000000000000000000000000000000000000000..43c5ce377167a49624c3e9d61e83a04becb8df9e GIT binary patch literal 92726 zcmb5VWmsEJw+9-$P+W@!EyW!Q0aDzGJ1s7S76>lIrC4#70;NE4inIhPQrt?>0s#U9 zcefC@`M>vl&p98?bMA8|`@`NlOJz zy%)2M{TnA&8P=n=PF7|oI~i7EF-<{DHwAk~C)M{J_WJL&3~b-Ovz4@C1UOSlyJ{-4(Ztjzx+@p>o2D)*08W)n>vW(A0cJ+m0U2%oK> zkPx%DIKPmnn7FtQFSD?qkg$Lt`V!|85|t7YmlBp>{&!(TYxA&kkkV6B{ zjq>&N<@Xighj_dZ5R#OX{6~hcFdv$N&(q)4%i536)syW%5)|z{Z9SaayqqAe%>PKV zwt;wi$*`hp`v02X;-=^H#?j03zl8s6&h|f4{ojiIA^f)uQkvEt@9fdNsBG=*Y5%W3G&QBP(Uo^{ zwRTp7*m}FzyLzc9%CJ5c6%?0~5SNrt6jqWKlY6e9tRN^Nt}G!aFDNW3A*S#j-v7yi 
zR>N=SWG5vgrz9jOCLyONDJL!@q$KiOSV&A+Szbs`K?vQUV#5F7Rde<9vUat#|3}|H zyiWhkEBSx&N-22QTYEt~3?LBa{|qD@M~D~1(-Gpvte`K#Z0zJ}2l4g%r;pK+(KagD zdpP;n+bMfMT$umW+)_^ei*`Yw=jb6QAu1>#EGYh5Na(q+u$;V-g0P~pyqKuaa}m~m z^V80!DAJolzYS`tV<FSTBxQxm8QC@_3BJ8tgQk`gb_y_Q!$TE)FS8AuW$@-_7A++mwn^;>R;Zhx@Be19}CWQF#qd6M(01b>0n*l^{#0^#IOpfzwK5k@l6?s{B zCO7c|0GlT%30{7q(;G&Lqof1SQ@>Bu26ytcGj{D-YRn98xMc77#uLDEgdx4u(xCvd zHj)-3aeR$dKkK6fTEF!8Q{J(_8S`t)KT=m8%fyf{ssPDof?}_gYS%p>XiQ z<*wOAB2mBa(s#xmi^QO?u~4EG*-d)yN4%;SfM1tjF4>`Zym_qDm*O+-)&Q(P@#PLkI zCA5bgH!Z%f9`(0SbO*D0H5;j`VY4sJfzj)=W8!m?lj;z-;gd5{rAr z?;jYXe>DlGlW9Em&Dse|lowxb%b53J4`l4#Qv=6>73zo5Rf=i;3^7az<+fW#5WhOn zUnjYOLC(CQc57&I`pK-7%VARBP?4U%6KxH{Q384;fAg!l0JI*GrPl zJ+}!-%7t1&;Y%AOsbReK_8h>b*M2_qg^wQqA&|sRuU%AaDpQ12i)8jV!KdzXf&0=J zTjQ78pk2GaT)O_LEW#qy^k$=m}#o6)^0u0Fn@X*V^AGBOpGwT8PI)aJ6PjXZe(2&nM!RlRHA^%&*R zxKWABL&k#34ucr^ZN{L(5=;K)8L$)}+D#1f_iWvIv#Afg za`$Cf#$dg-wx^s1mKn4rf81w-?<{3kCSyJ7S`6Lk4C@W*km?y(^LW1YwBAge9GjUyexwB@-qZupT#US+g`St439&qEa!Y>AyLakhnSfQ6h5yhg5-q`E|f1K7~K z8qrA|+AVMQl!$WP8A`cUIcj$$%j_N((qqc1w?H~CWoQ~601?BN^&x#Hq(j&>?H`{c zY`joix3tlQcwWIhC;wLbWE*s<*q|`7OQi-KR#RSsJQzamqwkk=wvD`eRErT(FMX7B zBn+8}d8Gv+&MvnLi+>QSCu%)e@JW=EDa_T{AI4wOe*je30Iu&{^V&I{c{8~8Ik@S$ zmGq4`oI(4yLK4AKH`>PR=bS1qj0Uku>yF)3-)Fs1%YrYKlCJP7`M*y*Hy2MJ=6wK+ z2xL$w(+&W-3RFU(*tXmo9{}@)zt_2En~twCC3u6{uo?k|K1^UyD5`0%7U0sAs%&9O zM_N;-LoH9dEA1K-4S~b&QARF^ui~aTP>$!R9sU3~H`;FkfOpYG48X5BgxgLD{^rAg zk2HAPA07aEzp1p=8%f(S;8{m2`x+|u1Q*JM6QbU+4nK;7efQR?HY440*anvsfoFxP z`#&lM0wO&@894R=WJ`YMmG6&AIVR`wybqP$(BxOxpm?MBM&A*rnNikLl@r?Kz;cDw$O3oQEK)s-6d_Z-WC`H;E&Z0 zJB3^_l0AbZW8>19!dWTyfzLsJ^gAE+@T*KBxP}@i6f~>}dxa%;Y1ZaKw_S z<=Tt2;zwV%7%eNlafYZ^5Vf&t9BE8OgyS4zfC{aTM}3i~+q5=vZ2zu#t%EjF()^}V zlbch@17ty+)!-D#&G=&E@fJ*Pc?I)$K6EaBAf3wu$8#P4^a(r}m{E7y2gSweKsw(C z2HpjRO;KG3O#L{?dD}v7Xy_`Iy-6`dOZX|+KPiIq0|JFwa;;K%kBpqiCYXHP^pRb# zBFSg2=y9X6VVXx?Xb1!fFRHD(bLz=2a1K(_d!BFkQG>PG+9WvRL*Strb`m>q3Z*~h zR4pX+MXltg)s>4#%J+~A3QV9CE<9U#;CIhw*|N!tt-hgi^+{hF2W1capKYYw%E;im 
z&=Y9>t**CtOcG;y>{@NblHMgX)!J)l*W)@2C=B(pL;4~^fp(5;P(Pdt?t2NP=*{i| zA))Ieg&Vs^Mygh1qVK-H^kI0UBLQkirC2ZAL|)ito_odXYOXydY^QBn+cpf-!vaE{ zp3F!sT`ymUHw&;wV$dPeBgP?{mtQxMV8P}u@DJo9zL?1Ha+2%i&BV8!mr8XDp|CDsj5&-T!i5F74T%#)oEEx;QO(!^_d>j8*%n; zA_p7Dm%2U&G}!~`Q}K=-0N*r-K55z;2gS~->BA61lvH@X@9#C6;5*x|3Q{wi813S; z)SsjbWO1x(4a;3QcO4-;cI#C{UMwgcG$VSS_IRGkv;k9Eu2Jn?Bq-WwC63Fi z#801>&m|-szTYljd%C`l84laWqmfmk55z18u^lP*U)yYiP_IbRe~ZGi@&R%t0ij2; z6kw-_HudE$+`CsETD|vucc@UN_gU5KIfd~7G%WV)b%fF+Y|$;zNntZpz}(@rj~u4J zrfyS&g+eTL-F(=|N)jxjQ#VH@BnKNGZzJ{zj!3nr`Zb=qC^E`L5w@M5zfr)v9o^5Pk)TM=K zDG##ff_ncbm;+33V&^QYqOJJnZc zw`?C@KZmL6GPN#vHVN5%$|NHsozTdh@fERzWzS+#NbhgCEhp=Ud8u+iTgrC%ssb+~|Rk(_{*;9SD)d3`6cOk=8%)2Qir7 zS}gB>Qonv7Dad?x&Cfk@ zszeo~2B!HeLn{(4*c1(WXU12o+N0Da)=N`x>2fBLGVR`cFrwc*RAA5loO^ud780rQ z#WLZwQR6H;BC!4aJCoU|bOG&crpCUX(pBbD{m@uXzm_`qr&pHy z#*PXK!~%^&h;NQmMg9uSR~iX53DFgblr;FYCpf*eue*6k-p7&a-%9?aL+T2@^ss-o zI;vtxMPaTy_vEULU)L7I9Z_#x#{gxLDGR#0*_|ozYM*b5XGn}| zWo^o^(F$mE9UfXgS~-D12*e43rM<%fnEB2y40;<#9=Ape5{0g7jUcv;kQWp(zX6rq z41q5Y9UBJ81Rq?k9suX}C?yTw9Xlg(5tr5*vW9EC7!KvGgrtk@h|tCh#_05@H%jaz ze@GIV@XiasS72Dx%C7+jKRbfSLcBX9F*~LYuqw%z(7V2@NFRe>j5er1j-t4nx5OQk z=s?Jpd$b-+!2pYUrz|97+DvUiCaq zIn_wkm+4@>X;UsJ7LFcnK0kRTW`8RGHY!#HPeLhpCtUmJt$L-xI=5ji?MzKX?~Xm~ zy^^N8X&K{CgKxaZJyO!CH%46($Dk~op-XSaXGyeA&5R{eqAnyhUaBobmPH|e{8-kj zp+<}K$H4J|47K&bh(1ESND=kL{GfqkP;Hm~^a%^l)X5cob^7?LZKCu2J60hG5Gn%S|=XM3{ z@pY<9jQLtH5oHs|$W~B?@!$^f{O+f_(U>W<)U3*8^ku;c)=j(n3kjfSr^?HkNvGMw z_A&93oX40ZSLz2)il*i9y~`bsnM)qC&^|tYN?s4wK{@PH6OreZHh0c=2b0FTO#CC` zwskRBh2)A~grGY%!yPYSR3|Iw!19Xt@02mV4YO}@CgEfv88&-FLj_0(0!r4-Z%>So z3Q&^KrB@$K7-En-HPj)OgZ;kOPP%YGHF0j6U-sMDq|_{^&dLXkt@akO`s;DvSf z=(#t`CG%utIMX=hfVVNAf259JS+c;A?Ft1RoBUbh86ME$n~AGr`#he+30vyx-cuMb ziunkd7}mnvM5kJmWaws~dh1)N19eoE^wVCcwq!)<6t`3}q!_^Y|OW4i2$yy|-@QZe-C+yfBai)JoD1 z9I|mdfSzLqvl26cYLq7o04Dk?P8(umEfO>E?vANA`!Rv80<8~LGo2VuGdOJb%;k-j}@sxIWZx2n&{7>uW!%1B7>if?AtZ zc0RWA6AIPaz*1=SieAhC3papGD3TxAC8MA&MGTY0!C`_ENUEVSl2i+Rbn@rvY~eJW 
z!_|zA+PYcrc?_o7i=dKcK!^`=Ut$|s^wYcEj2-P*X_Y4r8G%j3R(-1A5cc~eN{)+< zFCDC{W{XU7NTO1(4lWJj>kv(&(l^_X!s+jui3(;Wb7=?M-R%vTXU)&$Ym5XM|X{lE-5zh7-L_T(qGSe5EwWP$-Y9LoN_$B;)NV~krozC^;j9(_}Hw%4S zBxA-*TcjV76cI9Wi1uOM3wJN+xoSPn*2*stw;%OMC(0kwHBvu)LEH<_>N#Ap+8*zj z{`1phs}|J^)+b#WNo}Oy$U0jUcgaG;Z2wx2FJ-D zIPtvVduG#2?v-TXhxI8s>vxksxQC~Y-taHR5!!6>v_7fTi*vZmz3kkZ$?*e=&bKaa zQsD8{IzT<9v&!&d2A-(~MAqDegRd=SE8t?su9m$}&&Gr&bj6|uhJ6VxJI9)q5led6ZjcCJusKo_Bf9^Bv%;8Zl8p(&xV(ma zemOcZIy}Pe8m?MusvbpDx6Y`FExxyPxr;lkK8dp!PTySW3lF-#SIJN6N0}dIT0GWm z9L($A8)RN8wZzW$&u@z!fc5tjyV5N@>5Ve9c@m&-LR_j63Yc#XJh*bL7o*{yAWOJD z!&WwF#FWr^mAM+aO`meUXS9*&{L|UKg=f_4`EVb5`B2A7Q;%PELKM;uR>{O2kQ*SH z(LiMvt|nQf?>PU4|IvJk@6sUb;AHlnK(xnJks)K4#v_*Ddx`p`MNiL=-zmS1QOG?j za1)4XJ*ThnT2A{`y!Ji$Zv|;3_7~wij|4nPpOcYP=a8|xCC9SAN7_QUNhb4yI^kRu zzwEB4v~S<4@-VL`IAa#VYCLXxwndCIX!mhA2S(T$$Hg4uXLzZy1)Y+UhL&F4DE9_> zYn>NUslq60el?7-kC8esC?s#Wtsk_%9$XB9!jRHW#x=O#2$k(tDY5!YSD(3U-eoAJ zd- z*ur5jg2b*5?-Du}vNLU9k+@4B zbOa%>;&o~-Zm^39U8c2*Ne0}&g1W(sZjDii^>O%yR(^wb6;x59d4)ixM=%dr6zTW= zuTNB!oXBxpWzF<-+bfyVT!u^r&128%Az%mg9I-#5KBiW>T`qlUSgYZOkLIZ90}t+& z^m+Gaey9(RQ8q+x5;h166FVDVN-P;1Umt5HR*pH;#PEJk@hMG_NLXP}b(!|JoV$Vb zuU}~9CV5*fyb8ff0qrXf`(Lk>0MVY=?5F7Y;bqZ`x4dxO*Ez_R=#gw&c|5RmTo*OhBE7AyG2YEJiC zO;B=XlpghWr^Z(rp>~xl0u;4_2)?6Yj7t#=+0~I zJ)HYw5S^2)k|__0NAC9EOgfLFmk3Gc~8tLZ!^g1|nSU}}1n%2U&iwD|AL z#At+gaq6b++mcer=ll_gkL4ZNb)RHJJRO#g+N_MRR*7;@uEkWQu+QzJ!k{JIN-tkk zZ$?S`3&5(BOjmyJ8r%PjyB&=e7CmUySxcE% zqWmS!mqHrX>C;PEby4EUBUmakSjaz+?Bsrl-}k$ZYb%e2!HaH_DxU|yE?`-l(C3B_ z?`T>zs*nEJrM2UXs)Kw9k7rO5p$bYHhy|NSQ=LonMlqMM`ipR-m|VNWu(QlJ!EeuP zCY$M6;(xxs{Nt7dqq_#EtHNBy2gr~&d)~oe!IW-v5gG;^bLV~*?ar$PG>H-zCPR-| zh(F|cBHd8XZMMCu6%FlykH|DaMi!J^xJoFUxgY*yo8CD~qZ-3!A$2H^N^ zthjXELOH9bzs~x>D64&FWpf=%5Euzx6Ji{%Oj$WyvZ=tqKOO+(KMX$^eI8t@VU996 zV<*|!y1d?*KNp!PzetpTK9y^`C`puD)uH{e8A?2F+af;<%fAyo zpIW!bw0%M@Y1cS#YT9JUz;QOxEWx*O=k@8`!i>A5nu!W16quA8rc=T5WNrD0#|;=L ztzs~2tn`KabpX!e#`ZAaK4yii1SjJ5-~r%4EmNt%&TUq$UNva^tk{Nl5sz$ziftSo 
zT+8#6oRe%cLkHR%naS3I-Zh(Njdz`)*U+aYQ*RHaEf-D>zPrB43tJbOi$DxAJ{Al9 zFz{w~F17C&O9QJ+l~6Zby^R{~3%pH|RNQ624rum?^zx@%lfz$UFKA${A`O+4Fdu@p zmc@gsIj<+9yLZiCO+8P#%8aEX_nhY$kF=1?@Q{Q%l7er+g{EVh4&U@N91x~%gK@s5 z8l-HX(88W+69 z(>9~nk<}%Qr2Z<<4`7$hknzhf`~rD)E9r!OWJ4x%k}Mn* za^R}AH2Lgi@aQpjU-RPW29%2Pg6LiawpH`_p6=N1rrvNFW%wo!x=2_YX?M`P?#s}A zhXysFN^tY@Onfh7ZpG`ynPg|o{TG_u8_lqR@kLapu^HAyo)ThzSK}%7$xSf7E-9ve zpdhk>6LtTdzz+4?WLwRKQg5kdvA^TEG=>8MWsBJN+3CM~<`g;UocV+=w}ms#F4e)t zGAK-ERduya7JQ?+XS2`=O6YaMH{+jqlPddK<;jqWC2uqYTO2XaE%Q!eCc*7wcmx}+ zzZhqdg0Z>H0ZN0Ok**)0z-M?nW7-^#3E9)!xoiiY6Rn4JjID=ir*)oQ?+#_pH2m<5 znSYA4+UvXtp+6*uy5bU;e&E!e()7u>Iqq5l^d|oo(mNX$X2rJ(yPgd*E70 z?+bH$LpB^I^E-tT8cWnDKwk0yFg%rp%-Z=s^EmxWrRL|5%|EuUv=|_d%VvL>loT-~ zw=*kSHKL|L0>YORB8PP5w;!Pe0;g)S-dldDGRmU~o^cWuWiMz`GpCPC>lvk`rd2_K zn3>$63#tQrS!v$eQ&XECNj{$&$?(k%4!k1N7_oUp@)o9mCzh#_!n`*mYZ{7r&(dFvDU3z4!exB#)Zf;-fM@4 z986+Wop}}Cw!NO%5LUPKFuow~YJ%-+tsI<9_N5Q0rF8xhiz%jlBuPF3C8bk9lHvoQ z7uyM?ZbIEQrE{+1RIO<$HjgKUV16#enC!=L_{SmN?%UM=ShT|i*WAyATc`u`{Zb#d zZ(M1>6wM3AQ(qWi6D7&U7@xZWrynWHSN5OwORAe2whXrsGn1``z-&9w!7K2Kl+j|R zMtYxNluuF|$;g_WjU=@e_*(S=FjaKlcE{n=CgdxW%(j%0jUr1>Q1(k}cMcRjMKp~e z?tXGg9x0wK&@eNbm|KH{4vQ=n%d9lQ4q*0J?z0Z50(Z9Le~3L4{0Tpy_ibhLpM6f& zzpbi>FN!+J8jp<-U{I=w!G%8D_7>Z=PxyvqsiOeo((zyQqwE zVZ+%`DSQ8R%z1dbn!ob5LEoh@xSXAq(BRKyWEQV+pE`&-#rbqu1FNmQ|M`{Wq^{JF z$=Z&JI*(1xXnj>{JcMa>Sn^HVRUW9YavIrthLsadT{Vjy9v?JWhHF2Tuo0Qz_V` z`87ym8C!nYWMv+Xox%>#PGtiDHHH3|Eq)nf&3JAx{U%sZ1UFF_gRL+j8h$8cB<5N* zIFjtoH(EdFkn-9Ac)--3u2$E*xd1V6o8X=mG+5r-T%+e1+7dlV)E!s|F&nD%p2#Wo!@ArV_OulM%tvg1EwRl`pwhvmOf z^H=vKs&4mkpD-WP!62O`w^*icygFVndQ|I6dzTV!IXnPL%r)HgNH}%y@%m9NTN4j} zUZa9WqpTARZ*rURZ1e$ceq#|muqG}#J*XZn*?6U|wEdmj5LWbPQke$$84Lo^a^Nq3!5yG#ML{G85j!xhkIho*(g5sj5u^<;0 z-98NWxYr$2$egruZxNS0X31D$XiAB>i#!ZuY$lhG#a|>rJ^-|0vq2dL6Tq9QZ-#TWlIY4(56re_Zfi>pC!Ag;*!{d6;?L zoCI-q5@`t+*iAw<708ZnwQ8!B^XK{MZm52*D36;fsl)cfgD zmNogM!8C36t?JLz+HzWhoAs{z0ras}2vCNdF52)@e!8E9aXV&Eq$ zHXkyH+c)rBJiH5A40LV_(bN3(?z(QCPJTKPgFv)4p+$ex!}|0Qz#qLYcnkbMV$QuK 
zMxFIXXTSmVQ!~3bdLZ36n>~jX5gja)#y;HM9tnhhQAb!oK5}tdskA3CS=#u7Pfymj)odGV(+xYjczj+4I(&?H6xaG= zIqY5T_05j%Z*5zlrLS|y6U(rd1oJq4Z))bkjTAl(&)UVSF51vg(iRaTJ5!QQI_?X&Ng_R~CDWt|yJ@$zcN+djV!0fQ_N3!&&@6I~;g7ix>HJ%|0WvoIA z&kKu;P9KnGk7&bR0*&gE>@8Ndg=NQJ0XK)+e7cggE z**_S_LPpb*g#upn!P2%P%D`n&jQ-wZ3yY)2o#$szBN>rhBV(GZfkjFVvk~Dw$9;?_ z>UqX1rY8W{%?TQXT$Xu0oif#JIHbVp!#5qJ^JzS-i--FIs1uqE4(9t4J>A*+-6HR8 zwZO^S(;wgHOXzF`1O1QV!Wc|F{Bd;CiX#jS|`a+O&?vQfR1Xz&hM{73G@ss6{n z7J4*rx9DA1ez?*IrTetqLQ5O57h|Q7D zQYa!CCSK3bbvUw%*0}@hUK@&~vePuAHK9uMVchYj>2g~${9)&R)o&riYv)%}qYQq$ zxLifC3*j$r7?Pvf6rXkm?xS7hQ^5ClZAxtq{+=&B0vhy>N!FuPX&K4zNmXD5>URNo zo+%--B-@wI&Sr9Do@KJ+Z(neONiZaw_*Xl5m!Wi*aZxR}yP6qk5du zt`Rru4Dsx+!&cr0zJAbYkXMe_RdJac$4D;>M}yfMjGaaZ@3VEthDs2o#ONIx~oxd`}YrwvWs{7pJJmkJzSgAK9n+$ z4lvgK(!|$o>i$J#bf_|xP6NsSPNB~=fZ(OJ6s^hA@Ur?Q2E+U?c;bq)uRVPjN~+oE zErk*0S zfW_juulQSTSX5GdcLhHd0X>?PupnoTPZ)GI4fvq4NYYt&dfxJW{Ogy7UxD7qHiT{Z zSk17GyYwtJ_fD3}e~yFYevFq)i+Bbb`ltHlZ@l^FZIbT6V5{HRE(9rT^FKMUNSdrX z=gDU0os?pF^%aKneH0ql)Z-C6_5jG|4#H$@?V}pUxWTcETBka`N24Jymw^uuMv?oJ z5Sl9HVjtgHV{0r44n6!O%e3Gx;`8lkTUR?aK`7R^O}0D9ldCEeWLir63$r1>-UFs zfcxNNlTz0gn0`1MUFbxxbG8Kq?t`=2)>RL7yI^jrxx-ieTkBEm0)sABsM}iAYWBIZ z9bV+MOOI=NlO=&PQ>SQ&lz^wVc`S27=Hn|__~VfQBxAHG`Rg^(>UJ$SOA-fgS5|@n zy7xm^I3aGNX+w2BmL8SOdVoadr?73mj?!SsjB9xLyJ%ubl7~qa!<9Lp^9&1l@uN^J zNV3<#-Mio%m)=tuQ>zlYYGn13ACAxCV`(qtp&j1Lba%Snr-qwVjM;zFgy=u$i7yI1 z0RB7x7KXG5gehO9frHEqUrQvk2qc6t1qLtiT2ybVf9<_A>$H0&%-`ia@X@s=Y_2R+ z79A%+Ki~;5_qCOHl2&wdVGal0CwGoC%Ss@TttDS8RfM_2mUzW|I1Cb95FvHzD#*lE zI{YIQVbm)!woJh+BYD8}aWJe2-m%ykkB0O%vE4FKer3W!JRySs6%qhCW`$kt5*(OT zu~#@qvLuH_r{SK8;~|OSlJy&9|-AlJA>$oA#wm3F&;a#{>;^My*{xcZm)nM`T9mRysSHl$hsND#lk2qOF_(^LW zVX0rm&S0MrC&RpF*dV-_T|f9`?JEXBN&Kf^bx$$d5?&>{?=_UZ$m&m}$uChwY!?-2 zyGG)fU(2YTA54r4qyZ* zL_ChCY&af2-U_KG%e@gG`lxSPJ*Kf}p~gC>NUoXwf!a942SxZ(`cA+yZfv(j+Hlar zkGDa0<_G$#k|&!z%cKG)yQ9bZdOunM#M#U**ApvC2e&(S2UZ=H!{oRsC=tx383*0W zYFzRo*haRAO1QMNY#ebpG&n>5PYX|do~sLj(4BLZkULZ< zd&oao%ZTjM=ik&@j75wjjf>QS0t=ChQQpl(bqnm5;e$O}M;*(u 
z64V=eRx1q+!xJMg*O!K|@yZPQJG6@+Vo+63hwRex`ZRpXaTQ0AfA!^1S1nIh(l5rbsVrpbPgv zl`_M&C)T=+G4$q?M4hB!dRM8nRMe_!P>tDoJT)Pm5lXbt1owOZ6l(am&T>Z*aV!4Z zQTSj%XlRbRF7j-#Dq`he3j+I$`(s8|k=-$SB7Ja}1t0(GNAt^fZV?Y;49N%LzUI!?{%lcs+^f;WJCrxms_qkKI znSo4ZQCc($>W#S1@)FOTja$eTQGW`+^%hM>+rV$PHWvg`Jz@6GONL>wRA^D~xfRG6 zv>?A~)}0Q@XkGyWd|^{-1egFM*gR3~A=*e~lOH%%>0Jro`_Zv${y1=Npoa?l;2wn` zWB7iya){9SPrFvQCEwFse0#l~E=OR|_za7i~ zsfrs-u|-s{ax%1rdLSvSsraC#`$xb|mb)d@nWEHRW54c{@Z7@XQzbam7R4U`qk$=B zGpFr8_>C1!ZalS?@2GsN7E#qNjvb&2=ZGiZ;)laE7S6*ZE&w8cqHwd~d-)+@yT`6|K7eIH=`BPmv+ zGD;tw0gA>COR-pk#c$R<8J<|q;no;x%P7fi^yO8w zv5^0mkf2&Bf602F5QC9ycEkooU96UOonOKp_$nkAQQDz~DLY!Mjn@hlzH&OPU z$U^;+Ys@-oS%#>P%zO%C<4;?uf~WE>iX9LwxRDZV0r z{ZFqdwL-LZ#xS9$e-KqYOs?x|APpQV`+|cQQ^zqf`s!4Z+EYad?j4fbN88qgcSN@` zHP+$%y7lfTS6SXqo9ZdXqzE|Y8zF}~!S;7uni)T>2KQR{%*oP;S_$SK07To2nC8RN&Ope$d$Dn#r0{1`DTrVcYq?P=7OX6y3Tk8T@I=%fU$+JC*+1U(MXlEL3n z7h}$%^^qFlmmkZFpy$`~UWroUh?B5aw2&D@qR*T5bigr%$8Pl>M;810B9yOTQ0nL76=FM3kj43SX3z_Up5oUn0oM|d@5_s;h0w9AU;zoo2?Wi<1y z|4}(WWu9Tc_qI*3h81ApL@RCt27W(~LS0jP8kyRW4N)oOj)MB0qAK1b(p)+#xG6wc zZZnL((hx-??UJi+*eU>@ERxtnt_7zTao4M2F~+4F0C9qrH0=2KT)EgwH`gb)C0zbQ zU3O;bynaRxX<-V1m0x?*OKYRyr4+8<(E#c#ZbNdN58g^%o6Cf5!{E<3Kx-7 z$Y?Iw)M<(EZXi>;AflL!aA2DJWhkAE!xH^H(7SGlY`1{pL~yvzW^EUro=FeL>22GZ zA3eYrt@pm{?`to=zuy=ga$h7+sM@PSx&0#=!i!om=p&j2N^NC!`P%F{rwqcidA`3@ zolg>3av1JU_Ito@oa5{^?Ea?<><`$4 z=9R6Z-RjT?tBERG!{}X%H&*r=5b}m73JsOh3~1v99pC0@xBdQZo53SbyM^CIWFqZ!$F-Qh3o4GX`bI^g)Es1LvFDiHUuK=bg$nv*0tM02Z{d=00lw%z8p@` zSd0|p%vGfG$ozTO{{ZFEpRLz9l#bKH^RmRnj74KW@sL9vBonYFkB!fdk8fB$o@pN8 zj#~i82^{0G`Hvf^ZbzG+f&F*i$DXy~h^nQL)yYw?P}_h2@=o65;(YvmuCXNoDCK6A z4{|WOF663!%s?Cd!~^HY`nrk|j@ezvZsy@wDN?<@AU4P6&;7?-myDpP`;+Cqc#bD+ zzCrQedFzakM25#D-ZgF_P1kM4>>qMIIg&bsK#{Wu{{S#|7;JXjf_DROe{u3RAJ?uh zM-2NMZ9?}WvVgZF*+5nZ5RHQFtGOF*@z6We zTopJPOEE73NMW&0)JqjoK=&PkY&?#scze=TQy`8Y>UTiRxPBwTjn9^1HUsQ>w?~>u z7BHMqZ^e(V~$om*Mvu_)2H|_7a+;9G!y7d$2_Lwtj%x(_=W{Tax6+1R*L?&v~ z?!0Pp!9#vz6^w>Om47lWOSiS#t(I$iZh)t!tK)K6fi_DC6JpJ}vn+nZ;rVvtm>%VO 
z5-QIU$g0u5F57#$(^sguH^OThnCw<}A=s<=)tX#}O00Ifc4lH}H5&)$RPu}-$PAdhiOTUbTuZ9yEYaX1{TOIgei1cnlJz_v9Um%6k+{39)Q9Wd_ zpS^|0W+0RJb~3$txqLHmuVeV8E<4!dF{vB(!&&cw;2<%VW*dg+w>|8>S4K;fyJmSH z#p5xwqgqn-vQ%=D%W63A{2EXVQCRz%W8HNJ&fJ!hr}J5Cb&2z{N`|$Zg&a;7u+v2L zWwkBWf*{foWSY`b$jM@N1S~btlZa(KFm(0RTpmY1sxw(Bavw{CuRfhcge^p?r0S&F zgCP`5NRFz=I}l6YabEYG$6W2}Xm0v_Usr2N6#MyFM>~s#M2#)9*{pe*2r_d1x;>0J zi}e7_6@}shKFS*Iu0sHDl@h%fGq6{l#&&4=rEHE)M7-`|L$oNc#)=UR$rLe%-+j7} zy{2$JpJ^JHJT^wfu6^8&Z!J8K!DB0a$8E04@y4LTGP1|Vak9kGk8_>6U$4a1tTC2e zHYQ@h3JGF`RDMfLjujg4-NHoNmOs}b^VvnePU7pWkuZihrgOZqwV5TeF-2Z%<-+$i z3OoNyw*W zGECuR+lj23@>t0&y8Ll&OYzY6OiwiTgNZ$`!e+YBxhN)-ut>X-0lRj_oZe;JJ=ikyVxD2m(yXHtxeJjhPO|U^Zpn-PK%5WtuA0pj8qqZOFGK zRgty?DE%$~1N%SUeu(;RtJ2%O8Ho1*Q^cyP9l3G}PbS1*=0$2 zdtsfHPj>$RultY>9sYlQtPxptbyu+ScF=oejzU~ct>6+)+xY&2Z*NuXw6e}*c%bdV z8+QD%BmA-Bk>BTb*q_;dr$OAAxAp7~bAqgp7{Ti&uaDAl%gXCrc_aj;RpjlO>UEY|tWG!F$*QD6%qkNH|C4nAtX!m<`JS ztfZaPjlM_Re{bv8$Ae@eyT#S_Ans1!gSZFupPkQ8R(sJSs4wIYBT%GA)6;bRoO$N%Li!lgE*p@y<{yulY$EL?uQKOU7^43utl6KTLf_6|h0ep~lX58=g`Ri4lwO|^B zjE`nE1|srqig)Ct!13}%!+-Yw0J@!l)^pODXsmONFvS887j2Iae0+Pi`S~9wezLRr zhCH+)xnV0wC>^z7$&TBG+^ZAI^X+MbW#i}?fD=1YmNeaK0SjWGH#IP(>7G1h^JsH(g zVDzp&SmDQ7$vVp(OA&f1_b?i!7}?r7T89lgq;|WN)?U@9T02oPq_9m4fm{}&)%RN) zXDNhyjg2rRuU{#M%j6p&CYWoZdh*FDpq9oxth|+y?5Y+(=$2h4Kb&CFo}y%R?O|V2 zTC&;vJyD9AHzjA2{w2~_c!7hx5~E3TQD7*TO3xBB`3x)1N^vyQp_!WO1-qEcE|tY) z)xNJG{2voq>_u%*v0o1zfv0l!OmKYX#_RJ(}>#OBtnd^CZ$lQy&Iz{$$WR_@x^VW9ETEY+gKPmTuyXu>En@ zmF*@A3bNdqS>(H769m-M%v?_crx96vqvRL|a3stM#`fPBi za?FxTVlislhiIzTi1VJ{SydHdEi9`a0_h(vu{CaAB`mT??G3M{;a0V192l_C*V&$H zF+^B@7IrZfE=twz?7Pq9E1$#fNm$;3YVbuIZ!DLxq8NFz@+}=mByM(gZTEKIr*Hz6 zZTg9+lGGJugXw9@dd;f>s#&WOToCX(2AUe`5+rYNW+U2JecXpeY;Ll(j(BQarFtcB z>W+{!>heZ+M_8D0_Z8eRazP);+x3;Jb!AJKbv6XKT%DuZ#6x2pXCIJuK4nWS3c8tM z=eaT!2l+{Fm|Yd7txK5Gx@tJgSv=gIS$2687;9o{0B_jg%YI53RD-{{{#UOxKmIJA z#0`&6^%vk@(OoG#%X=NAJwf$iN^n|O>D9&NGc|O^D)5CO=F`hAGVg8qDk?(2EArfV zWYa&1-%&CZ(ku2}7KxC-Aeq(J9e^7*lNS3E_xSLCS4Y}Dpky_b8m%TrjhWD_uyfoA 
zSvNZ_<8?fj!T8^yZ>w4tD>foqwXG-A#Z-M{QmAMpk7FWmkxubR;DScr{{T0hmk&&G zRdRZfadzbpS!RhLg_#dFg}*W;V0<0Vfxpilo|>9Um7ILd3^2zeb!Ol|_dA$K4ipW( z6qEPZbR~E$P`@%H{{WPddv=Y=0C=~S`||kPZ9{f>e3m z$5VPntTlHokh{q57-mI1xfgW>ym$j<*!=WCe5zK7UEK>Q?k@&N+BP6FY@2|s!ADeE z+JhsOyV`(+j$Y>b6485P3%Wl35GdSkM&GBaO#^dyrkoBayGg@+op=DuSI8q{^ZgH9 zxjXiE?#7QY$PW(RE@e*Ju>ko`f=1)V_4unzaqTj@a9I=nl5Rwd4&i_bd^Y@lZ`J*} zn4xN7s8C58I)Tez!GQLWxI2X{@=sD_CuU-S_{cyTac_m&ay|(ge@>?`a^S4pIGJ*5 z{81CTJ!l+QYQcueW^kY*F&_t)>haMkJSBLM37D|tr?)ub-bUg^@vz(X{{UB7P?|%; zq2i@O7WY(M~y?g1nJ0L|7-X}dsg z#Erlwa(JJe!VkC`4f^M1(#AQnGWRyzZNHBrbtCuKc-wzHSLB>Y9P!48W-3^3xFBrX zc~$ZKH~qTmW5y%}ST_3~J1aMqJNWkv`klgMtyXx0yVgOn@3AP}Kngj2rrVbNGMY;+ zCx*IioyoYVC66t@_aD`{L(6Tr{Q2mPB{4`5l9$wM1?p1a^Jbe6v`}7gk+G8J?OrVy$ z4qCgFm4ds6D#WQ`JcG~Ue;m5h52+ef#tS`gKqfJ~&-shtRh7vDklS!a?Z4^jH&5cE znJ1P;60#lR-HBgso+Xdz-M;?-eja>vE%cB{DH3+U1w(iPZO@S!vhC!4-Z$uXR(xPU%9c{}vQ!GM;V zEs+NZ{jJI{hFA<_NMv4KBw@0hkLo_(->*Wy;iKw)3lF0;JhZb>-C?*}HZqt(=juX)DGU3wNwXj*-*2RV0Emb@2U5 zVzpiPEY^}Z%og%`mlaCtu`=1C{AzoWTxNZbmZ#f|oktMycG11P6R7Ldsi-5+8ghlX zC&fpNn^2fr>k!znZgObFX@FO;3~iBqOn9QZuvQ7wDWja#KC0`Bm*SYoS2Z3stP@K$ zEL|Z~!xYg&G`kn4-A9dIW(f^@7FZ4U1^KlN8gNTCBO@e~Vv3SSdIp{v%u2DAdPX>j zs|4sIdJ%$JNF!TQGa-pytrnt=0F+iAprX;&F=jUbQJ3O{!ecS4} zyY0(jZk*K8 zi5={cTkT5mRG@oe2-THdJ26DU3bE_}UVBNo{XGPy^&&W@SrSW8HD@TY3wGK?k)s~gunj)5OU^ySN{ML z?^Sh-Mogxb(%M5xdUd3OGdQJ$(UcN`#TX}yb!W=9Qrc`gk{hN;K1W(hIC*L;GML0= zEC{{eEAirddHaA5jkepXT^yvChlX^O`9h(`7kz<707!N|<^X@+^>o*!qoJ%c-?FDU zG8B;+qad$v0SPa&P*VgSr8!WW)D{6}|VGO{dF^*ye;1G5p zH~oD*-ka<6%Fu>;F|cW%PU<%0yN(r04cFK=2ax-b@znNb1hy@$h!`w;cP3>6asgws zYJW%Ue_`>|_6Cg_vMiFPwndD4&mEk5oATdoD0kTGf0LqvWT?v+DkF#>5hxpMB8-9w z4168Gq1&NGtjg1xXMhePlQ7%P{{WgfZdC5R4~@U?(Bmg)Rgj82d9tZ=EKszMBPx&p z@OUWwheFK0S0vd973Cn!BOcPo+`A)Wvf)_2%F3m8Q@KAJe068S=&TktbDC^pR=PZwkVsH4 zV0I|OaCRW5BX1pFNsda>J$UMN+;uQ}8$W>qAn?yi}0R0}?hUS%(q; zWA-6`?ftsNP=6`oR`!xXWhF>bKX5lX-Dgy1ue2@Nd;e7u6M%J$KJD?5v8*re8{{T@W{++#_+o{R4`^iyR z4$eKhk>NM_*lc|7_CGwjtzzk1w~|`%G2RKlla}SXVDUUaJ_%Ab@#CXaQ)>W^a28nO 
zk|bnp*@H|~00(dj^60sybH;g~HWz55X$LZu$ zx(-~A`|rO}ncSvV2av6Ht20CGXDhpG9ZB4|U5VSq!2ETGm-SACe01`>oaUM`juFE+ z+#Ta8VFIJ>tLQHI9W#zug zcLlhO{{V2`JzzF@r!G^nB2uvh0CM68EET`|j=OH^9EDi;D>N~7$wJ(e74kL&9iROt z1&Z(A#I+o%h)I@%+7P8jih#J5CDd>SdHGtRhImyRhQO zHXaH7o=@AY#s2_?R)o*m#A_W-8fEIzv6_ymOLC_gpsH^o{bXW zF;=nDfRd>^ETTD*yfa7xMgWv2bQA3eSCXexN%ZssIj=QsiJE+FsJW%FRVbx3uPv&W z%xJvMGI0WvSe|Gnd7%sKsf~tW@ubo)rus#Udm4vXS+!!_%NZ&)BEv&kic3nw*+-u% ziKmXUGcwe_RMd(YIF>Rrk1>8O>(h_;Ui7N22Sv@6)GU{E7IvODA(%2p73sUPSF?AW zY1{zwHcDxE^Ya{4k04dmddAPZSw)fY*vGqXDPF`YTI>aDWHOfIl3bPSkV!j|VC6{^ z*onBc#f(-j6ee|0&ZDl>t5v-{#JPUzFVy5&i(#7F(*P)8=+-^WM`)r}S!a$VryAG# zX#0^yTn+5b$0aH~n;B~i=0guCsKEnP*<`nWCg|;Pl*zIzajGwIkx_ViNm@TO1sNf+ zEEMcaB8~;KQ!i;&twvcNv5&1Jhk*e$l39?qDLsj|j;_&Guc~cOIUoJqr?D~&?b`L4 zoY@UWVvu_u!wyF)0B6~_nLm~??=$RU3(_mBwKgg}LzasMXgccOrlgk+Vv;zWr>_(>WoeweRjA0_>yKI`QajG!h-D0UN$vgm zdp{Bx%YKJ`1-(=CKTpYahGQe??l!#{=*Z1tDf8LNwbE(U#@&+irsD1#ynBU2+jH0R z=JoF}kkuO7HL0_?Niw;dqq^U}JpJ{ZW?n^PR`A6`02nCo(le?k#cop3#<9Y)4`P5- zOis*D?fP4C#P9Mp9aeva8&_ehvcMU+@<2aOQ?oGGdw?5!eDC+)rZ3UWIO%FyjWo)i zlQhc1{#-Nh9FXiAwnxVQ0B)#XiET5Dt|GRvsId76U=E;$`DXZ5*|*#Mx)JC+;^Jmn zrYVdn>&&nqd$wZd#^OzfpFV%TO?D3}TFoO9dm``^JeCjc8@9*D{UmuHZ`4*+-_lO& z8B~wMqn`ps-;rbcx9RXk{SR}x(}Hwma6P^3)9Sc+mb~$h@<+U~k_sqd81N+c zWBkMYJh=Jkg}0>ek>e(KYeytz0%hQ;{{SU-D8zdKBYnId)zuEZCrubTkwF|Xhahu6 zLay67abdEy^SVBVZVjkd>@auT@$-PNkGMb+!g?FUG`uL4cmV| z9z1o08c5QFu6Ac&s(uH%$UBcJJbk)nboZu@CI+p?UQTR4v`s1 z?B0bvVp#l=2FLdQ0B(ue^A_L0-py#4pj-64@uw}DbK`=6 zyna9a9-?%6NhPY4tT8#1UT$nHsT)71@6e(k-osQ@*DF9 zZMORkom~g$d^)ET7D(2}lei$TRLQ@}o=SJz4ZpwnetJOv02F_REmP?>QqD#pVes~3 zg3kz)a3*P$91ua-L}PutZPJmb^yQ3}Dh}Lyk}A_ou2h|z#8hv&_6>;uc?W&^0L!3t zL~9XRlUjv}>`_SEo;v_r+8FQq_&z$bXn#&}S&czXg^IH(EX&`HR3m+l9xJkt!SL=6YBxv~oN@<=_so8I9m4dj*7V-|T9t9wQ6k3Uy*m=c zqJR*rQFjdc5$ES){W@;FG0|4A+J_1?EsaEQvRH;U{u_^%AZ0?~N{C>QW@ZQf0Ns`# zH^J-Gui+0?;qm^P($RQj)D_@%j{P*$mE@s%M38V^78*2b3{koJvT^;;li>^`M>E%FLxw zcqNg-`{QX;M&JBM;wDE3% zzODW+rTjVJzl6yiUJXN| 
z_p4vxBlzLcc}YD_Og^c{U@_ShM7EiHQeC0}D$riFgTppDp=2tuM=hBVg9V#@jee2; z0EiwJ@n35lR`l(p{{R&FCYiASxf^2R^=C*#exoCywdEM3cx~90q&ChQGgYOFy%%CY zj_8ARUW#hT;L}*WK|DIJH1cN?ZsqW021DaEZT%d~yX*d0rV zWc2Up7DN)+#NVTllB<}jr5l*ya!X%|mIvwHjaltPd!FIh8**N=Jq*ylR}Lm@vfQs$ zo__(7YMlpDg;S_4aB`NGfGFiCtat27KEKk_SM1V#tg2rZG zeiWi8LVXQeOEgs?uWP%+PYlLsrJm%xUOLd)jZ@l!%d@ftSXFo`Gr1?KmO~8K=EvBc ztK4Z6#Wkqm=K|Oh7;HI`6(Q8i-q;s;8?onx-CiTgX0n*exU6gjzt>f*Nny2F=7=nN zylW*G4^Ly$8Hr`Ewx`b9#>myFTPl`dS!YNk%Vg-lrYu@X zJnUkDOGv5jn}aH+OkbkDl`x8?LV8nPpo>Pamzs&Q)@(?PmE4iMn7&04IHLe8Q1NGD zG3cHCGM=N-&_Cga>P_sHQmz{Lo_;xs$!uKMU9P;;hF%Q^|7hri4upn*c-~q7T zmr9;CgzWLW08PyAAwQ|vM#K%gdE`7FKdAodO`2BR)!?rd#kj1S^DMjUzQ6)}KI3iA z+vlw9LzCwk@?>WoAx15^e+#hKcnC=I_v(=?9_mIosgg?tih@e)Kn^!M@%x>(>f=JG z5l31++x;>tO#cAO!GT2saq<_LA3i?+03BU4d|2tS;;n{Qq-b`G`Q2Q2QOUia`7GaK&O5v zM%$1#@;{rVMX3EYIxAIi@-qYu+=xx&4YO>^@frT3ZmT%2P-Cf>5ZsPv%PSxPQZ_~6 zJBUPn<{PjZeDryrTQzI}YLx`^~+XbwI)Xw zU1Fr1ox@RRCW-!xJsXGxJr;;;HP`ef}*nvvSHXCe6BiqOhjM!hsOcqkB$s#no z5FNR@6h2lnvU|PT_#O}Wy3{7C^vfrNcgfL;X|b}gc@Qf*z!>>(Sb2yao~cTmkb`C9 z=Zhb(+hRx{`PiTKC#-qv@-6K?PTO+-05^r?Kbzy{{asqLUYoUZAd?6tXk(B9-LPW~ zxb~1Ybi9#7A=t8b#ZJ*v-= zsV#XT=2p*QMB-Ka2^6N@;10)rtsbmtWvrEQf*4H86jhwCUO^G}LILx+ANSkOKrAMf zdpgY~h92Ooh$STpgyeaYa(>)@+-=h+@VIoT9p467(4!yo&BC$x#`FjnBBX8~F z$sJMj*QFT>*_OK1V0u|`QFw+fjnfJzt| zoFV(AfdCUX0sXmn^VY>b0=~E6wLS)&9HAdnYgYv(WgWX-)RX1EM|`kj`xQUarU&s~ zS5IqQ8*NrNUc_jQ{>5nrYxxyH1xS%c$AUknTBW7wKh?th+M#2!6saUVwe5gdBxS%J z-_$>#Bi-lEMb!L0Q=3Z3B$X@23`&m)jX-YTP_pu1yN*86q`oJzWWZNy+_4KTOTcW*v%Z#n<~f&8leFzaKdoQQs`8ImIW3L+u+W2jFew^8LDXvYHNs(T=H7x&(s4aSCOCats+6 zr-W{c@xJ@{`w`aFzYke&_;QXn6?VOg=AJ)^w^ikOmQNHL#w1A8wZLwUH*WXv#fr$G-iwM_Y3kID$m9~u ziFV}_QlS;3umcFy(b#c~y4eTeKk)MosLX{s~oS)q)8nNlY+?vTL;nLf?`} zNJsj`rIMZq)d59WBWD6(O8q+2TF+UmcW}70k=GM?aY_wCm0D9rUl(p! 
zofRt)w(|ou>kl)uhH> z)^n_x3J((@t1KA!Dedp3(uL+rUV;AriN3g>Qfb^DsTIyf4Mxz?#-?g4a!Zn$J(@G6 zbrzqwE-yKFS8m`%kt|tKcq_@sF6n>boekL?A?z6kKg(1*C>uMoeEzPj`m57@Kday~>MUJHo(7Ggo}DF$ zgkxqAIV#4_CENa>+j&fzbNHLlx-P~;If=(&yK?ZkT@3O?GGo<>L{4f?ue zy+`!&3mP8}a=mzr$Y5h4acNdCgg+MLza9Yj_}zNf9z#fWwH|7+Ni@orU7#ELhzHDoc^@8nlX(&; zA&M0Y@{SGQfM5AskbLj(r}ygDi_*68t{EbdLIGBG0P^Mh!`tO|Ujygg)V#VmDJ!he zlPpLYHeyN^JE&Oy0M*FsdbeVo{+1h1fQ0WQzlsNIs9%POE!FKla^7?sprb`-@n$OioJW8xgoYVpXC{$y* zrxsra-R>P%_1=VAe6pCq*}Q>-aiQa0LhKeqULF{R`SJGaE~FRE8l`(ecwRV|%p~R& z9Bww-_AEX}j)>5w}|6_4m-(v_?{A!>;w`vnOsoCvqcV8a7?L62r%j zJbZZRQQ-doP$QBi3>XPx*azE}7exokk>r1W>DI$N3-tOcRcjbvSs<|rKXODu9b6sv zJ014i`2G54arj2YJecT}BQ&o`#x>u#kp%@J3;zJa1UBA05!EA7VXD#8aoPbRj@pLc zqklg>8I#~B`TGvBDy}2ytkqJ1ZA8e*(XSxQuo23;5wZL2`*cOnUYy1CR%NRM3f4ea zk$l1d-)_-6n7om=2V>P6@d2SSc$y)E4E5S5`ISYJvfzRV+!Cp`-|x^<(rqlaE$6K~ zLEH-hHvsZ}Bn4ulZHWMQJM0HXtR9|4nw%`08LgR>K;AhRL*ZoG`F-F20MCwyS=~ER z6e71gYN;XyP6$*NB{qbD2Ipq;@zO{5fVpbjT!dmd6=zhf2bSO7;icuk9mf*8{r2gL z{37WcNcw*pt1GL~qOBzZ9~VJol!bdv0RI5;Hvw5sfL=D^gAKq$vugy&1x#GV%N4PD@ZI^F_Mg;yv!14)k2N?iby{`-MT4g{B1~HD>GTJ zDvEp?lr?b#6Uh^7u&(i7bM^a-AZLI0pmZ2Jl z6;3s4?^X+vnW|>1MIO-oJ1ba%UC3 zbt^h&9qQMocI_qAg9wxNQjVsjkdn;QEpfYx{?YSt#mHH5KRY+RNq#3NsuHMt2wNTA2cL(ATG>9Eq+e12N~GacG1PK}6d zStvi}S<)p~B!R+)hafn~+#UqZ+XwBj1;xQtttVo7-^0vx@v}t~#yH)Q!pxA~w*p2v zq&5^cdCup(3I0+Gd5-@88-K+VI?L9tM!yI+cC$8*)GL+M`tb#;6{5^wbmX*${5@TU z{rU1a`6wju9hI?|6JL@oxjON=_{;wQ5f8;aUr6h3!S+J_nW5;Wj`a&MtFg84S{F{? 
zeL%|IiW?I{jMW;t8gF+sRyCezE5T&6aYe!(a#ydzXC9CG1Fb&?k^UY%G1OkS;Qs&+ z-jx0;e+vGa{wp;0M+1s{C0|YsyPb}6(-A^k#!Brg*qawL_3U;$l*mbpr-*i80GxF$ly{ANQTh&qnNnyJLWc;h3GDsRSTk-2Po znZ|nAjh6CbvUq7Kq$>>8VltGm3|-cW8Lr7G;{|qe$$|GW4u9$B>@_|X;?l`sEUn0L zf;CwyK_gYissgVBF3~GX!Iis{k`KFW`Z#GzkY^BM1)?Uc8ozQFVu}dch;5QZV=St7 z-^uggo2E}f;8zbuOP8WwtBLFo-H_5n8h1O49x%-^y6(Vj%iHbW^p7Y%j}Js>=e=Xs zS^F|vmX4v#WhSvHp=95+6^jhfLt1CHd6){VK_ z_UT!J(fKpC^WFFw4YJ3H0hk0cFS!_>)Jl(m;EtRB071PxrKfUBQss46Vqnlq8TR(y zGy>0v)V#JeK zp-ROI^K*7;gSD~$05h*0iw+}jHXAof?gQ}IYKyIWoo}y3U&s$m};8f2*qBr}iM!kWz}gOB@qP^s_Xc_~4kR;!h#vPr?0ze@=_f_;0hQZrUpn z&`4tq9p{I`DyVJ|uN|dyKPPZD`|q=Lm6Y_WF^0jU!U$|FM(s~x0#pVJs-dHiK~+2Z zmvgZ_Q|kI_9#Zuiu+>THTl}|X2+p$y?Mq&ASN@`gW512TR$hzOy*nPC&>3AfH?la| zOL*cOMI$j%dm9a+l~{rS*cSGm^v&q4MINfFR<*cHb2BmRG(b$;NLB$r+vT?_@Cfne zqg|a2ws4Y-gA^h~*n-Ne2_O%J@HW`^{XHtW*U;3>H>I=~j|64AGv-p$@!SF9)~C zY{zZC*?K!=bn?ZU7hycUqCKP~ytLqr)bsEdZ{zmy)el-=#fq-)5%1RpkZ}rQ9DyuA z1Ht!f0qI=H<1Y)+it|jaT(n#_*_nOZ0A0M4NxYSIiVWL2G{U#A?{mtIOZ z22elDHX!_u_vysczMA7TmIm;cEUC#2nF11P>=%`H2gxK41Mkx3iTp7g5%u=#EX$6! 
zB(9AjH*zPJ^HobKY*dEvxcKVNi1b#?c84T?G(cvV2g*nos)PzZLWsuwRrPKNQ@duS zTl3+SO9cbU7fs?H8wXH4jsF0brIRt}#eHFtOf9m0m8^;6bsqPGIVDlPShm1#;O*AH z^hTP~x;IbO!^j||N(zxYX}~Hf)eHk3eU8O*W-e7S6%l>K zIR-HWAdUR|dF!RJ)vjF-2;&loinF^b_DeYfhR0@2l>t9~U14XiGGB)q7=j(%@;mcY zGO6XsyaC~N9zUnM@6r2A;$yuvYG{%OVTMo3J1huQ2XJ>JV647(1AV|7^jOu{EL}}8 zO2wO0s=!p&y&#S3l|T!S>ku1_zE8&c`Tgy_m+4dZEosgm2m;>uUUMM^tM&G@WS z{cXmSMtONJ#3&qxA8$q&@D*ms8PfYJMZ6DJEpnth<50pz?Lkra$PVg~2{~>0cJg|( zWapuQh6JPZHfFNCt5tz#79-#Ty^pJp{+}KrZhLMAo?T*T%=Q+ZdlJ)I6|(kn2xEpi zQr=3E!^ytBy_sN)&wW`-Mr%%`3UeqzjG-6zQYXX2gUrP6_nu`%t^&PD_9y*8dJ1t8c20bR>jD|tq7UmYC zNu`oHtlW`HZxeG5&6PTs8A`1h>1M_1?Sx0Mt-gIhTWl z_JB*t`n^|7XuVjH{Y;Wf(a@Wd*!vU5k}y8Cq<_{{$R z51U_$Nc2~#KAmUd)4siWNnLf8oygx7^7>aVSLG$`L50+KXeFa=G0HHj z&O02{*`eX>44>ei{5WCJmarNp(R^;D)LOq$O%$1(O@9ZF$767IWtK4>YIvuIzo_zo z-KMWjKH4k7oXj-Y%X?n*uhPvG_-p(=>5oXP3Rtzo;3GF7wj;g}EJC!)-*`oQm8X!+ zB>YbgsJAqcCdxWR{Y}z%d)X}hcLR@s;x=ofRC`Y(Nw>GBpVOEj4_+#3EqP~_ z7=w19`gbgRq~A{?1%4qtt_k14?*A91)oSIFPbA3bQB)(=B;<_=qQ zahSU?O&@-&EX&SKm3XAHyAlldk zeSigzg+J4%Us!()mh@#AHEs(ZVr)~-&m#;_mNic!yzARm~7jKS!dfW%2T7r|{>Z|rNeY9{krY-FG=9?@k+uKXjvJVU)Qnw7$q0K{2-=u_p6VUK6U_(sTMTP zD5XfE*$9xUcFxS(0wW*|_a~Rn&mO86?K)$y(W^3YR#;Kc7dtZ(x3_Yme<#Ty`j8qz ze2r--z1fm@q<+IL#Zg=h;o~9kx&0(<{@p10%AL!t8&TVgfT7=s20lTM?g8hwkH3S} zD#n+dv@B#z*j3~~+^Z5#H2Cn!S97@VKk|&gXnJ?l$sAh-m>DGDxo!6$3i^-B<0nUnG3>V3N!eEP_<-b{v?=8-RfaCEw&Q+n3+Q*mosj&Az0*{T}rX>vDx=PJTc;S1K@w+ z>8QqODDg{DT8S!gnH&WLSy=*tK-=yP`v6Y;9JJ1|7^^+|UCCm!S`3Y82;k*N?b(SN zz?Db2LFMC;pFK4da#(z(nXwpJjS5nE<9`ypvm(CC9Dbi``18KwZh_}h(#OcuyD`QD z2e7=nF3v+P?d6k>{{XAWEBo{i*0_|<&2H^=5%(-G2#d?KQ*>EE{gYrkkGVY!DBv>K ztgSh2R@|d9G5CUo~u`Q%5`?F@(hGP@p69l6>w) z-g?3Gnyhsz<+SaUD>XZ7Doj{{M<*P+t@?)~2gib2ufC`0s+YSrV`XV4Lmh<*#YtGt zBHwO%sN4NL1SN;~bt*ODCNldb%$sew*ah>q9f;fy{{Wv=Z8;u6vyTkVBc$`l(3E0O zZz}~MSZ}v2!TWo*`}ERNYPR93C4v!hv&O9MszUb2E6Ol^h6q{ZO3_J z*uU#t!zyx6KPVRC7zg(Ibt9;C3|I8r^lVD)!HC@otf~vJLZQaQ7C$G)M88dO_=LpN znzLnW#0XTHStz1J;XrudhLV}58-^{vw2nPmH8y%JKV!dLXDZcSdk-94#ef3il=g!d 
zLXt+v1iSwLZkHV2sd|R(#7L-S4F-|<#Zj)s5e_!)Ne|aS^%e|#K;M9l~4_PMjEG5F0owP}4# zidF#NKp(%L&Eu|Izu3uEkcc~XF0_^E*-*@_4=Ey-p8$nf!QYOD(C>@WxV+wP38^vk zui)Uw>Z$`wC6HxBR9wyqI+C;CDNSZy%%xm2RWfdZN@Wr-FXDSgP{nBL8k-+QZ|UeH zmReWw*$MAhN-)lXXqK6y3pQCNa?Q_mQiXXiCAtiL8$DX|mlxcB{q>NEW|Ez)TZzSg z$khj<0E<@B4ggY+k9z`l!k5AX2 z&`!8|(#cV&WwFl`^kcOY>hMD%vt@BIO8|2;n*kt%2Xrj{-q7ID-lbx+O62l1>|;$0=^w0yj*sk1XmD$i=n6uzDR0OV$|QyWd~6fvH}nEGW8 z=Khl-F>lk$;Pf(=2a#<_Qzr#oJ20gre@!+lPbh=s(gO^Ru}x!TV(qac{I=tdTRHR( z6!tL_XL7LAhRl+R_3K3(^^tpsfg}~ChzMt93%`PW+?B$QC;Xd;sLTwI$0W5LTlA#$HO3!#F8G;Mq-#GOgTwox!VzVB$XST_Z@53 z*DphA-%+%6fo5v;?4*wyR5C~TY=HL$$NBdgus#Z} zTKV<6&^^s7uJ(YtnGpdiw z9M6DuDtce@5jEwKyO`Zr$sDB=KI#xI>He+#+n>KcJ#*>hdEQ5MV1+NRw~?qly;eMTU)yVGAOw!oQaM&@n6s-B>@{ZydJ%UR_0|Oy+Xx8fkkIP27=u z$r_&txEt(Smy$j@Azs2oj)kmA8>rzNgpsz$N3g4HvnJcOpDVvW&0pIVrnQWaqsA6k z*&-;=%%_mzN6Hc#fSAZd`v)OWmzH zD%OGBJ%(VU7|*k~q<2RzgSh*6-_Kna@GSJ$1!odQ!Z1`e<^KSfarqoafIfN|P}BGE zStX{Vxw#@t+(1(rKaT(}w_s0?*mP^k=+5P2mMI*_BUgQiJOb{0>OkC`{(gF+T7p`# zO9TU%kj4-NW5QH!23}unz+t)Sa;~4bdW_Oos?w1QF(4j$aocGS9lQg+{{RGaLdWSj z@%gx9SPD0AIM5)#5QZn%8^MDFkOgc3(3a&?z@)XZ|T&AmepAMFx-eVOvY|d$tKDa0(mPdd~T!q z4yQAgs9dcbx^B_U0Deoh-KSlM17o>T%%pickJHx;E3BurkX%R6t68|BDP>2Dhm&F@ zAzR?=1A(yI{4ei2tTc4GQyE`X0Vt?pv_H}RFXvo zITRKQd?{c#NgqEYc?Bo$w^8}bbz?=Ojej<`=(WQ z+yWgv{@_Uaf;Zc$&Qo1m#AhBAcV=lKSrGwExe$kl`yNZ@{{R<6xb>XZg+aE>#XF0$ zey_0RqsGK+e0{p-m%U>lQR1isvTpp%PR)i&3F0b#kvM24@N3MmdIfBF~v$-hC^R^gNe~`Wu=Xf z%_cH@CX%^w=LnFMnzryT*&{sn9ZzWJJrUP7W~ZgSCfC}+npmE`L47*a9U*=S?xSaxF=gb!}_Cv%F88>PyJ)S=;D-TuNa>FylIC!UhEaB z+lHKrW*YTku`7!v2unDaLnJI25Snr_A#HY(77@sc2~F@2fMUXN-0LF$ed6Q0t#=TPXXG-(@oj2;^a zhtEE*mgV$@g;}!MSedcVraA1!)$9IblUPZlJbj&R^YGQ^j-Ju_D$LrKAslz@Kq%)DA@l#W4l1%h+YqdyWy+b(#{?Fwf ztF8rPAN1x3JT8Tb27-QJ$H0v;-}o- zryiE9fijie&PPq$tvn;@u~#l?8~F{GsChfLY{cfDaezS@5Iw_=?0PX^HAWjOw`mPK zrt&pwL)?zWAr|D(#~&1Z_Q|Z#vXBGDP%?X6k5-$Q%K2MVabHDR3wGK#=~o#WILg;j zU(5?!#~sMb>$m}(LYDAcb+8=;ippuNGe;9*CX!0^(s2S>jjg1vf<~=38soblUAJQz 
zh6DWFcIU4u&Ssw>VmQQRH^;qdv9#tg3zw3_Jbs-205D-2Zli98b__P_HM;RDNA(P2 zW>rK1M-DN>t+3zuj{Hv^L!p+jqaJ1$i&Y$Hq>#YTAVo4NFn-wKywCptr&`SQ`$0{b z!dtCkRBrr)E_f`dV+BQ!c?e5r*{;m9Vs$9*C=7znMILntHh8I}bW+pNwh;i=M zKzOuAek2jN05|IDyGL4Ovvwl>En$j0azny5aDM7|`SBn=K>HKWpHk-66{w_zTtv~a zXxnZ*#dIvHR3FqB{Qm$rC)?F>->CJLwTjYM3nYl}y0GFT4ZoLONAJiV-;Y%aIf@wx zD<$h8%+DWgQsst;fql%fb8Y z=b;{|z+~|km6D{1S-4Kpsu>`R&FqpkU_2q*F&|;EUaK0SW_on0WMYZPj7HNa4Hb~b zc;s0&_P1z!ZNInuT@NFR8Hs};(Mt+D)n*F8IUV*QGvnOcTXDbne}18{*-H5o5yE?_ zk||(=-yvn-;lbRg*zd6a05?+eWbI>GS`e!WoXCykJFpM2B=7paU$hS!f8pyUt<1%8 z3b=Z_c*x5oj!cj%FqL*5)*$#l2W~#o(RUG;IqKCCOHxT3Y9rXq#mbckjV9#oqx7AL z^VO3vr{Id7(A$;?gDswIys=0TLrR5Mllnq|#O^xAz}gfhLn|!r5arEI=zZLnu>*M5 zZyTNVJrwgAf#s)MLqB#K5wS7M!CEpGVhV%0fKJ^PMhjrQBh^%H7pRp2s5ALgot4fY@-t7a*Xq;q{6^PD3daKyuJbVL5Hj|Xo69$!6Eb>1o+IhdoHNCV^T zN5gdoXq-53#lLbxZb#eO=d1Ktj7~!b6^hQ1Zwzc12IUjZ=aC-by}gKUo`Df$;7v;` zQ%oa;=lx8`c!MVB~;i! z;Cy?x=-YPfT)v+ZE5!1~s7dfh-(kcMcH_ymKetQe`q?XuEhzGG*R@_pc>8|TJQ#q! zSI*J#+xH&ss%A{YH7uECk=_U5@;TV6s3l49zT?Kn{@o2C)tRVrO$;_&R31*Tje!V9 zCF95f+aEu_SKs}m+s9RLBfBd`j8WunOjmt?-0YF<{Hf82_QuSLeh43P(Ek8f z)Q3w{mTFKu5vK>5KMZ486rtkaACIv8hW!oc=i4t$mL@FQ7aK0yl{>QRPTOz&{(7}( z9JCb+LoigZWS7GLLxvvURkq^Wf17dp^+>&=mr`mj>#M9$I9VmG!Dj9f0OINh@xJ9) z5JPl9$Y@m4R4ZijSsAM1EXL-krvx_j7F{FQI$~(gBEtkc$qkUXB2!lOB3XaR>h+-Y zWl5_V_zKkYk{Om8c4OvgVdofjgqbtUUc@?Do}55v}#sS2rr z8~B|Q7A3t?imOKx80%HSVeiD^7Po(3?816Ci}eP!CrZAv)KxS-n#tqxI=j=~L-fRW z&q(lh^e!WXja%y%sPN!vik^vn8Sxd7jM|Sc9#2tbWtu#8kg0n8l)cmIQ`KB8=fvRa z<0;J}K0h0bq5l91L8LMZj7lq*rj3B*B)URL@HQsOMUS4u`5ixny++QUwEB}VG;voo zhp%|8KR!Cl*$hsNhRdw9Xya~w36mowSFt84taa)D)+$1_ELpYQ7EEGhZY^QUs-^2M z;yWLy^j@ITSQDtVhGzDd$YD~0EY$H=&S)8Bxk4+fMk=b-m02Bv$nO~90**Zv{{Rb~ zw&e8PTDYAndJ6c;Cly}Sn7dYtmT{4N7@>WaaIbKBwo*Anm+DW(c;e+;ESn9MWgb|m>6y&eub0PJf<{SVNS$MAaJw=} z04ige8KRI$zYWNBaI$jbz_(id^&`-{B{}TAp2B0YHY^9UV0$+%mYt+=W4#JgQ!F9m zJVbv|+)8uXZj{{KjmFvT(PqKvd(O8JAQ=G}v2M5Jtz3*3qGJt%H*;m<$PWjL4f=@F 
z4|7Z807Vw{3oLP@tnhTnPtW#DtACPd+vhm`0H%)zuG-Ryt-?vc$sE@r930~@PWg-;avao}YS)^_e`4D=ido!en~R%6yoJjzm@;ramjU z-~6RVyQ03Q(|F9adRckZWRhnNveV(mw(*h!4Zk`+{M7WYYn*aYIUxg%b)Ei#u{3WRL_b7u6M2uCQ2a~ux%2_!UU6_r? z1a13tO2J^O=IShX^;m3}ReNBUb!PzgBa3souw~qj3dhg$b^B9#d#JS3QRJ&E)GaEH zVnP}@VBw9b$EcLmH}Yvp4A@{{YYM zj^Q&BWAXN}eZtIz<5W}PVX=M(Z-yuR{hbQ3t#TaDwro2jy~hl!Nl?diM`rL&?Xdox z7_>L26_TxZY|inKY$I}hN6F$-=XGxh^ZR^`k2!4p3Gq@G+)7D3!b*-ZapM_MPm!_y z{Q`9^77NgS5DLGOyvobtw#&IZSp1Mj$oq8@qAx_zON*0{jf~^-5At#jhuOa*-HH8M z9lC_od6+5WDpE+1Woru0WbMj$nl1>#aqM8ORG%Z-2HW*nw~Dc5l#T9A^n{sM7T5sb zm0y_~h+nzrJ3fcZjIE0>O#;CgX*U8$xeh6^Fgu50IUlkA0Dhr0ZB|@Fa=9`zXkc=Y zktB?y0`39ue1W;}NA~DZ>A*(R1}MytMJ%$7h;q?5B#vRD@&P^%$DW(5KdAK%j~RI)hTZ>6l?g!iqbatAta3(RhDR)`zc8ha%IF*Pso(zqw^r-exaxN8hGtGgKNbPhj{%gh_cq~w z`g+XKl3%Nrn?4|h{x>5dHYXD~bpAChNep)KQw2Ps({<%8vbZSMX8!<`fr5$TTfVr+ zWHY$Sl4MrD)pmHD~EPd%YBfa z>$fw%kbI7v&qi{hm1-FsQ8d-?^Vpt_GMqLN46dliR4Mfo@%B(Nx4UUua%S-Ah zFc~JR6$<%fvjCc)H7Cf}sJ3leyLz3r(B7s&dYuhRj2_xtR7ntLJ4vycOa6a5ZS1|nHn@NJtr6I6b8%L zqO`jn!AI-?xr>*70j*f8v^dH@&~1$VP^}r{@Dla|dU%RKt?2&kOryXri0s5qn8$ z?A34KT8vfJSZLllb(<`ZOQ|q%s)%E$K6+|xXETmZ$SY>Ye$vGWM(;(+KCd31{uwFR z%GLfLttL1HZ{e~)pTu!ZT4J)o$yUew)qZ=o)KX&;hU6iv+b>%O!C>iWd|0E2lEhWf zJ4y96Cz>fFpJs`cNxPB5D-~sSF~%2lGDXRQH6PR)dW#u|p7nYd+qGh@S~?b?j#h#| zL6XI6Xo2B&R1M9HrZz3yf=JlxtA$k)+pm;JYQ<{9&eGRQGRkCzZGyFUo!ez!c;9tW zII&IFlNU77%-m&?X%Ql22^29|ndJqGaN`)+w_n`s2lVRoVidJXUhRC9Z))xWrFA@a z2*O{KjBoHhM;?JbqUnq#LYCGj+bRzzkJeb=vZDLm#~r)M!*lbuyW8i_T5kMd`dxyr zjF%^7#To&AqBe#TG>vPy62`E`j&>4{-1t3cD>voiNM z9k<)<`;tFyrD^(?avVGpLu-2RtdYwRjIoHS{%KWxx7ob^0A}bnmdWKZd9z{Xj@3)x zHpmQY#eL&w`E4fqdv^UrcKGXsoB{29Vk$7>$ww7Nc$|V1mH6B%1O8SW82vrUQp0wv z*9I8o9Bd})rsNJvyRbeMc%PFG>a4td-4HcCN(*_KP*;%yeI!yNMEnwFJC&L;hi+m& zJC7rexq64s>nnJ?VX1O({f+@AGO$L7mfMVfCNiL%hn`#gx)amM{eD6`j}&*2_`m59 z$fda>A93b3+;6z^(J3+USe83wQnFcKg=ZHzyr7(O7vLH`iD7#rW6rV)9^S3&b=;x%{?YJAH|6lbuVIY(%%^i^z3>e1E5+g^hDQNu6Hwl1m?Lj3BP@ zFdU2XQ|*1275(??A33OvoLfs0#N)v*x+IX2i@VDili`^8-)+Nx;pj?r1kzntRgyF^ 
zb8ZpwP7S&gUIW@eAEbQvE7e;Yr?V9l-K`wMtzoZa(agkecSg~L?>nkBWh$p3z zxmsvyix5pLYU%Rytw?Gq^lg3r8b^e)z$f+Mj!@3zQbl4^@BA@auC3k7FZ?mAzmQH7k#Eg z4i6oleYf-W>jpg~YO+1(r|h!s(!#vBG^)ggkOR2nHyi!>AYk^a@kt9Y{tO1TogNE@{OA6b?6A*jyFS6634o#G`Ho6_^_V!ZFjsbXhA|i6@d6>YEv| z_e_@V3CdQbrmD>l-nFaOriLps{*Kd43@WV(kz>VLxqj~mvYq_ldK!8umLXoIo6l;x zQ04Wmb|_2iveVKtW9U$mn=KgT?D9o~4zP8YsU2CdxS{+xWtKI9v}L1`rWYP8MW->) zdqYC272Ee^7d|!~2O{3yiP&sU z{JhR#Q)*a;)i0UfTbQL8qvJ{1i z%iB}A9AWL2L#oz($p10$0NgTMo5W{K^x0PIhBF>S(TLv$PUfR+z!X42TZOg z!z#ujjU*K1M*T8FJo`m2%I@=hRd{4m=aVaP*?w-~THIt!luO?Ofo>8-WbiU4vedAs(t;M!I0JVOj zxVZgCJ|ivk)Kh{_r>+U0wT!az2@@|$tZ*#x)me7$!6a=8G2H%4f%*i~XwlNb-|Yu+ zEveRJUD?8iuN;7QUgLWd4;{`Ymq%?McEmAPk^yP!vlM9x7$Hyw&D^tP`JZtcaT^dD z_Z<=JUpGqQnql;$cNr?iZFd6r-KLkxUppv1I;dxK)hh_@{{UHCf}!tC6mvavUm$jy zbOpB94?aELJ!^0AP3Z*;wl7d*YFd{dUJ1UXEm`hGVs${s?!0!5cj6AfaXvcp!RmcF z^|SYM`KwG>c9kWmP0-hg6#L#r9_5o@Jbdmq=^*t}NM`h%Vym(C@j9! z<~wI#%-#==^8Wyv$4mAf)R-;Xs}xA^Ad`55yKM^)1Ea6VZbcb*{0;vA&9_BG*LeNp zip0A&XWZL_i5Ttf-*dY2J23veM9r#h*@Ven!`F&Nb?!58Rd(W_l?UT=@HX?fZiybB zYAaH01=$*C<1IBOmPPuA(Fkc_NY9oC+p_rc8-GrweNMmpLlE~UayKAJUPx3%Ie@HO zY~LKCAdf#jIt}RUX$6x_EJ)GJtkLj!0OSKM;fWg+<-~$Kjwhr3KT`cTcwk{Ha;OU< z0VxXMOGZKciElpu4zNi?7)<#>1F~<$9^vXdxn4kwh{xujU>90GD#xef+N7Lh35=$69GIym;DPF0m-& zSva57c8p3FykiUlOc<4M}3H1Kzw}lY4s~9 zim`!LMnrKuZ4|7#?iiG1+<~#*hTDGMjiW0yhR>-V~W$+c6K*0e!{iF&%e?0>|LG<$_k)zt1 zyb$b#zI^TA>`DIJE!KTC$yLku!aP>tc(2f~JaL{j1}2S&1Q1mJ075q(&C|7~>&$fi zIL*d6q=cJozc{|BKoi0S-fOc4@OMxT--`55(>{tmzr)9G9~5#T?|zw55@@Wz`%PPI z_A0KdK71dao|tTYjEbg>$xz5w`i!0`r14H2IXXI^k}190{{SHeb^caA<>@_a%*&`D zp=P&kYON>ILffo&D>RPNNV9M53|r%Kw;h1nqC|^N7SEFo9+dI0bqJx=nl&zja`v|p z!}dPjq_CQ&9W>Re?zXGNwA@bJ(2O~i;-};)`*!=U9lBy`tnkg#>VC2)_Vs(Ve~9^^vA2jD^Z^PmPsXtwUStUKwQ;p|LtcELBlYaM)<%K0E++ zV%-_Fp+`%(2FERvES42#v1B=DAd*NasfaGn%^hToeaF}ka`yqyMjZ6=;^&WuTJ0K#vYB|_Hh#cBwv z;&E`R@s5;HtpsK~ecjJ@>E+i|RllCh*s*1vdofbTWh`Bgcezdzw+h1>0o+i|%C6jb zv??M27p~~BRw=^PRfVg9YWRwki4w;qcICJ#)uI`I??+z0ap&9SM*UlCIy&ji7idj9 
z7I@Dh2aO-L5ci?9hWw4NApzmRLHQnkZi@P6AN)SVz(A2ev8|Icg1n-V+d(Ud9C+e; zN001x_+7eVbQYw%Z%JZdv11*zyOX_CH&FCPuO(61v*TC3~p5agBI= zMJrM{{{X8S6@8^$my;AcUw*H4wS~2_NY#ql(8x=6zt*g89Q>iT960*YD~;i`9Dzp9 z;o=Uwy~)+a9Y=mhCWS3gS+{1bKMw3`Y7-k+f@s+CSrdK_=k&Vs_7<+R!Z!J#Lk|s)(3rwa9mmo-HjKaJc7LFNx=v#6gm&(-4R-Sll*0ExC@4;>; ztJSe)f`KJCaw`;@mWnxiI6HDjdeRjIdhgWIh_JUTy!N7nR#rJGOD8-$!4cz@#8D>m zW7wiJvi98ym3bm~`JR`|-X7E!*HYo&d}dmNQDZX|AQD-jC3_Y722;(qmlI-~qA25I z%C{7SoTt-lEQ<{~-enbR@#4=Lk@X2wD{PTO^QhnymHl4f$IEnN(~!Ki(v_Ob{JVTo z&C=?d@9%M404~g1Wgb90dNb+e>?}anqfvtXPjr`zMt!{26+F2X3FXeyz6gq{vApw&lO+`8#Y!$?0js=y+_;%7K+^S=pL0q2pFZ-H60!P61k@3+J7mJP>f?2rQj^DSsHG)WO9{X#0V**75_jErDF<(!TXE{kjMmmQEk;GDvrgTS zv|AGXd=(xJ$Clr}F01tPHbpW);f<0eX*-I8V;gxpaSOVUyApiw@zFsHnrgjU%Oh9_ z6{L~;&_c2VhKckWS8o7WaQmx&Z1eHIu_g_bL{#kD5>(H1JX~ z?tCKcu>^QK?YB|5lVUrtx6$)UUBpS(iz(cc{T#i_r+v30`M2@YQHRoX@8DrcAxWY! z^R;1AnO1GT=_w#>5O0t^LF$~|o#X!ijY0G*uh^0&a0(TX=3Y!Vli~P}zrZ8+=oRU{ ze%6`EQdg0Zt%JKeasWuAkU{w3M}z+Who-+jIJJkXsBzM%y9pBZd8m$PWl$!8_9t-Q zZ|BG>%cv}Ne-WrL)@a=Aq;rBfuH36j9?Vdv`DQzC+iwf~`d#%_2P88y%Z$j%^+7S0 zjhSUJnR#)3ByJaP{Q4{4>Vt=hH=SL=4pK*pG8`E%hDjV{cEEYOG@t7<;@y?a31l zJc;)B8;$&QDAby&&5B?$M(HAN0FPt0B@B!d1Gk3x{=eU=&!*K^D_*INNn^3vy3_@IMQr9;hrZuHxS^j7GizAcL#r-jZo)kPOjtXgY1 zet1SSvrS$W42X9jk040o;1*y%uE(dR@aL{h4s6z%w^YYk%O+cOwQVa92FX0zl>RWX@bLbgU7{gJ2!y4w+j1<(?#cJft^yQKG6W@_iJ zAKQH5ZI2`V?cSQN7CQYFsR5~wTa}}E?@b(Sz*??0m$FaNZr)NT)H8j``0C53 zDo1WPv9aW=+nPCNZ>3CaX<+Y3G^}GbBx3}#ljn~X05RpwhW!gy1!}Jpk`K?3Ocj-- zjbx01OHULE+#!KNorpw6KcF725#Xy-j{!X4_sC%GW2L`+%ztuWw+9vkt9VZ%NIm zwH^*UXmO9i-WO}#o)Zmsy*$q(*I|N046y^{8UFw^-H`0OSEotz9JK7>vGi3M)aF}u z-9nkfHu;5;P^9ebAd!au00C7;xOIKhK9?iB9#5`<#b;J%VV<`nXqawf zqRlF9e@g!VHpiD!8CWb(qa@23_TYxZ(<84TcAbGcC74t*axvLe9MgAJ2&H-&=QW-J zIu?@EjiB1R_UzVnYf*h#D32vN0*y38G%wwY+y?06dz4l*{$EhxGZA91!Hv{(W?5&2 zF?nd4Ef(&OOPI!0y8X#ziWw+cYP&Ry^%+&m5Ez@KGIVs6%M>Ay{TFm4FE)AL35}#TR5F6vj@bMz%jACFg=mHSAHhZYt<#eJ!|y)S;G# zknC5UBP6iPx|kiEQcl~H=tXBtVzN-pO|o;rbt0B5R%WrWURkQeYqNh&>J%;`8%(g8 
z?&|2_Lm3bh>al-I-o^&EVJ*AT-U*U3&F3)!3rWh$4{H?W0AN7=-DF}xdvsQ$a8667^w?>}#kC1)&o zcNu-#C@R0r*OEW+0Q8GM>5T2`c)C=ymO=|QWsaPJ^_ctoxB5xrVpcuRjzH``)7GQ( z-jICWR?j0Gw#>l8COK7vfe3%n!1otmOK-UxdMR7dx`$6fw<%yIy+Dq0O-_`OO$hFT z&FiTw(UIVlAFJeaJj~rWX+EbOy~n5GU&|tGqLsa}1OEU;mPO*guPuW6cIvTfQo~NH z(?Bddkrr!JLMt>*t+cJ-f#NsZe&_!HFy5%mRd?8QOZlyN(m zII{$7-q!&tMZmjN|`!BoF> zw^^LG5$AE?O0ga`*#7|YblYiMEnO>(ORzFJ#Cv;AJ>j_V3ch{>jG*pzA3Y4UUZu%c zy%cRVrddR<4)z;}VCuUDU!9R#Z`}U?ZiwETA+i@S9M&ycgvDBHYwo*-^WBj{cvnVd zjPJ6tGW1K-xcZc$tvnUlSIBn-nxU3NiBVQZb|jLVPr=*d9)S5=TzDL~w7mX>WJ@Ll z-?pKcyGOUk46HZ%Z|>;TfY-Gx^J~ac{}*qrFU0Qk4d}?@+++M)T9uCt{a!g3^zXG_Vd5=_@2|g zrxA#JTgpPnIH_(Wc@-cd0Czk5ay?b@S!?*{=D$Ikd3dzZ?#4FYcTOq};k@xXe}C?J z^gl*q{Z+-!X2LVul$m0dFJiC^!PDHncH#l}Jd^tMP1i}A(weHy0?aVdt4hhO^<|kP zhRY5ySRm{Tb{iM|{J&SpI~$@R)p)oq&n#m#QaHoGunA^18#r}U_eMble0)CrN#So% z(U&XM&cxDS<*>D_N>`D_KX9*b6Spehw#XBJhXni3ywYavG5c8`?iD(_$Oima_cINtBXTwfr6T46>=9IY6Y00*09?o)t_;{ z(>oO^Nbupd`SN1V`gxhvYIRO6)*H2}L@ZICes)qIdx(R7EH)c{`#0~<>MXXP$9+sy z&tYsrkWB;b0x}K|l%D(X4c6boO!1nLIfWUsQcU{^C)HTEA#apt=P1ZorIF4nORYoep zIiqD8?noo|W&QepJwD0da?MWT*RgzyRe8&jzbaLzjdBwjAE!=MCOfvt;Otv+y)dhp z)A;JM!YI_HpcyaOADdQ>dPYcTC1|Uuj@-K`72{{KJ2&S{k%EmEM+u4<0rd36%(aO) zTBy)SEu`oSp=?JpGEj;N+)J5WIMJkS&mb46$^ActuZ-?e#cYj;YvpcAreublYL(}L zb){0vY*GY0wCvGA2M(s^<%zGp%Bno)bS7juV?_~l#{I^GcPw>&r zcO#jSHTs-=YWUdVXg@XDSYuS}a|d`}3G=4jJ|n~x)lGXDUV zrV|C8t7MpNM@mGu9Llo5R$9|ak}*d~b_2H2gmR>YR(>t_>c6gOVHXWUFNvu;rE1nz zdbq0+!(s|CJKeJBS_x64j39ndsO}DFw@bD|)jKAc`dhZCy@YvSYg0`W)GXa{qqQB+ zWHC!%g>`RlCHEq+<C-KQm0T5{5Q-%lMN624l6yH=26vM@=zb1+Jhad_PW#SuxB)8M6yg5_B1 zT4awkYLiE2B?URFHOdPlQ>?j+C3%UGwb|vT3SFueNW1Ed&xENd{fN(1k0)+=cju#0 z!Gfl95Gp$3ttL(Tp-FiJBC*S{R{8CaTdZ100bk z{EG^Js|ekpU=*j$$~47gXk;gl{SBIP#O4N_t1NK`+>Q^p-S_?W{{1joVjHkiic||~ zCP5QJ68@xS19jYhiU9Mr&;D+RQ%O8kM3ciDX%@$@jm^LZa7=|*MmOX8o%*S2c?L%n zWnsBqorz04EYZ5$-F7Pnba+V0jgI~ZJ$Z=#02Lfw8FBdwQrfQBStGJ8Q?p;R;fQZA z#DFxB`F)%2dh=DFb=-6>*!~p#n>dT+MXxwyOBL2t5(u5V#lGyn=`0H?b?A@qO{cvh 
z_0J1NottzP#rQi?ncg_wt(W#ZPC*Y|b(DoW@4wH+&Cq}G@%TtzM{moh=;LwssLE8F z<|3R`#P$60{w4J4q11R;2?U*lp}soO4y@CZ=gCcWfRwi-OEr-9V>fVUrg~3e=cjiKF+kgq! z>^9u>U%j8THMGJ-Fp*wmM~z9?qaSLsZL%wTe19h{rEcgvyws)@qF($w&0`}s2^8$i zIqm-dSP`%v4Zl@wEq7CC`|`xgJ}$u~wZUVWH1%a-c}82uC6{+rUydJPzgMXBZJBbM zZT)?hHHxIgQ?`ZMc9D@!oZ`qm9T8P(*A)7lV1r@S~aAROejynuD z+1Y^D{2l!H>1$c*dW(Xon6tY`gO1Eg1!db{OhjzE4agq^dHZxB&pmBFZq;~#3G0=T zIbmYFgrB8ZejSIOpFZxtQ`Yd$4`U)@c&#A_{{RaSKo1e*C}72Z`g(N#00thm;&55q zu9SJAb&r-$8zU77D=!qt(nGi#y9AMc)wuigK>SX&o6YJPy1N{aM;+*$Z&e&fX@~3F zdH5T6`~Lv9O5TX|g55l#hG?H?{u zy+UBQ8oj>9m!l%_PEthw0Is`o_aC#qVBBbfK?<`=F8)w}8$6CJB_!`=SHn*-yURvt%Rg+NV}LU5gT;aj{Wi?NhQ#5KNu1+mM~fF{%06 ztXnwzMn>g}Fi8@lR|UARA!d`5p?CAJW;?FlPTO@!$Z6YIDp#Hwa$FTl2Qi;}1}(7d z?oP@{8~($ntr=e@hspaHOgzHQyty6#?b=jhw>2I%-+%k`#Wd1g!p&N_DPl4uuI?lu zj>-Y^;BCIgfA9Ua3Fv;SlT`X%m~5)AJ9ud##@m->acTu<>|?EbY{_CK5wY8G$KCpR z*Ke%;m`#_rV>f_}V9(?3GlTzeI^AHS6(d2Wk3QnKU)T2KjWDFr#WSmjB9E5c<9_Ps$ z7c6O4kmTFs6{vzN?f?N&@|mJ0%Aw^|zD-IZ)e z9EoJ}GkcUIe41LVYhIvcF}is*j{O9sVhqecXF-;b8}pr1)7z`sd^gg&8mE|+Sv7Mey>WhhGP?g$Ot1M*g#1k6;(m*Z0t9%|f*)3UfCylo(ahiAIup);2 zSsE{@#t9Rn@)RNYyu-b7xF2l>Ugb~SVm5JmhdoZhVc-fiHI9N97z7&nR zkjGXX^rIY+y=zt{zmLn&vpdNkSV9Px#l}klIpna`D*pi0kGvfMu=kYlIP8KoY<;$c z@8hhSRICdP9LZGBmCRLaMCfc&nHo7GD8AR&f$namu{3{;(7*NdEYZ(nY6=re2sQGz zadT}XGOSi&SaLAKWJbjCR{3QKCm`3|F=`sPzu}oznhcZ)^+M#ZwHLPX?E+hpl%-bP zS)(f%o(E$a0P>-3ptXKgX+b5JCYC#Ltclx!97k{+#{&KuHUxW!8e$9|ekE0(=7K_*5_Y|T0V$pf`#1rHM> zh`t1YfcV(3K2)EAV8~Zf62`MUPNjsfMAEPOt|#W9uy zfeg&M?&iOJf9tQ3J_hHnKA+-2Qx~gnnR>L1L6m`IMj06EMUd}C8a5$>LE~UK5S~$j zb>r*QtQJ>7dVyhVh@`J}Sp{ZD_O8%)a8N4yWHaykjruo!AHNW}&qei?H&kM+*<5$j zM7oC$8_4Nyk8x4$S6JJ{xZ?5;%c}6-mma+|OQ>{r;=9o*RnDOPB}+GjM#kkU*ou2q z>=+A06z)9p1Z<_gRPcU3Z5CYoFzRgsuQOUdQN<*brM+kPMx%}+-77qdy^A||=A&|> z!EiYI{EWR<^h;UjXw8>|U&OOWd$9wwyvsBp__p<>Dy%^G2gmB!dIC)qOYq2MjbXJA zhhb8trg<}x;WJXY<$Hi)r@OmJcBni%X&Lg<#!Dga z#gy;;dSW#TO1?^|g01;k4Wn7OnMq@_N>}uf&E$=?+R{1zLzUzVWDmd41-dLiEua#e=Kc+jzeMJM%?~uW9NS$a`^GlE7PqvlhZnf8#uQlGMff$W#pBE 
zqwrqkdB-8iPtWw}tMvvf#hg|?<;yO$yJ6*S=FGf71)qr=0}?-{jxEp&PGJ6$*vV46 zX0}EfKUa^n%fA91Y~87&@=BeDk>hSk!_Q4VD_0#wjFOM=(OP-v_HV}4L(}hV%1#W+ zv!@^(JaXYK-5EV4(%5gNO7_ug_GU3mB=%UZ+y$!`f^WASwpRP`UvE~7U*f`F!(ObD z8kN%RA&_*MNe{SU?W1y<9_az>#V;05zUlYn)pOLpU+8`yS_=@tkiRZJBV#9MOA}8F z_5Pg3wop8lZwY8)R)<5m^RBG3_@()RF5E!cKh{SY!<1-W8MW>C5d>MB#&f|Tmd-;`yj|!b7Vps?HxB2`#E2t#nMD*A+fxF#4gq@8D(367Hex~(>MYYl znk2IWjhi5~C3w4ww&g-Qbup98#|l!^c;4me-$^6hg1p6SEemduBa6H#mh$IwQ^`7C9_a~nOu27!nLrEz{pj%N8u>#x3tLx zTJx+GSc5PYWUX!(ds3%n1MJBfAWa8?0A7rSj)#kQm>wDPDmNq-TPYj834Uv_gy;99YXr(x_JZ%0Br!n;@)<_N0BqlYt=0ap* zcX;+-0olCle&2qr5yI7BNa?RQc#=%>oDET&*p4mE42GGYwj@5 zEZyi>vWH;H#2FWTw_gB%ZMx9k_<`z8OJP=7>*?E7BwH3JH7fAMced@_WcrHxGO%&E zN8FFo*Pi`pp>P`7t^4_E5mmJm(xVX(4+!(+J)3|}+<5WQSFCWD?Jty$7@&u@0!E$4 z`2?0BzZG@m?{dw*&+6NvXP}>pjQ8QgHfDz9t9l0Q&UuUs(-|SgiKYV0ym$n#i1#;W z!1lK8pQ*=N&Bgk=>8GrEN{*|eW{yuAm!OU;86oen5r$ogjp4{wW>)d!r%K+s$_+;b zLrLC}yp&4JF&PMXO42ADzSPjN?!{!`zn%9UI@iakewt*l_a=xW7UQn1H`Y}rnJfBv zqaDW<9)5mXrQab!<})P>{{T{I$~TfLb@y2+SdVFvNXFxF%YBq>us5Vmh zn&W6g1d=`8A!^&Rk+RQOWU<7ok@n+J;QLFi{?+K8qxBANM&U7doE_Ac7gEGx82Pgr zwdOAJz^nR1Ct%1Agf8CgiuKj%PE<>UzmYE+T(Z6XYZ2mDI{Q_C-+>#AfZQKyj)Gc8 z3riwwLz0<|*+NSNi1;#5%+q>bteZB-+) z8<^NkeEzWi00U;-TW|O4uM4K=*#?bT4QR9uyr5{Be%kzrP&ejCtK~@Dh#ua89=YfY z?uo>l+;Ca7fwCkL+EtLf7Lk0%VLn2im3_WW#Bb7Gv)YE#d^s(IkWE#GCPj(%GDPY& z+CmDQhmEEG08dSRiOYWx#X|(*TO7hiXzW876a(7i2Fl8I{{ZwSZ0%8E_B$10^I5Us z%6@M##DV5@EX0yOZ@>5GAB4$k`Zexd##NIcN-GUwvU1=Gjrj0Te6dwixj)Q)I&r-Z z^+h!)}o&tjHFf2!?O zLE1^x_i0RDrCp1Aqwo(dOV--`3)Gn1QKJ5ZG7+VavBQ3q$p~z1=-lzK8rO^#qQ{Fz z8!-&+u?LS=eK%$tef#re^CCo{-HkDoD#%Rgp|+nXaZ`I2NH4~?l~o!Ro1m9ZGQs4Uq|$?eNO2&xv8s?A~Bk6AT4nr7yj znw@>8n!UN|#eP!v^0QY%Zr%H??4p(~lC^$qx@nYWADQ1t5ceNRsfs2WJ*Yw}#b z4q|Ctr@f7aNnh%*u}{VAeO5suRkBnp;eRh^$3CCUIRLK>TtXBn7B?SK%~g;7T8>$H z+=f;}AonZ1o>Ow&o@s~aL~e|joTEH&FL5os9OfRhas%SyLL#aeHXh}5uQ!qa;;MgF zE~>h+)r@vximb+LY!~apyUGwyQ6ZMKm=GP}LX&Ra18Ceg$5A?{GFF-xrG;V2-L)`B zZR(nH`Kxk1p!&6JeQADVV0~V;|!u}(bKvz+G<{DE?SLvppN~3v24O_ 
z-Kx?>JS2eI?w$_yd@f2`(>^kdrL|%^P+G2JA#xStWHP}j-H8X)@pz|##6=Oyz_Txs z>(d*j>0#URP=B+5icob>3JlNgn1ok=Q&g0Y(xPW%rTMLr<(!Wn${@6p^4> z^7HKnJ|#%zjO^uou8F?PJTJSeggZ}S2&&qZYfZu8d1M@^SXE_+!t4~uQP+|%KHwYt z-Crt5f*TQAtf^wLPpYF@Sxs!TWPV;-6$Ff^v%&}b%qqz3>~!buRlZ{K1u^l*l=@7S zJ5KT9#oLV7%$O?W{)*J?vw zr#q)hL z`$tRv0OA|ae8cJ4^LYA`0US*<77`|BMQF(?N{yMnM&s@C)}R`fsCf*{yaM8H%aHoc z%rzr~Af7PgL#jC0p;z)w`*|NdJKl;XsUDWYS`>S7(3zsKn8gV!8s63|Gt*xkxrA)P z+qW(rSXc8L$Tk< z8#v#iAEp>=u)@3)=2J356U`wk$V&iP7LC7A+rjzRFZAmZQNfdfTX9{h8&q^iperHW zPQ-4Yjze}Wzn?0+ep{;#rW&sPhK5E&j!52EWqNb7N8PMEPgWAYp~n1_jkfM_>fx@d z=}%PmMU7S|>nIB}U8kusZsIu=hqiT`?g%?J-ro(>Ee}ufSn5iUqR#0MMA_I)XNXlU z6l^!xAw0J5e0**zQ&+Jk)njbL?J~$?bagDz5QLQjeYlVa`SQct^U#)_GSy7g+1BK> zI}Wo$9}=jri5r-YpN8T_;1UNG{{T-|zM~}58jnl~q@L04m{=9}q1~J**fVl%yn)Dd z)rr!X)vA{H-K^=`B}+RX`vJbdh2VeKj*MT2uS>I%)txU1p&aCw9Uq?UJCXAGm|0tH zK=5uO!3@6MTlG=(evY8UdY6XAW3R^8Or*jblze1TmtsL8NY3A;W-7ptzDC_yHC#(` z_(Uz4Xk9EM?gblp)NA;r>tFszq;Ii-xd~Xe=j;H|1+Cs?w{{Sky zoPEFY=wInDxsA)z?b+@xVNIAhZS0d|cZh}Z>FwY59XZ)t9uF;Ar}0IF>&~G?nU#O) zoq!V(4{WeL0rr;tJqYy=q&TWFNsz}&B~dhgRK2fsHn}RNIS4x|0tVY})NiBOC;l(i zEr!d+sZYF8TWVFMtadF@cjSG`-c$gci7FRvnSD*^rXW1}FE;e#GXpJqF?fx;tg)=} zC(7+19D(=)lKJUR^}q0`o6#C}O?jj3*@GvDl)h3r+`*AX@x1Z0dvVz}VBgLM_33x? 
zkJJq1n}5S|k{51n|=!iQ3{J;0;vrh>PMus}G7@xGo zQ*IGSBx~guCywFU^&da&*G{aDCz;2sXz0t?`gV-X5bh#OssQGyH)h+~e0cHIU(*d` zO9-u}HAE4`EqSF&g_Demk}>Vhz>ji*M*jeCI%p@-+7{-QmexAWG{0iCf+ei{=UYk$ z5zEBv(eA^6+w{8r+jOmIZ8c*fm6IgV8*7bXffFCQ>eF8dx{?w`sQV4kgQQ31x-u^EAYbT=eN zts+jgA*IW;G@+Sac>z{tK+4=tQ1*2`Z&l>^VC+j@FukfbHC|B1JQ{u`cN5N&Nf?f8 zKub(jluotB7~zOIsYyziN;Wb#Wt!dTF{_V*ON{PHRGTA~p%XQHozV@!@JyhIzw3 z=H9U&bwkhC?CEP*39P*gY}aj9(!I(#EQ^(Frl{8$&gF{|HE?+(>}+?XkI1bG#}De} zRHddMinMm^*qssR>L241Rfc4KbturDl0y`z0E#ne+On?4aJ<)!-CJ_lN?BZ8oRBfg zF?-ffH~lBa*KY9Sgl@2_LUtZ~rI-BORP_cOE#V&@VOBV$wUVBsBn7+$xSigzZ^)Tq zSQoSRak`%?3*+xeNI0iBxH!Ls_|%#fQ{MZyzPKo{GG4I^D;Z;}tDC9?-IoN(rrR zh+!gy8$2P@p>AD6YD}@Li)y7fa&;{!hReuWsywK7Xjq>#GC-ARG>3V6r$| zgwU*Y^7UFs>`3dpP#A{QYTW^h z(|V1s*RN1ZLU|ziii;C8m7{V|LqC!2Dhqw~{%){y`KnhbcI(LX#%8b88`qv#3*Lm{BQ7jx@0q$eCBf37tc^2R$q zLarJmJ@#{RwyeRTLJDrGK_E8eNc|`6(a%lmn-OCYEP_Dx9lLcOi^(uXP~C=fE#YKu zY5S{rBacMfXQuhxVN!g>EkAbsd1Deilx4@cav7y+vNIKub(LIj_ZBPn>3j7T(~Rvf z@J=!!`X3c1h*Q4=A&Uhtf!nzmMQzU|J15_&hA#||(zz9?k6X07AG!-l(+!UNw;;wy zZaWe-KdY!_^%|5CHZoZw_czl`6R?f>4UBRrVYco2dE9yFrMInXXh<4KYkyDuMvoaSfYqnQExM5;lO5rCik4VG;@n;)+^O6T{Kz^#*UOs24IVowQ&=(O#$+`lB>gz!DkQ3{6pbF=n05nlPQ!n8 z-5Gr^^&($T4p&TNp^lzHHkOk_=)y}08t$_|T>E}dpOTyY-Er19%~hshaXsrsjzM0o zI>!?vODZY<0G5?d?Hzze+vI(^3HoKObyl0!iJC|x?N#3!nLr*`tyh`9Dt~A5$12An zw&a4lFV9U}`tv=V)lq5-b@n*=k)LD(N4c=tD(W~23*&|12C zB;w{bAzfiI%d-ox71|>zk?)yPiV}9+W8>1%ti375q73#D1+OKCmE$$QUf5ztj1b!g z*_YaU5&YZrZ1k>Kv6%aVC?c9vnU!8$`G(wW9ErC5u;owh?)LSCo@DyV^K7;5B=T66 zzU{(D-`b>c+qnT!IFa$zXQvq4Mt&=i<|K_L?=(?~f(CfxNZWwtw-L;d{Yv#asq`Kr zO~Nv?`88Q&9mtR$`F00kBxWbRTR+xr-w1AX`8I$-N$DPpQrxjMC3?8V%g zrIrYi7V^&nk@<3;lgo8K8});d)7n2>vfS;}h|Iu9;q2Ky>A%Mif zyb(mMU8QbJHbvcw73I4#h`kw#shiZ24Xd{rwjiCgMK5;b{J7gHlm zGHnW?cxto`5-mJrHDyoYxhBa?dpUb!4jHnY+nMZhPBn;c#irx@rMfs{`gU@aX^~Gx@oKZ3E%`Q-J<_{hy+bwHH?9+VZxNLElgmF zw6O!e^1e=aHXpMyjrvyd^`*qiP8?K_n&K%l)z+kfMXiv#l8$Lv8HfzVOEwip>5*gz z+>>rISa-wa=@{b=^-@+Z43$p^tVImXR$BEd^WH0T=9RYJ+axn_1(YumZj`w;Ga%mU 
z7GhXb8*d3QUv^EaWa_hyQCT4NUMK6EKGUFPJ3|)E415tXwNE@ zXh8#U&uIdvBbQy6;Waw7s?Su(S|&4nD@<>gk(eZlkFexC^!9E@%-zIx;re*>k(6m7 zs`(8oX_CZJo>N66s8_V2My-^_%kaTY!L}&saAt*nB3dt6pTjM4g;& zZkSM(rJ6n}T0-{-6f5VaQ_-qLZc-w| z&`3#h^ypb+0C)%_PCQZeB%vIK+ouSsot2rk8#}NJj7uH6sOsL`iTD2iF2`fnQ}OnR z5UhNho(vi;3`p(G06qgMkPq40$DX?GRI7Hk(^*ks2{s`Qd_N+gmPp(lP`ZHR5PiLC zH}OF%`g%*LTR&4!;%nBt4Lgk^O^#j#ja#lg(Z~+nxep~JM<731`SAQtYRX!J5}K0_ zO|2~`Umo&zly)|e>@=>-OP0Fst{$aoD4L{jEpjr?DPT;ZMj^e;@C8tsuCF8kZ zz%qcG109bRFZ@)wf+=+DXpVt)Y^Af=MksJM*3LiDTx2l|b?0l=qn1Py%CJK;p+|4K7R|EA(T&3H zKtHSR)tS9H)Hs+wjZ6GG5spN=1}u(o+@@q%dvnf?c$rg>UuohA@)$2$k^DC4Ed}b& zqAwLQUhX)8L#=M zB9S1X&umLHb%@+{&yFs9AJjqd`}Diy^Hp^0Qmv>thEHrVvYu*;=Zf(gxCl2>`hoWE z()FFkVD$z>-L8Qga3m-TAlaGuKc8!j_F#MhzTRCP>F4X>GXZi9^-;7dEg2m0683g7 z#Vns@3GR>l!+*a|x8UQ_sy@HuwcV_AD+NqOK6=xb6++MTDjSH2d?TZT-29%kC-|lG zLUr}GUFnRLNiEH$ZY4T0OA~JuNdwu67UShuGoL;;ETy_ybmoNA`dZDGj~6wJXK^H& zS|Mya8-MYptFVwF5xHayxNVz{j-6jcr^mFY`H0q9my}eNxlhzs_>}_<*Z%-7)%`jl z>wix){x3IMJErIQEP@zqV{JyeIg%60w`3;8`5@SzxMHo#ewbU*XiaPBrK~n;7{0dc zN1j-#$QFEdYNULfD$2eF>=sD5aU58qA8$!MmeY96Ic~&ulsJ-mK#9i*e<|08a1pw> zQb_US^80Po_e|c$E=tW`(5z&NILR*`X;_bG`Bqco#=wW@Rj%+^w9Q8F#_vfTr=@x| ze%TCoBlWf9mwXmjytLQpfUy^eEYondKXPr z%in8MWGfYaeY&x!2_i&CHD*5(`6W-=&sVA%GN!YlwW6FX<{lO1KmJt*GE~IG4|HNe z%0c)lKg_)V{X^3E{{T;}4KYxMJki|;1T2>jyr?U<5*xd zmLiw%NhE6(Sy z=rbU}op_)^J#t7wO zj-+L5m(z-_vgPwxhenYCS28e=m}0LNvN6tKxi&nDTGZJ1DK)BkZwDI1r*w8cDC5d% z42_o}rMq;{GgGT(W@jDV<;!g{$znu{iuA|ebJ0O+%W5k%?AFuLe+#!ZK37dQM==)d zdmBo_9kX&rY>4Kv4+ownNP$K1d!?&!`ifk9RGz}aPQ+F0S*((YayO=tDpAJMFSog} z5$Ws#%Hrw#cOlp;F7M4j&@~%tWw5HU+b$^P&m~VZdD540W{3N zsIL814ax9YZ!b-&&2B7GVZL<33mSW3VS?0gvlXaJa4oAb z!*#B8-5Zsdff;be8>ydT<9#LbwKayi|{GX88)#v-vAF)X6MAEv-oR*BKLx5}Nz z9*J0MN+{SlD=}sH#?`JvltxsuRuD$lIioF`2_GHVxU#>wNaf-(g(+)fC2Sr^EaSNr zy`?i&Y{zaGkGeTAm+VI3cuM$H5-YCiId})F$<@%}tye*6thH`eQ6ZQgs)35KA|KYW zDECImCeQBhS$eJI^%NgdS1(ePTeTxB_uA5hauIPWBFXu3ab4M4 z^-(oev4P=MrN+%Uc{QAM=*Jvvvp*9xc@>yP41te?*;|(`0lLD%Y459*EMcd+jaw(Z 
zEOMy3jt=a*D;1mB8EkMx+iw*OaQM&xe=a*%AG}r=GZg2N5s1jD4}~J(d7&qDb~CBq4pkk+OmA>!Rhl z^(xt(^>Ug% z?TC2A8I2V#R+dw>tY7K@iP$S~SKqBZ>b*Cia^8|>t>b5=P1*Bxr-rSF?c=AMcXpPO z2Q3+3e5r0D`AF-{?v>29raq%*E!%~xQkAkWI+DNg^47I0SGN?hY&>vuZ}Tjhc-x{b zW7PZys6!bAz#~ZKl}V91TqKP*?ebzaK6cypUaxvuzMF(B)sW5GEEsOB8xyzcUS7g| zpo6yCH%Dr^M@?%i9f|9}&`go7dE!pW%T3u2ZOBjaqI+1LEEkFM)NY6TI^$x>Nt4vs zLwgrV#L`5ki#$N`JA?8<^84{%Tlec^x-;<|_+j*iNDMT1ToPqy)mX(>s~gQ=kLK<6ByG2jwS)MF{7qE#FDr8yq_C1=;ITQ5y@!op1H24Hn2=bgE#u>1?)_a3eK_=Q zSk$v$SWl?h7N;UwTJ@eq0!YfbaArHSVDjAj5OzHkeOL7Y{oEY51AWyRB;FV@HsTSm zA=DxX!hq}>&-%9LbLqdT)^H{)Roik;LRsOCNjDPBS~C16WqAQINSp2ZuwJ%8)v0B) zB(qhwQezf8%S!YH7PW7Ivxkki_6DU=Zw2~0p zIuhf^-M-&$f&&LfDT|usX-hn6XA(40$r}I^p^W^+xhhZn+s8tC!UexIYz1J|$8F}f z^wCWWgr7Ct6To6eh5e5I0B);T?MH>m;mme|#cd%g*%BW0Ipg97125`{xAyP0-sSyW zZ3Fx?y;%PM*}f0eSy7$?e$_-&NcUo_iXEF^c8}fiJ_Gfv&%{P!Qe?e*^>bV|cvZ@3 zt4#+aXRkdf@JAda3hm8753>!xB#(}(7|nA^Ok@KmOjgKC+>V`!zm(j;0ShqLs~034 z!Q6j0p04@}H(Mz`Kc`sPY#3*7h5IiTmtnwuP%<~q!)?|#s`vFZTUhJ(S#4RVR!IK5 zR7W#H7?8FS9sI_s-sJ%J^S4Hw3zS*0m2y|ASGcAn5?YUgIB+;aI}|&tq1cC49#yvA zsNH*|Q&!l6Qqy>9*AquuCgmPDv)S*E@NjXA(C~m z91|I8%9{mK8JBXqg>N1|TJ-LEQ>HY>s@{_1>fUP00tk>G`0OqPD7IPqRwN$!$I9;?0IQGFJzib-nDx?;n%N|p_WF~J<=q!_8#o-oK6 zn4dUd*eY%45$42HV-qrZTqc&d!6)u-D@_;jQXX3}a~YzN!~r4K$G&*A}fx_81Rvo@Sa^9G`y6v&yw4iK@$9X(6Jj0C=KF(Ik-ZAN37kCq4Xa#W()|FB`aw zwk8VqXyh^<#BD>hBOI;6mv!1W%x&y;0yzScxKI_F^>tl+b81GvikBk<_Aw1zuSX1r zxh)*Ry}FqY2F@$6hETJTk*IC+8P#VegF}bK9CW<0O1R@yrHa{QB7^q&D%5 ziR|MnaT5kg=p<3(uxXMRlo9|?3iID@(!CqfIO66M@#d)VDOHUDPis0VjscvL&3*|7 z$M5+a(advlebc$$XwhME$6*YGNm%6DhRilr{{A|Q&E+fLaaOW7s!u%%{L;c#Lu92# zaJ=k6F8t%&+mQf;L;G*nkdN_rmj$J>gB$7oa;6STx2)Nxg1u(SWGdsbQ+I6B!`_Lw zNsAIUC5BVFZrga`deWXh)Gc*^!^5WVxtJ3lc3Et#YZhd9OtPt29u41N5n;%$`nscH zeFL?qu5-xhd}(`9O)T-t%xyHy&xvk(mE5-9*nPUa$M{d76rjbOqxA4F9r-HC(M2gi z#R!g0avjJ7{QJ3eiLAXdpVMqgy_=QnSyfxSs@QoW5b|QG>hY^W4}tGrzmwJ$ko5|C zRiuR?mb=?#uOr0&0Gey53(8Ih+&K-``h0E6q8_c)mvq*UkUnm_pw&>Zq=qxP?BQhj zAwAxAK0|%FLLv1&JdUK5Yst;*&ehsU!ZB8mvhU;q03&byo{iW|F>56aTT_!e({!+$ 
zQ~@(FFTsJ>02g9O+xO_@Z(L;Q;9nUIMp-42zkBJ|U{zI#Xw`&Xvl}nDBO`Br zg*$$t547~&dNcTkrr66E*t?ynB4mvukg$!Ta0;p0WIRFG5xM^W(zmR#l{0yHa&X5j zc@?`7$H<_XMsXx_ZZ_c{<+=Nx-Fm9Wn#-H=MU{I}Vxnb?)n{ac!dQV>yzDmvaz_3S zM$EUXxVi9S1C5>JyCpxRHIcb?qT6IMqQ*GFyX*+^f7S8To6;OP#?_N2l33u5dH$)O zl@o-;M=!0~Pm$nx{fJ(bJl>mWGNCYS3`s3xDyZ%V+En*EP5yO$C^>JkmHwSD{)uI^ zwv5wxJ4ovtN$*8!jebQrMPIoc)wyraXLf}4nZZ8R@v-WgbJcnp>aIf?n5KJk+{mlT zXe8>bZRC+GL}MPwH*a$<-`(f*^eq@%O2%uwh}G-H>ohZv4<=GK54a_c=WYD?{+&T- zuT<&iT)D)WP1|M_UdVoZ=w<{gr-~6Iht+hpmCe=0 z+^&;7OEK7o+b?u9WLLKsUHgx-saBUWB!>6Y`u5(5I2*!B!l;cN{!< z9^XAO{*L;eZx3?qO4p4mHlR@@c65uq861dpVmFZ`efbSKO$@>roo&5B0^rO_QH?4Vz zeK^-Td|@xsygXEzO1$NXqR)*i)m#|f=kO7eX?Ne|4^x6~GvR&Z7^X`^`nAh@imN~kBJE|bq* z$QpQBnImAv%U;2PXzU7-~0+*?%VS@In6oz=yh1f$667((LzjE*@qK)fvO)vTlMmyz1{- z8fos|=%&aUlLFg!BdpUq1awfv)z4M24A};bpfcvKc3o5|z_s97@Wh;1yMZgX3*%$U zf1bJN)UoyXSW!Dw?p&d1rb!HuDjqQrD=^)JMh0&mr;*#DrFiSwdvdt19Eg%j1d0Mk zR?I*&_8ZR?C74E_4nNK1x{aZQb2*Tqk!FrG6C_*DAqGh#cHI8}sdpc8IvRSI&5y!f zvrvduI8AD5&lUq-Qg(xWwv3O}&|4FwAk}iko3UEOPp_;00KYWl((+cPlafO# z&5pRf`mwCc9TnBPWxxkx(wFgZ`pm_Jie4^ zEBK04)uw{AMRm6vi6bjb9De@Xd|0p`e=gg%Oh0ycNN8e?NESiBhF(rK!u z?frTYYi&uWG=@Z5$iq5QuW~~>Hz2Z7oGf#eA7Fs;^XK`xpR$y4x^`@ZYV7u4VQMP~ zfcIHYN!>s`3r0r!ZQ=R^>bNcH1KNdVHah{xM+gF=&QK@AYrY(>9qSG;cOzv0!HeUgD+kO7a$D)6& zn&(Yo@naE+WLuWMDiDhUSmLBFH3wjixlM=PpY71Q(rg8)mgCH297rXYOF4bdA_s`r zw%aIOxqJ_2`nKtY%=(D))0T}X7ByJfs-$}z#7Ie4$oMDZc=Au!^-70PLt+VRQi0H(wpxcS@X zp)M;GfY9}-iTiQMG(rgq+6^h>{DkVEh*=g!8-C%suWL*-*sM#baPviCjP*j=5wkFq zFav4S9n|i4hVl6OAXbx`)&8=*`YMUXRlArKb6R#lRV9r>$sCFX!26qH z@;Y1c+9e>G(_%7`L?YbwM6U3Noi;l{$8+az1K@vdsQ7(Fm8ERO8c95~!ATdlyfZ{j z%2ik3@#VQ6b?bYd;nC=~;G@=^F8M!QbM&caceJxt%4ed})^YaY0T#7K@22n_q9Cx@ zxqq;DHCLv8gYQq{yK7Hms%J5@mSW}MRuHS;Z zN24oC4T;lnYAbXZLe|ECep?M0^6H8D{to{D2mQL#oLbvba)k||UM8qO0c@R_`)G_%#WA5LgzLnI| zWiiseHib-qjs}ACNP*pD07+Wr+n)^d?gHdWcT2mJIK`hRTTp8nu zgQckU8~cP!8-B7kL*YkTI`#I0`oZY^yzZN0Jb3J662%%G;=Ssn`HhJ3jTqeh&f9d1 z`fL-_Dr=`RHF#RaQwt)-%8JPeSd5lqkwH!%jkfX%`RZNLu7kOjz|!$t$JuxZS1;W` 
zCvq~!Zs;U`a6URv^&UEwLs{F+wn|EpDB)j>gfcWl^6r9nRpK_{+k^iAhkmRW4NGSW zAT}dg804Mjlu;~`%@czQH`Do58y&Xz9J(g!{XM4rNYb^W#qh1YUuKirP@vr1c6Zt>FMXGczg%ZWgr<+nX5s*e;%4$D<1nPZb}keHi~>0X)LDXDSRaB|Z&;d|J4tzDF)N+OQlT4s`H z;_a^P$qW-Jj~*-e_MV}%=Bvr$r+l;%ytJgaE7PpR$qbP~WPF9$i1GCO*<_6f+jdSV zzf1<5$*h`oq^{AIn#-4ow;)3ZY2=K*^QoM~KK|5Y9(uS?xjMgWPX=AFB2+??O zHc*9NfQ~!)0ZAJyd;R)Ty=T-yuAiOeXoU7@%qy8?Dj=>ouT z^&)vS%u?55^jOPR{*J19UniZpS@HPCQct!D?KO$G_}EA1=cX$_Yf16dH1Y&*t;+uZ z5~*t4i8n7=w6y9~k_BDMqa!-;9!knv&n}tYPQ6yn*UjT@n1nH`HWJNFJR-Eza~5$~ zTI`N@F8JTfLf%6s+;7pLq?*V4)>hI=AiFcbwizRQt}ba()lpH2_e~3!WsNuI8#}79 zs~(B?4DK$yfFkZP6jEk|c+Bz0SG0_*0o()dciV{^x;N=s(dREEN|Ohflo3i^c1QOc-RE@o%2mb);^4$w3^){`2M9gH87^9Jdtc%T%@hEo#!hod-_#2VY zV@&EjJ)f3KX2Ud)uW64KOC9SpCCXXj1oE2I` z--uR5X7+xNyEj_g!s2rJYO><$!7Q%~4Nhm4PpF1E=%f%yKJCu{0-v8BJ9R{@l6^gs zy%t)y`ge z@_dhj=l=ll>BvkXnr5y#2y0M+0Bt(QBF=pNfZOfJe0=Sz>))%I* zX}s1`RK=Oe*Rq)I`Hu`N7-5d3r)Z7dV5xH(fP{|pW4PmY@DQie%1U4;2ycOBER!GYB$34%x=WYJs z`pCd&KUAi7b5`lCQ;o^mdlhG=YP-CA z3-9^igQSVzvba;Q`yux9$@;me{{Z8Rb`vL(nPrxYvCi>A>LfAG5Jr+R<7phX_VdSX zm5pB_>DyJNlI)OFm$;=Qj6))!@S<0L4m_`cFdUzXY}|(9lI`Peu=J)s{t(H@YZs3GAzIH77h&7C zDxz$T>Ia>GQL!8De}2DfD%P&NIVmulLWKhvHaKqRb+gSVI>eeoG+nz_IYd#{U2v72(nrq|kBa zW|gJP6q0+9)>fXZjEKc$X+r{$Fyr9(98dd1dKq3FU!Av&HiZne7C^Bi&HU%gh9_<( zc>(=_1NR+lRvdP1v2kK;$g;FBf|k`#Le1a_3XhKie~rFB-i+9M)hiclM5Y;KGQ}Lv zD1=7~jy5Mm<-ZXL+kXHS_ViHEFOaEdS%PF+nGVfp=l*n~C{cSPPQ_w^y*klDQYH38 zOV0eIWL3K9-FJC&`3&VHw=uzCrm2CTuiTaI%6l04aZ0Vaxtq5n>6`vO4n3JXx9>E@Czz#xx9(&fEV0%Zp1Y2itZs6ZaiA8VfC#&ER+L#a|sl zlCd0kC-kGVjT{U@^X+Bvc^o)@tx$9@H?_ZX6`9jDwR) zIZ#Ts6WovzPbnMx-4C@FtCn7lMHW|@#!8pfM`lI}1S@xn&Nm`-4m3|Dtrd~cgv0t!moV^)B8(8f>le$)k#9fk%P)Y=@YgfLOG8Pq=^;ml` z#~Kg^F4oK8dS|e>d1R?PwCAN3Lbds2ua2F2P<=*6h1fV*+sAOZ05Kc3!MZ3u7Vdu& z{4x3cOq|*qF@mBC5te320XxSstMT1F$B7(zkHcxbO^k1%$V-023meBBn2D8yYC;06 z7q}R1HXD#i{{SydKcRSCRglQiSQ5@cwJD4rQ6vVul>p>&o7@MIRDi*E>tR_8 zWy;uUaKma!c>}emqjcFAv|=UtfXUmAKl*_I{YTt{Y9o3Ww~NW#BbbYleABjcYXOO*pCO` z_y?=UrusuG6?tcd%TG=?jMK#&ukc5Pa&`=&HeI;yw%>lZ!$IFtSe0Rt&8oH$qjJo> 
zq2ZQ5+_qon0Q0}~>6z0x%elCP1)9*S9~7H|>{>|nV-iTK`MW6G?eWW_g*^jHH(47U zb_02Mhp#2W1%Lqu+cY~U+;6_*@9zHHH~L3UR>d*RvWG7An|g zWtKJskbVjFx4WhDGo|$AgEGaOrFKe^2_3Q~26c#eF{IJLrNRx*CjcqwUe_8Y5v!~m zwg4(F<&ogBk6G0T?a1(dBX8iiKO=9yRP1(yp2WtN9!3=BZ1aq$i-|0x50~HJN1xxv zR1H(5bJXl5Ip|uWT$E+*{!+X@%+4dUlkiXdp(x`p(`D>6{EyXH ziWg~^OR*}izinTT=67S~;BD<69emcf_0jr{-3=}j)VTyU40Y@gENn+U+%rqQ?CB~B|7!(zJ``-)lR z1H(Ty{Y}T?@9q8i8D;ft#?*!Bv-G_woI@VzfWvmltN# zSn`$W#t~zNwY4!wu#L7`3U(}^d2HXy;eStEUaRU%R#{S7kjD*)cYWb<&`KoSnLz{E z`>)R60nm=7r+2Rn8#gDq)YKU1OgES?Zv|Uzoj*?>zy12E>$FXC1yqW_jUs)!5-$b} zqjp0h5JPT02gcifHvM9HYo+Z_)RorMb6TxY#L#+P3`#MR1~wz1 za;v`Y4DtS%M&5njsY>+EelUF@$@)L0w8S_o^V82w^e_lak=l@3wIeqDpa^Q~6ml>> z8+*BV=`8gB0P$^}{v!UEYd9>nSlCMO&GgL@7I^|RjNp>cl2DKGmg$j;T8$}|sY^Fj zwOSXqn!TiSjlua;`j@CsZXs5?Cu8kRQ%tnuv@Rd6qo7zXmMlVe@_NB*OG&K@Q zDX5z~R?#ZX(TCY2Nq$7%U^n-F;p=bygMJ--6#QFg3wN~kvWH9SjBQ5YGS3R-v9=-w zZP&`sb`fQ2iNJ0rX8WV7b*4YXccvbtdKKy&ycVXhQ!|T>)1(>P6qe3D>`3e)w;Xc& z=t$9>o(VZ@T=8aJui6HDR;l#n42B+p#d5uNl4zkIGfb`e8*}cI5H=g^cyG|o(rL|j zEz-$J^cjqNlEku>Se1l9nNN9u54?a4j^0<>@zo3X&73>Ir=e>0YswlKnlm6*1ns=B z{5Q0y_U=4yzsFWsv{nlhJv{Cv;#beR*I{O?d;^I+($}5i^2$_k0hDe1`geUl%cRsJ zT7~^Wu z0G`s4ckow^{ZD_xIXpza!qFqWB(UyfNR=MrN~9%7(5Nv%@X{TXPxAcrH-z+-{fxb6 zA(~1|ak&Ele`$-%g5A|(_&q}tf)#o=$6ryq}CQ9jbF}g-t1R)^X%aadOe&*(<*O{;8wU1|9CmI#?wd?aybTQX zI&W@E$re##s|*eecrmoG_UziTD|=MrEaW7RBwlzg75z=o-UzZ80fx$?#UCXXH>#|{ z2qrdB;;C$>fXf4ZUme{_sSCGER*lJ7TeAICr<$sY(Mbz5VWOPEq%?(xxi;WQKbhn8 zap|_xx@NXgyfLChC9;1eVIf}J#W{%skI}gv2k9gF^wRoaj(kNcVrrrsygjlFaIhO3 zca>yGEGSulAFH3VkAv1@<+4|Gy+|azE>9C}qKf5lVIFz~z(10Kcy`2t;eVHQ<;SKb zG1`VT0Fsf2M4y&is*U0(`vsLs?d|d4{^0ejKk-BSMa$@qp_l4a!B3U{0E)5qAY7dr z6IaUMaxr+TVkqHY@RlTec?0=)@7IB^P)@G=LTen3PH3KzXoryRUQ$5% zt6@109|vN5eycXJ*LrjC@u62uVbOn!&jQp^0d}hMovm7uPnqX$ad-LT+n^mNzN;nEya)rFoiBC!bDwDG7LQ#*kBNL!Z>e3Q$e z_Dfo1Gz3;^&zO$JNtFxAWJQ2Uw!$?8gW3mz?tH2L0B*ZI8~jCU>2*Aq>6YF?qz(#6 zGsv}|48#U&1d}i-cH_5>UUu`=(zHIf(fZzsP}MZ?uw9xS&Af+r%8;vX8^)2SB(X++ z0DxHgiu7y4dRO>J#n?%8K4zNBk!aaYTKzg;5Q`@`C_ubWl1CK^!^mUq=&_ajDEcF+ 
zv5%AV*CT*=IEiEW{99YAG`vr2wV3$I2txkg#Qy-NtzZ5ie+(X?(aOn2hARF#J98%- zhAE}b;wOl=ffOn``1Ig^)yJR5x2307dN-~8HpH#u(|N46MmU~nR)q_BLhgs+7L5pV zyC4H#cYkv3I-sP#i7cFP!;188T&z;fA1Nil&Nhp}yNC|Ql7ssnckkz;h9A*CRAST9 zWHNr8>vzddQ=3OF8p`h@fU+S^3__2Ox7+)I^<+ASBNi)EWA2Bt)wvF3pJM6UF-i7k zBf_7Km0b6zTIRlT8y6||pn`Xf&ARf<*k(rrhgTab2H%h!TJ8GkUaT1TqaIZwM@CgC zA@-I8ey=BDPlLZ6u+F=)W0bJaHdWBb(Li}gDoDW`2)G@N>Un>E->$~=?&hhvUp+>X z;00HVxU>XvSW>%!gXBWHdEEKq(K?lB@wA>S)#$9p19pKN8W|Zu0F~vsub2M-^!Uru z8aofDa`o~xvQW}hm$xKMKb4o6Q6ecG2E*O*Ht4^T^g9`->|{nKEfiPcsUfm}H(~A_ zgi_UIZMeLN`hg?om-B9@0G#n_UhtH=qGSX+~NX{~o5On08PM;`+Qi|X5Lqo2Hu+i*AEbH5&( z&t1P0+UHYi%x)jl90klydrD7kwD_6j-bn++8X;o94T$aqKkD)FdK=_4tUAY2L9H=3 zsUGz-sdhu1wNioZr$w%L%d`=uR&ce# z!yt5u(t{b2sRwrFu^Tsm&x!HU{7CCd$;QOi1p_`>y{5Gr#)O&x2WW*)!!v$QctO`yDHqM!9b96_jvN8?bOev7`pmF zF5%>ey;#|uB??u(l?L%zj40w{Rwt5=l_!54Sw@b;WbNhh^)YZ0VUla+oFjfH3rBoL zyMY@31MT*HUU>CK%KA%;xp(NW)%$lXs?SEs$|r^fnVEve7(C)q+xwNa8~GguwWgTP z=={uBT%=|1-5^)91n%*cQUrfmn}&){Xx&H~a{Whcj9!oA^$q<)9v>%0zBV|=1hIW= zr{xKWdBkeF$r)e2{{Y3-$u!h|!z!ZFWs@l-We``Z0EMeSTmEA0fKj8PT~R}!DqWdJ zmOWZ3#}1#Crc*NvSq#PNkpP{(C))pyV7mi1$SzXCxQ(I=; zk6O zg}aBA3sBaYSY1p`5cfo@8v_ilzx_Fs1weU!FCLg5OS4(5Zcdd8P_4{vN;jSfVU@QL z@gb2OSjQ<~n}W{)1=oXd@XFn6q6p-pq@b{{)&}h)lOYCO<&GH8#ug(=$B84ZQ2S$2 zO7a_jTK0p+In3HgUIpj*^x}dRj=WvCN4mc;$Vsu-o%Uh={TXw45r@R(GMrL*G4!Oe zm84`yp4?HH5fQd68F$%1+=2G~zEuAJ#0LxO=dYfk+q+*-T*B%3Ws-~OSfXhfambD? 
zL-~X!j%Yp)!t{h){{Rl(#GO1He5RSf(WO@KUN$4CBgh(Ei6Rm}u_GaGKdbM{rF&if z0EM@#I*TV2Ek^k z?s{T$PxwuG7f!WIPE#X)JuK3vCHk3(<0mHAg>O4I9{_paIx=Rz!H@8#Qyn7L!An{{RQyNnn>LPa!wcs+BNTn)S;q8i2uG2*BUF z=k%8U0DigSKf(*r3YVs_nzdBXNguT!qhs>`+j!)TX8hfw9^iIR8<$5NC;lD(0EfJK z8klS?J1G=qIb>++lnlNMfnRWh$-m#_{zp*y*ZeI%4;h&S)X_9DR@;M24GL2#?A}#f zy{zDQ@%L}l)w@Ce0EIu{(kRwjnkqWd2tf+8>BAf`0-z0`UC!H|o&Nx@L=1oUarzgh z@&w}a+-7aT0yrW>;J(%7+kRjvw#0qkmJ;=JsyLWq{{Zp{w)u=#*#6%mh#&Y*Mi_kx(R!T& z$))hMZpNLiMud^fAyg?p9O}$RAlO;u+6cjD-$yW zJD<{^f#7ey`*cLb>ufUK5#s9Zi4Z8O1Y%Ag?Su z%#SZ9il20x)rTQBJ(*Y$x$whovNi4+zE39>x}|a}^WJFfSf^f2GFy*vxB#Crg*y^< zAHPx_sOh-l(0ILNB{<@ucr>;+qqoz@$f`MyAw-U&ef*GmTXFYhf`0ZY{?8EHRc*LF z>f7-n$lXuJ>^k(r{tMCQ=(KOCo~Ua(wVNoA#f-*dEg0I=m1@S)v%*-Bvo|k;zxup& ztWU;|s8|nCeNoGLnTw7gKm1Ra##xFK5i8=>+ocN}NCIY44&!1sEZr9U1Yh|3LfwN4 zkx26pNi>feN~vP5(QNNJF5mLax{>xj*P(ttPi8cxqq*JG2JT@mah9y5JFPI0T@Wbw zgs;Z_e14vfzNl&)VXv|HJVlvoTC0po;i)v1C5alc2|-xXeh3^X46DB+Eywna^xpm3 zGT*H=T(oy7r)pW6DVRJHDMbt8^QPp#(_!=HqMn}1ded80VDeaPJCV!RrE(UtR^V8Z zB^U-2Vs>vqJx-Gik<=J^m>Fo&i5k3Rp`K{~;Lyz)sS~i@TS9fW3OVQ3j}5n zZ;US}iIa(m7B}}WJ8$;it{#(mC9d>NM<%(Ph0KmF^yJi*aasxSZ12WOQw1bRS=-1` zd=dLL-4d;OFJl>&p?*7Y#F9wvKAsp=;}VitH)cFQe{zB2W9RjC7pOl4_BH0AsjRY^ zvN1I%l>Hf)K`fG~C1sL0(`gSMBYnv0B0o#9nrl+UQ-Hm~BPVgYl13UZqyrd5Ex04) zfFy78)!yE%OeChAEPQcFSCnuF8drO*vO{+%EKAPU0>Pq)zX7tZ4g7RS(YVSG-iI-f zkG*3VOyu$zER3rps{H$eO%d5Kl_MOupxK$^ZXmZz@1d54psjaRTZJK-#m2i1`dFGyBVfA$;ks<8=W4#2Yw|>{WXeqlSfUHUDJBaaB_xMQE3&X}0ZVZ` zCO?U7D_c=u@{>CI1T#jf8UzGClwb(l5aZ9=`CG3!+RdcVny)UAc4Ugg_9Lw=ba+)A zHHMcck}x(_H8MYrM5?Nd);+=!dTKEdVA;}I z_y&PiPWxN43M4c(LN}3}ha<$Q#DFt906zU#@U-e*pDC5L(_Ottxv^d;jcT;x<=dl9 z#Nt5H%BamW?cq$DLl`_*^j*?;`dJAq;H(&`y4%OcRM$izNu{qgPTZ4_H!QI%Xs3V6 zVn0iC%W3@*`1-c%Q?k{5q6iwKfkzu1kraz0ATPDRc;qA7xK?lMf70<;vSGyJ`yW=Y zb{1)55y3QzBJ3ZL;U6z)QhXNs^!R!$rE!mLNbggIN$X1$#ix+RnBud*h~FVvXpxm| z#CGMwVJg4sV!jUEqg<|#Q^&Pw#F7PgZAVF3C?i5cD%f+fFa^9S^4tBolhF~_uZof9 zCT30|s~DOiEopangsjWE@m~RlgZ=s&diCn|o%HJkEj*rDzAoj)+j~e&cdatNiKHXj 
zAtT4`2hUorlKzm?8SI{V##)`l9OuW$8Hl3G8TK;B$K#ZYIUV=ks62nFx{a^ZuX8Tb zV=Lqz=&{^+Bf|;eth;=$A17hIz~a3eFdnca+`n@#qUvNZIO#atvm{I0kr44a?FkY$ z6(^6#A0K|Nxx1c)>znX>K9y1#LABeB)<~PlLmq>>D-2NGNMB>vrJWAph;wvQ4+Ca`Ca6+kt<1VN+^Yg#^ zbViTy0VZmAWxYy8%D{*uuU@M~95JcjS)aGGfg6$Bo=2A^UZz%QadZA2GND0hFsoqoRy{9oBe8T?m%XFPclqkje!3ES5u3iFf=Q7 zsOO1@##YRD=2tzZBV}Nxi>JXO%YL4u;Lhm{FHTclX)am?R2D~(LjWTQz$ce&()3(EL)ng~>hAU6Pa!B(%Xq}^$d6f)n#Q9G$ z5_s?ON6T;S>mx?$Jbt~9*~VKfHx0@%!jSHFAyBjZwp0HA%ge4=Pf{>i9@FEqI-?$! zXMWGpv=B?YlJBr99D%n1eEq$hSTOli)JYZiEkgCDj7c2Touh(Q<+qT`8C!_(cm95* z*HB(tQC7#}{-RDrMV*|MPzWl4izys;9__aysLfC6wH+4q*HYtNR+2Sg91*+B;Epfb zNcY%*`ipo6u8;k{X_l(8;k1qRat`Xzx{%v%$f;6J%k$&O>-ow0b*yzYdQwAw6tj%N zp|0_8E#2T_cqQh$ob@sQlCe3WteGlkSVJnX8wBDPky0CR)!WmLMJweo z(&}A0@->QK-bzeD*akK|+S7(zqeJc=8-U$UYJ5DlG~JO7A{ZVZK?--V3z-h^qU=MU zJdc6!{U_<9^U>-w$mc3pIK+`7inLX%yvnNK!nARz_ki01K6!EJkZ()!n)0F@Kcw?^ zrj9U?ESTw}C73Git;du^?Y`E%4K*nWX z(g+@SuP&NT!-wIg)Xj5|7;DrnVzJA@8LVY15j-r<;Tafv6a(%)IqkO*^Va5czxaN7 zU6Im|>0eT8Hl)Y3SmBc%WpP?K#H(kx7A8`EGi|)@vhn@8F<^hfTkv6f9~MFkEgWTu zWR5WWCRSaJ&G}-UElT}_zm;T@ZYz(Bq{cf#i4n}qzzu?NCx4&q)gv^y9Z9P!L2l#~YR0Q7 zJvn1WhA`cv_3;mU$*@nbY-)Mr(QbU%duMbwVCGLL>q0$sT+{W4ZmDU@od4X)|Jaj zgJ4ypVm~psozJuadL~rIW_1Qpau`XjNl3#Oj>K}x(T8Q&5{JRu{Qkgyzgd=aPI^0n z^^59QCeuVC7I z)D>aVbY<^QhSXKgKC*VH<#NTi#a6}Gvm}+IjHt0D%u(trWooztkcvc# z51;y;emW3(#b8N_zX6Ybrj})r7Cu!bB=@#E76p^Z`0LJJt=LcE{-IrhqL{5ok}GDh zA*;v(afZT;xA!m4+;RBn!1R|?Y2QrnUrlcq{{X~c#dq($g)K|ABjn);@K~ShN5}H{ zxJT;N8(ZnQrPDKTwD#Co?Sitw+9bDZ$lvne5CYD32&=gFFGQ^o3O!vrPad-5?BwRZ zdU+f_zU=+La@SC|F&t=yI=k}Zs?K=t?QHa=d`>>BnRww@aoCwMH7-iP{Y>}VcHOPa z$7t)3EG&d{;rCXRIPBSw_VyqD#kM;j}h5@v5*^uj||@)s|*NWsyR!R7P zM0=8X0oJ(muDuJh0rIr}#--N?!xUIJRmtI9kKLB_P#B^}0t2CM#GwJO~ zr7{vnVPa!%htxTLYK4ixB=*O~!{N4Wi8^OoUcZ>hRL)~`-5eV5k}RbRf+4-&DBd4E zr{b@${$4|uo{G5pct~l>4l!v)^DvPunWAmt3(d+l-?8!RP(q$ z8!q1*wqdcDPfu_-FuJGcqRrfcy&BiA3p%usf=?xH%ap5lCw=&zj;ZkhR&;levU4Y#WbH1>Ryu!6bzcdzwS@ z;B3Bq+jP!oOE{lQAz5;w9NXFg6AHv`MutEPuNjl{@f+>9_#dl&u6ca!gUM8qYS?^k 
zHqE$M)u)LfkA2FhP|^Si+_6*V+wi?AxnE6m+vJ}ocTv^H&odGtr8KJ?h{dEVRb*k! zRg?mwiwEP-pVh4!e>sjCdroBLv%Gtjtp#_GA#Kt|eYnm7@5%Q&2J`&AP<=nrUZCkp z&q`bNX{c`6+D3^(9zZ(HviR8#&wvAUDVV`cOCt@urYh#9+557}D6N@QVk~!vabw>A zY&Ilq)u&AQ_ou1Y{s~V`>gpD206w1167_15aM=Qf`ce+($N-JE=&MWAO>tt~S?J(i zEMe_;M5{VX>&UkbM!0VPc_59C{&HHsT2D2YY{jG={g#ZDd8*qe8NBW}1MLI)dF9n> z6OPq+S(_mh2>lvF54U6r>XJbzBVpQokQ^A_^B${8$dk;RSo$7Wdj zJ4puRJDm0fNMq#qZHM2c1JoZ(EMf#br;Cczc@Y%L6z7idp(KO$@ymZ3 z9*NApyXjMQ7-p}XiZ_NDqG+SAmN*g4xk0`nFGIavOg9l^=|uz+#ewM>f>MV z-|1y5$dxqp8KH0jFt1y<#O?`DskqqqApCSs#QGsa6>h8G@OaAgQ)iMXApnHf6Uh}v zB6e-JJ~}Dh(i(cy5Y-w>74?0-26*a6xN-u9-H+6R%zz!#Ez5f8$_UfZo)SX2Y zG{~$Dv}ssZxEb4X#fLDe2IpXRJ9+l)*3tbB^(z6Lz+$ad)cSc1no*^g=dlXd9LC4> zo%i4N{{SxC9J1J*C4Ly0?BjA52n8M+mBCWOVn-L>Zy#^B>iOtjm(zM@A7ZtsZAU^X z@&dveK#9G@m3R&JAo(BFx}@d(QpDq~)Wn*1slj!dY^9ls!`nl@vzLxXZTx%>^>rhr zwdNlEn=>w{{XNXj)z$t zRgR;LgBep31JAwuT_b5 zW4J#gcs_pK4?_ID2U%Iv7>b$9e1m%`mFg@K*&gTOttQ8B3+1=m5AD{{v_^_uJ?Z== zz^fharAa4a8pct$1~Kqo)OO?jdIf4p@{-{$*@731Y~Gf#$|Nfxf+iu^8N4EpJ_pB1 zMzOVQ-L(W(WNL_HS!vBFSV0^)Ow%^+zz;to`gBnAs|hssF((C$ryS9=SLeTCKHSA6 zlYpEFM;g(xtM+$2!n?lRHyURm_40X0sQpE|bTR(`PbGHKww;-0P!8`cnz<_l6-L}gY_`i;0HkIvF^@wZfOQStHRvGn$usz4Eq_?^AdZmIfu+J~l)u066@%WV}}(PCmu zc$2t4CP~&5tqeeN?>$DENe14;WJAZ8>5HXyrJK15ki}jqnG01fz`k^g+s(lVd%#|4l80nZY z&BQmlK3kAOrsKydy)itGqS}F^bG&4a2Ov5;ylQ>U=Tl%(to@hx&hn*0~G1lG7~sOx4VBISAy( zy$PyKG({e%W#)U)Y=k$#ALi;Rub}laO>0x=2rkUjN-|8gAT(?-UQXgAa#;el<8Ryi z-B$G%rWv}FvL^OxCN7vhu+@&tl#&-w?H+6mwjV0Hl0Fx1uNqq?oYXpZ1qC~q>$x0Q zN>WQf*i(@dh|0vOH2u(XnM#aO0aX3=ZYL z)79?@WAPD7w~WnAVy+^zK_MvQF_nC*uD>J}9DFyo=lQy%>hD;85;}5tskf&%T|)#C z`?Gg)J@;5a2Xu_b%a?C;KRa^$`o8J?Q|jJoxyA?a=y<;>RspTFInx*t4ctJBwE%k#Y1BA_f5Wdt+_e#^d#L!(*v>la{Xe{C1~3B&s?HSI#;m`Lg(PLzy&`m<6+1A z$D&pnP1mbDH*tecWb2sG;763&b>jWS0a;hKCz1ml{Ev^fM9R9;Ok-JQOeJeKoOb7q z3c-Yp%z&i_AiEE1YzEvs@A`VX<6_de>|L9e^!&8uXyq#Iq9Qz<-?k3zx7~m5*LEvK zVlBeYM#Lg&RO9U$;>tqtyXSr%ju~qGb!DJ6_0g4*J8duS6|9<_O#A_ z#x9E&(^MfFu!RBAl*V|&m@9K<>4%?8xPeX00K}S 
zpSQc?uDe=uTI6Z0I;x%Aj3}s)&*c#Yz=bM&gd1!QF60*7I_n)VcZ0qqK~#hRIj1w|h-3CZ}d8eVa3g%E!NMK&Nr%amfAKp|($3`a@Ep z(Yvg&nCzqxw5g1?V=Ez2dBmS2sg0O_O~1|71%Fo-JXp!-ZBp0KgH>E^Lb6Q_6XgeQ z+6xVbgZm$MSeidrb$Jxc##p0xc>sGCJ;I9_UAbVU4lFSE% z-;-=lpYwGi>CdE^Z`2u0{Zn_bXhIyGU6?OI#a*lWU0JLL_bcpFgU79hdUN=8^ur}y z3}~BEX<8A|Yb}*n>RiU%c!~o(shf9=WA>5(;!u0IsOn=q_+ifAtvtFyOh#Uc%_c_H zTOW0L{F1Vpd=ya84S*T_qtuidSYT&y{ZuXVS`RJR`mn#5B5 zC5r~S$0f;ZFPM-X++3T$Db$8zAr-juMff#blrT*3ui-L z_`rn6EZE96yG?FZj87=+_V$a6k{4#$INNi`gRo`mQ+e-0a@i&RAiB+%+mXOjTNb(T zJa>hV9f2dmZ?Qc&UxQyq)6@GG^ZC4llL;B@JeP~}nAD7j>qP|a6iUwPyEnEaUPs;5 z)39-jtoXTc$^1ykJv(hmrx>2Ze7V-44|8RuiEB=uQ9AO8im zglS8%WhSqWg4J@fwYbwEa>O|FW?%mRWf4@fGZxsYFC`l{PA-}B$64c-7Zv?0k;+QT zUOw7KBv~nu(ToK4figd0K0uwF0>a7a*1%eU`UbW|=FsbZIep zxQjG*pyc}`u^vd{i+y zqC64Est;&NzZ>n$55HP({B>vzN0Rk&>C;%eb<7pKT}$~`FV`=NT)n!hEbVu-A95*V z3El1vLwP?=`}CDy@i~npr1JWMMc2+tdlw8-UemaCu%l*XTN2Wa(E?bN!)yQ?m-&44 zTB+);N7g!`#(M_;09S#vUmqa&=|yUG|edp6hI|#1kpL&M(HKu zm!(^{%27`g6)1_gfkeeA9`^a)@Av76tCG!MHS!OVwMqeGp<6WsvBP3L!iBR{+hiw? 
z>K@U*FP@GVOB$K4FZ^JXvRG?Xm1$%#5=`>P!X-snV{aU{W%1zby-nqG#zI^bk@VFl zU~0QdEsTd6mgk5P+*Wy;d8FHk0Dqfp`iRl`*FAJPMU^a-_6nN}lLq8_xRz$M3gROhRd#RIpEN>tbcGDp!hYGer$fB!C-&A0f8` z#>e%3p09ZMG|S|!!GwmMU8vU&a(a&xp=ES(9zM`x^1gTS0+01|1(Me~PYG_EdV?s` zFA4tu_!s|evm3frJickp|g{kpx|*Lrr_RlSAH!EVL4Rw(PkDu|h{r>VvkWzplJXMGDIPZRclbN;<=2M# zUY%+G08sQ~z5H!j`i@9syY=*n!(m=+xx=(yqlja<+j1M*&K;5VTDz@ONQ2*!KHDKvrG%8!yjW^7I4n*^$I!1>HyyUeYOHC{f!yRf3}KRTs|N zdHCtqtLW`qZa%%b3rGnV2cAM5fY^Js^Rn;beYX9&vuTY7TPGycZcj#(eAz{Wa0;&@ zyM+tk_g%lY>FAGRLgO14wdvTCBq+|R);Sa0VIc~zjfWLGd>{N>NZ7>7fhi@#W{$)$ z1b`0^DIV6@RP7-r%I)W@e06@r83CB>R)v^8J0}u`XN-VWIX?T3XdeTh->LapmLbQ} zt3oMlChRP#6<6D0S;UGr0C^n$rKJChxJVhsp2%wTGP@3K9xfxjYqPS>fuk_XebKY_rVv3`1J`Wy%TC!aBVq13M z)maJ^a$T4K03`q0+Qm^iR!3Fh?7ri?=hz*#@Ol@|>E@%c6gF_WqQ&{%Na4SbwP;Bs zOsu8m?oQjm7+w9O4gTI+p$2zMYPHqXg$&-F)GWwjnrW$5wA7VZl(A^kt8N1iBzfQ5 z-A-A;TEqA5U@rEW%u&*wmDuXkq-{s;vt}OVz=7^42gd8x6|Gg4r;9Y}RH=~4EFl9& zli<%UBgy{&G;D~VZ^!*V@6bor-&OrZUrv6H8>RA@$=;l``}%oKLD&e-9h5fUhYUV` zdPuzo^`}~D9cfEV>rFYTu+M%Kq`5!K*Q+l<^rnc? zweXncjx4QuSlX6m?$$?u%`9mB*ru`>`6zMi*bTpKw#VsLkToewj!}(Du2i z0mXR@y`j1uY5ijtu3A>i^l4)A6zS?5mJS@G)Sl$BWO2@6%3qE&fEebAO6?SqE4L=! zX5-ZZ)c*iWVXLJr$e60tg;?Y@-lT4kA>7E*d%*3fVZX-yJoV-8@x7=r&}1@|tIHhs zEB6wbOgPuEBvOGA&noT~yDr?f{{Si9&yJQYGp{DNnk!G+y(AigCb1jUD%OfBD#;LP z^=ZrcRvvy?-GC#{$6HOy&CdltTc7LsoE+EY{aMs{KtPgY~bjxUGtaqE?ZYt(;2&%tcBSklN|Qt zfqs$SbZXt`%2+VQ{Nn@M0V*~fQ#JRl8gJFTPnN`JYn1BV((5%nRXmhsxg?XzAO7s7 z+~6Bq?JR@GormcIszuB!_zgRzwQgStN)0b>Ct)(zEXy7?fo*M?)Q7T{iN_W zBU@P9wxY&McG!w&ara;in`>4G6;cI^{&j^Rc*Y5R{@oFv^v~4KN^fJb+PYV>qqPQ3 zym?%8DlhzMgjd3IziyLCMQBxcmu3mLW%l@Pt{VL-;-Z=K2C>w6+;mMDuV$rXSZnO6 z{1O>eO@`!nmQ_C+ZPH`vr{ZH>OEfxvE1ooQwWo@;i+QYV*rhRsRC1K13T1M4_k31?v?i^1v& z_F;!g#8`y>7Xqfim=@R*zeFrHGuA9Hp$<<*XQDueR>U-khRoAAMSAkg4%^6}jkX}C zyQXgq=+>at7+YW>*uE8HW&m`nrqEUi9|Z$78YioQds%+S;ImlxEpVv;DtMhU)ikPU8JOhO9cf9Y#7CMu{<*JJpSQ**i86Ij>GQ)OPF! 
zWg)E4Qwznux8AeMI`_>s1zE11!A&B<)>iLKC&q7I%vn>ok zNmig1=Iuo-7}a-p!U+|I?7Ye2?FGM^?egwV352l}Qep&EXGDgy)v+>6wR?k%bJ&q1 z0!~Bxqiw$H)#>iyaTOB8h8b4%AXd-trsD6#%SAFn-jEdFH?PO(v&Bs zUnP?N0Nl*NNu4ZNX~H276ErgOm&c9v+z*bnarig&lT~V*jGD(=;;Naj4;Za1kFc{W zZk%k%6@=v;{G0Xo1%2Im9`vtVMe5}YO{{t#$8w4ra91naLmLGw8Bd>Z-*9#XM%#hY zl5toZRy4@6{Xu9}1%g?*0K?ijF8gosu^%U_DKws+p!j@~ScYZDNjbY(NsLKV3hY7J zK*w)CKl%Li&K+-}w7t`hitTYpgp$Nxc?47c0F-P#Hc~u(ul{ckdY{$&C5V2f9fp?a z8?Sa~=6J{lV8wSIe~%k~FI78PZDW(hoTR#b^=xG*d%sIcHJWMCPlFPcCvvBC2gma$ z>X)aq#wSwhI}qe@mg!PJC10tt98#px@$R54=Zd-F;CNrvxE|h_I=VkWX`MY&_(q4D zF{>Wq!wzxeT)?Xj9nwH5?(oRoJ3+Pt{kq|T!eFaraf4Cj+Z`rtUhZzyimQdpBz?mg zQjTd)wM1jy03Z){x2v*xv8S|-A2X$NwjQ<~8d>6%FRV7Q2UO$4$Mb96Nfj$xRy>K@ z-MHvqlJyHs=5V(2c^vj(GL~|7Y9^S@VjkuqI8_XDT83#0IV_&-{{VLO`+Ba)>pDK1 z{{V*O&eww$$Pxc-cCVw17YX-^;`b{iLYDbi-OK3rY?CUgt*Jn ztzKUIVnNx4W6c$#Ra6IWKlpkc*L^`AVy-dk<>}MPWI4LxMPrSlMH}g3y?WmKLP({U zgXO+<>kCy<{{V(83vlarR2b)T41pNDyvAzcK77{7Me;WCe@|Wa^0^|G z4xaS!g-dWVRhu15;?>?Tc*S~YNM-?#oxaQao`bEb=v7Kl>n%T#ud1an#f`_yRN2Hu zByL2HT7CG+L~1?GRG;hTtel`(Nb(V@ofu!v>4l+r|`BSk6`MRzyN`&_*%N{1Z+fg_*>?CxR zD~JlW)!de2<@PGufLr{(9c?qw4S61Nyt1wOuPiWDX(Hq$mxW5|jV*$%K}K=9ug1&P z(|--Qui`$RYU_Gpx@EH(Zr!ZY((cCr$n66gqFGs38GwuO&~B)a1@gzILrGY}YYhci zp$io}o;7&KGdfOK#Lrvr1!~H%VH>$E&oOP;j z3iPz|32e`et$MTsb`sYnlau**EOKH==yCX@*EDJAJdNZ^)z%2+?!gR-ptHrq@0uMS_1k5aGYJyp3|O*C@iDA+SY5S7=nd8Co1iJCBXG0n?m@-}WKrC&j6ooQD! 
z7F5x*a@$CdRfdgk&9{ETEKyX5INC5~W#=O7LU>Qr*QuYv%=qmk=%$~~>j~;t$<1C9 zm9I?Lzh&-Pk~qyuvL?@A*1?Sms3={yF8enZD@L{RtTm*PNZ-REyNZJq2W4zelD7qF z_HqK$$^QW4$g|50ZcGBi1MwaQTM_gv^yabk3pJ+}3)M2(B0N>hor)6J6ky0@X?;v} zKB11;1N0TRkje?;++ZDm>AUpPC^c4}t*PayZmhR1<>QVElC!||YGo&ay}F(T^=2@~ zDi#V^A8n4^5}n(=RU9cu(YA1n9`d^>D51Hd@kRijVc+-PueYtiLWRtQuHmanrHhuk zQAs?e3b4G>cHoIwr+(aQ0w^rR?4xtHUUU6k(O#VTi|R&GQRuBBqqP1@GF_#mf}U|h zQ^wOXPU|W)f=#lt&{6BhYl9t|KaP{zc9WjuS1{v6yp_Rzm z##;*%owF@rWVB(O2s>=OI~Jv%6PK<0DY9MXuOdZC3yRo@%`nLY;0g$Ft0wHMrGvM} zLb}-;by)KA)zLFzu2{Q1Vtnnob&`foSoY>xT5~CpW06@@xex9wze8;9Q&(KOIL*&y zjaw^Z+Q-zkmod_-!X<|#40a`zt;C=xK$`_+Wj@}!bdBupCXmYIG{tQ}ilFmBPUJQ5 zl+wWM8GfE&LR9)$b}7Fo%duY{&DMTT7jI5t;q|VLM@8UijD#tV)A>P7?c2|z3liPH zb3ainx{&sxn0XsWr{}1fy3lW=c@#uvyA{zPj)>cG zs=Pe>aq81k)9p*1!o#g;WAj!h!yhUlsC8h-3;^onCSg& zYAAKT{8Mu&9Oz-IkZg5Kja3_QjaC@Xec16|J8$}%&rDyW--qu@biO60qky%D$z>;K zq#5yFFJMWqDQ#q3-mO+Vf9ajNX0RH6LSyTu*o+nIo;7SV)~!=34L6QI465-Kl0xsx z-M;?s-Fl^J3pKGb*s}&dPiCi(!zA<4TNwARG^I>&J$o?mk+`zr{_IG#8j57 z_9^8%@Xr%XK1U`d#9^f1IM#S$jI2?+gUfD1&-uEn>aS8psfLt!oGunTOo=JCm%`_3 zR?JjW8RLc#^qOW^UzXXS@o6=9S%E7>--O?n2DgtRfiXj$VG;|hGy<| zY@CK9PN zAzbYr1qf5)e;qKx);zzdk7V>QiB+;+miRJviZThij?^3@P zbm8Z!vN~3*^Fm_&6NSdtFyzny5(zD-Egf_|#$uqb=&4^&`p=uCEgKYdCT{tKmc@KM zcx!zGd!A8JyC7g;1hL$Zc3wwrqAL1%lzFj|%hfrQ|Yz(lh?*IYN6{%H!`)&ntl4qYM8Yqqe#hRE2}Jv@BW_`+uvhtuc(%8jgDw^ErF@2`D_Pau_9& zJZ3*(u`e?KC6jVb5x4{Tb!TWX>uxM`s*=lT8@L2QmtCmAzLqL<=8t~kG(8+kIwxt`HUyf%Ptce z7%V5fmB|7a_CP|B#H145M;@-YiyBEMqb^4M3iHJ$e#5>Fd9s%-R%qpxy(UV`vDCFG z@!aA#Atd#YtbIsoxh+e3Lq(0yF+DnRX1|75sBh(Eaw3jPX4`fvylf=cClCjp+o0}S zSN{OTi~Kfxy%^%g*Rzl#Cj7EiCcK@MlM!l1-bmfzXOR%?vv~IL>#OK8IZQ1qnYT(T zWlVxt$Jp;!iWt(gVm5~DC>h+Am&o^SO8dRNbG??eC7iLhLl+)BDah1p!D*@7UepRK zPVXx>31o6gENEGN{bAuXt(*1l77)J_(jsnK))01nr*@p;*1i7V5~*sl3Ji0#F?psupW(dC{VC&@v# z>Djz>X2AMA9YDdXx@N$SUn!q@Ot?2j^&lANQvO({LJ8y%*Q=WNB5@b@FgR7KDdS z)T=$n)<}f1LkuemNGB}I`Z%+vzivGMGM<^&^w+hEpAnav30R5KHrNU3=CQHtor-y+ zj6c<5*-G)WyKqM3C+X`mO?o$h)OguqdBiVbd3*88ILp~+tUQw<(?(`;ts5s94&aaU 
zj-qn^0EVno8jBmJ7D}||%^)~R5TyxjUYd|CblIq?4RDbbcioE-@DEkY$Kk)$-6;l3 zFX;wKoEIZmp|ejZKk(XDmN?-(QS^sZ z(~eHfyBXYVc{0%5PGzmfR;D}`Mf+^a$ozTtba3?3{86Z3AW4BtW{7QZ^J#_UG1h{<}Pdp*b2P;h@Ksd}$aTwDelk54(zDl)W&Z$-S(2@VyB-yyy=NN&Mr~EGX1q#F*w>LJ{{WbX zkqd@NLN`Y&PpaBKQfZw(sc>0}_GoIXd9{RXpOo`S6m4E9i+9(;=9{S?w^tr>o*2Zrz+W)X629?HG-f?C>D$ zr=pIO^zT=_k%_gb>QYvj(%B*_p$@+@1B18=fFSXo^a=>|&fJ13-lJjz{yV^&1UWwMs^?hb67IKSmB zRi64P<#I8?{$s7nbH6?Kw^`Y*PU=z1;B$G*z6yJqf-0$|yIN-ynqN-Utjf3a{y!rn ziDAE(n&a}ZFp(y+@fR)|bWxMh%}eynmc#3}!$n$SJseh;fh%G1FJXYOOF2UHGs0tu zxd^~2ptrp3k3F|0i=!|RhHrb`v5OV`3#f@TpMrYA3YP*yfqEEXp`Kg-nSb5-ej zscMydM@F+_DfWfRbT8MhYOzBlQx$}YRk2B>3)z@356hPxgEhTK)LOO(B+-|k%GiRd zrMYm~%CAM18NKFd=Eoy7{$5;riiQ4OyR*YwE7r+s{aAF?BF(~|#I55fWU*CYs|Pz$ zPF>77+oH6CM7VKsgRDLYj1bTT$9VK3Hc3d34FWOmdE zq>y`E)wUm}sVe@MdSxx6Eo}o09L(HliK{T43-#4|ajaQC%u^v@!DE#Ihh4hOn@oCg z*wd%-*w|}iezEGe*5jZ^qK*_cXM{Zw63(%rsPVfqKMxyq6`ZkNt_LEIrt1A{bQ1$Z zF^<0Y<9OJG&N_t5j`G}9iDPGpWmZt;K4F)tCXN39+pR-d&I4QN9VbRzJz|}xa#~7Q zANXRHuWZpbClIZ;19E9>7vFGLow;>U%<4S;fWh9Q>PN3{sxdKInAgbJmrrFO#j8ZN z;DELw{119T%)D@cxm5#xE7f00dZ&T)FCF!I6E$O2>pH7;jR~6s*lP~cOkM3uR^pXv z&XMiiclvnUloxJYTITfw)R?ei1z%rh{V4SV9gs-o#cFLHf*+u-#f@6|icm?9vsTm+ z2XgGr@c>lq)t?!ye-YEK(#K~?Yi7q}>y1gD7@8OANl{Qu5Aacll^Jcv9^aWNvc<&r zO7;A`tHY?7zlgjSI}oEGidQDLXnAI0n<_7#9|SM<>vn(PmH2$tUZ!cxj-$@vMjuJp z$U@zWv1p{qQ+D<}7zy2?Vu#2(uwXd+^}oFlqI3_Z@yAW)rm#XCp0rfi;#C_eMrDX` zSxfO7u}~NK^hj+@fu01pB(h$bw35k>#N$fE0>l$%HyL>MT~wkJh;*Q;e9u=!fm+p(pv^ln*}Rpb(_ zySHOQy)}@Z5wK?C=c_!FG!~u@xw0CXz@p7|douztP{r>d*lPMv&BtJmIq7H;}pqeUJX_LfJIO4MbJaER3Ynstyk z8?fD!l^qzdH+0=hgiB8yUbaxEHUt#}HcmR%-ca(fa_pLW`vYZ1eG)=w7=kU*Wtlqrm?Y^Sz3lvD2NCdQ14?w)Dt=2%xbE!4K{AwA zM_^V|*-IW<^^>Xa*_@5~$}nOKbxzz?gfJzDPBu9CKh{P9OyOC$0Z(FNmx1KI?!J0m zpfaADYg+awVyIxTdYNvdnVT1|i-?L_N=0cWnxu1F7TyG8L@tdRHwjy&3QZ*hbKQHH z*mLn;4+ODKW*gBJg`Om;)@)s6XyvZk5K(tA?pNA6Px1KM*qnB!hXe)|d_HRYc$Z|c zBr)Qt*>#5y`6>*?jJwOiJ`u4I#;y8#`=YdFw$^^4;`P>N7BjkI2biB3inC%niCen# z;)gYFm86K;$iXPu7KsEKFY7GFru#3ZX=ZUvj+yS`1nIhDVx=`ZwdQF;JhaG(+9^?$ 
zdr}r|AIjTxhZct&(2&KL%Fm2S_2@QKYLCY+QKiwEJQP$X|6Lc_xIUpos%7>UHbCDflY% zt1K}&9apDxJ~K?;5jtutW>s<)Ber0YS)uR51%Y%o1dXFtj%SdFw&2gz0@wIYS+DcxfubsTn54V?$3n#6V65tPRTj#=&H>e`yHvud+6++??2$K*`T zoEy(>RFRX;-APcy!;XrL41F3g!+oZmMT#)hu=Bzl!lYHxc0&|yVJ#yr+*|&qR2@ly zv07^`uPv(f5*C^m=Vi*72PzVsYjiWLR|TuviWW2^rF61>>$HL7H_ z1}bbn{UxcFlEP0mm66;;-P)LPz~Wb9O{7LsiB?Gfqbe_GJO zUGBVb1;)*k#7BzCLe+yIZbUfz-4im(BUs0F#}!i&N_TASZKbff%Q0UiAEo;5{{U#2S~RgU zW^ZZS^hsAU6@`-rRQqCTbsdCnq z)jD@T-Nxke)~mevD|K>ufK8G5Q&*7Yv#94-_Tpw!*M0pvgp&n_`ppk(O~aCcw5fXu zt8l%A$=gWk%PpNNYSoMQngyfmqr+*!bMB%=$bcc$UY>f{q%}oso|nVqt!W$URtmU0 z_3S1F4I4ZvQSZ>c?nbZ6UNOe7$2?**lENr*-rlqEx;t3mFuC1Ls&tmH)R-o!@@i~0 zVr*>;&Y!mLZpKG1ijd_dt9unJe@-pc`e`OsjTRD|!gMRtSMZ*m`c;3{YV+D%4UCCq zj@#+GPPxphSW4G4!NOv(jUanc$yx-p3_@C6wv&&SBA)ge}5I!|ZyH$^IR6R#rV7Yft*YpTS9|EnUIm zaoCejOX2L{F4m4J_N082E|+X~rP?_x6@!FL$=BpqFdv5=Ry^U9lmh3;c(9Sd!oHPmaK`PD&C>>ODqscLoa1D z43_OpkAW#<87x&xwbs3W_G*1aa`Eh_oich$OI^?D9Ih`%)ym^E#!*3!{vp6;@lA!N zkqhoFbo^3s65qb2iSe-L!sTs=H%|U+`nWk>7 bWv@?B+jK_~y&P~SxO literal 0 HcmV?d00001 diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp index 62fda4accce..56d354655dc 100644 --- a/src/caffe/layers/image_data_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -37,10 +37,13 @@ void ImageDataLayer::DataLayerSetUp(const vector*>& bottom, const string& source = this->layer_param_.image_data_param().source(); LOG(INFO) << "Opening file " << source; std::ifstream infile(source.c_str()); - string filename; + string line; + size_t pos; int label; - while (infile >> filename >> label) { - lines_.push_back(std::make_pair(filename, label)); + while (std::getline(infile, line)) { + pos = line.find_last_of(' '); + label = atoi(line.substr(pos + 1).c_str()); + lines_.push_back(std::make_pair(line.substr(0, pos), label)); } if (this->layer_param_.image_data_param().shuffle()) { diff --git a/src/caffe/test/test_image_data_layer.cpp b/src/caffe/test/test_image_data_layer.cpp index a4080ccd145..ce5e0bc62d6 
100644 --- a/src/caffe/test/test_image_data_layer.cpp +++ b/src/caffe/test/test_image_data_layer.cpp @@ -34,16 +34,24 @@ class ImageDataLayerTest : public MultiDeviceTest { std::ofstream outfile(filename_.c_str(), std::ofstream::out); LOG(INFO) << "Using temporary file " << filename_; for (int i = 0; i < 5; ++i) { - outfile << EXAMPLES_SOURCE_DIR "images/cat.jpg " << i; + outfile << EXAMPLES_SOURCE_DIR "images/cat.jpg " << i << std::endl; } outfile.close(); // Create test input file for images of distinct sizes. MakeTempFilename(&filename_reshape_); std::ofstream reshapefile(filename_reshape_.c_str(), std::ofstream::out); LOG(INFO) << "Using temporary file " << filename_reshape_; - reshapefile << EXAMPLES_SOURCE_DIR "images/cat.jpg " << 0; - reshapefile << EXAMPLES_SOURCE_DIR "images/fish-bike.jpg " << 1; + reshapefile << EXAMPLES_SOURCE_DIR "images/cat.jpg " << 0 << std::endl; + reshapefile << EXAMPLES_SOURCE_DIR "images/fish-bike.jpg " << 1 + << std::endl; reshapefile.close(); + // Create test input file for images with space in names + MakeTempFilename(&filename_space_); + std::ofstream spacefile(filename_space_.c_str(), std::ofstream::out); + LOG(INFO) << "Using temporary file " << filename_space_; + spacefile << EXAMPLES_SOURCE_DIR "images/cat.jpg " << 0 << std::endl; + spacefile << EXAMPLES_SOURCE_DIR "images/cat gray.jpg " << 1 << std::endl; + spacefile.close(); } virtual ~ImageDataLayerTest() { @@ -54,6 +62,7 @@ class ImageDataLayerTest : public MultiDeviceTest { int seed_; string filename_; string filename_reshape_; + string filename_space_; Blob* const blob_top_data_; Blob* const blob_top_label_; vector*> blob_bottom_vec_; @@ -177,5 +186,34 @@ TYPED_TEST(ImageDataLayerTest, TestShuffle) { } } +TYPED_TEST(ImageDataLayerTest, TestSpace) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter param; + ImageDataParameter* image_data_param = param.mutable_image_data_param(); + image_data_param->set_batch_size(1); + 
image_data_param->set_source(this->filename_space_.c_str()); + image_data_param->set_shuffle(false); + ImageDataLayer layer(param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_label_->num(), 1); + EXPECT_EQ(this->blob_top_label_->channels(), 1); + EXPECT_EQ(this->blob_top_label_->height(), 1); + EXPECT_EQ(this->blob_top_label_->width(), 1); + // cat.jpg + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_data_->num(), 1); + EXPECT_EQ(this->blob_top_data_->channels(), 3); + EXPECT_EQ(this->blob_top_data_->height(), 360); + EXPECT_EQ(this->blob_top_data_->width(), 480); + EXPECT_EQ(this->blob_top_label_->cpu_data()[0], 0); + // cat gray.jpg + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + EXPECT_EQ(this->blob_top_data_->num(), 1); + EXPECT_EQ(this->blob_top_data_->channels(), 3); + EXPECT_EQ(this->blob_top_data_->height(), 360); + EXPECT_EQ(this->blob_top_data_->width(), 480); + EXPECT_EQ(this->blob_top_label_->cpu_data()[0], 1); +} + } // namespace caffe #endif // USE_OPENCV diff --git a/tools/convert_imageset.cpp b/tools/convert_imageset.cpp index 9c52bfa0ef8..90cdb15d427 100644 --- a/tools/convert_imageset.cpp +++ b/tools/convert_imageset.cpp @@ -73,10 +73,13 @@ int main(int argc, char** argv) { std::ifstream infile(argv[2]); std::vector > lines; - std::string filename; + std::string line; + size_t pos; int label; - while (infile >> filename >> label) { - lines.push_back(std::make_pair(filename, label)); + while (std::getline(infile, line)) { + pos = line.find_last_of(' '); + label = atoi(line.substr(pos + 1).c_str()); + lines.push_back(std::make_pair(line.substr(0, pos), label)); } if (FLAGS_shuffle) { // randomly shuffle data From d4e7c93a6873f75a53d7618e82343e4b5b8a239e Mon Sep 17 00:00:00 2001 From: Aaron Schumacher Date: Thu, 19 May 2016 14:04:22 -0500 Subject: [PATCH 061/264] convert non-uint8 dtypes to float; refs #2391 As recommended by @longjon, this will allow 
`caffe.io.array_to_datum` to handle, for example, numpy.float32 arrays. It might be worth noting that `datum.float_data` is stored as protobuf type 2, which is float32, as opposed to protobuf type 1, which is float64. It is a little unintuitive that caffe currently requires data to be passed in as float64 but then writes float32 to LMDB. To demonstrate this: ```python datum = caffe.io.array_to_datum(np.array([[[0.9]]])) caffe.io.datum_to_array(datum) # array([[[ 0.9]]]) datum_str = datum.SerializeToString() new_datum = caffe.proto.caffe_pb2.Datum() new_datum.ParseFromString(datum_str) caffe.io.datum_to_array(new_datum) # array([[[ 0.89999998]]]) ``` This behavior is somewhat hidden because `datum_to_array` returns type float64, even though the data doesn't actually have that resolution if it has been stored as protobuf text anywhere (for example in LMDB). Alternative solutions: * Require and return float32, consistent with the protobuf representation. * Change the protobuf to allow float32 or float64 and update surrounding code to support this. 
--- python/caffe/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/caffe/io.py b/python/caffe/io.py index e1759beb587..966c164cffd 100644 --- a/python/caffe/io.py +++ b/python/caffe/io.py @@ -75,7 +75,7 @@ def array_to_datum(arr, label=None): if arr.dtype == np.uint8: datum.data = arr.tostring() else: - datum.float_data.extend(arr.flat) + datum.float_data.extend(arr.astype(float).flat) if label is not None: datum.label = label return datum From 4bf4b186076b054a0fa06103bc8989a3577468ba Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Tue, 24 May 2016 10:36:23 -0700 Subject: [PATCH 062/264] Overhaul TravisCI * Run on Ubuntu 14.04 * Test cuDNN builds * Build with OpenBLAS NOTE: Python3 build only works with CMake --- .travis.yml | 58 ++++++---- scripts/travis/build.sh | 13 +++ scripts/travis/configure-cmake.sh | 32 ++++++ scripts/travis/configure-make.sh | 36 ++++++ scripts/travis/configure.sh | 11 ++ scripts/travis/defaults.sh | 10 ++ scripts/travis/install-deps.sh | 105 ++++++++++++++++++ scripts/travis/install-python-deps.sh | 14 +++ scripts/travis/setup-venv.sh | 18 +++ scripts/travis/test.sh | 19 ++++ scripts/travis/travis_build_and_test.sh | 54 --------- scripts/travis/travis_install.sh | 101 ----------------- .../travis/travis_setup_makefile_config.sh | 31 ------ 13 files changed, 292 insertions(+), 210 deletions(-) create mode 100755 scripts/travis/build.sh create mode 100644 scripts/travis/configure-cmake.sh create mode 100644 scripts/travis/configure-make.sh create mode 100755 scripts/travis/configure.sh create mode 100755 scripts/travis/defaults.sh create mode 100755 scripts/travis/install-deps.sh create mode 100755 scripts/travis/install-python-deps.sh create mode 100755 scripts/travis/setup-venv.sh create mode 100755 scripts/travis/test.sh delete mode 100755 scripts/travis/travis_build_and_test.sh delete mode 100755 scripts/travis/travis_install.sh delete mode 100755 scripts/travis/travis_setup_makefile_config.sh diff --git 
a/.travis.yml b/.travis.yml index 4dc7ed72d6c..92d740cd88b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,40 +1,50 @@ -# Use a build matrix to do two builds in parallel: -# one using CMake, and one using make. +dist: trusty +sudo: required + +language: cpp +compiler: gcc + env: + global: + - NUM_THREADS=4 matrix: - - WITH_CUDA=false WITH_CMAKE=false WITH_IO=true - - WITH_CUDA=false WITH_CMAKE=true WITH_IO=true PYTHON_VERSION=3 - - WITH_CUDA=true WITH_CMAKE=false WITH_IO=true - - WITH_CUDA=true WITH_CMAKE=true WITH_IO=true - - WITH_CUDA=false WITH_CMAKE=false WITH_IO=false - - WITH_CUDA=false WITH_CMAKE=true WITH_IO=false PYTHON_VERSION=3 + # Use a build matrix to test many builds in parallel + # envvar defaults: + # WITH_CMAKE: false + # WITH_PYTHON3: false + # WITH_IO: true + # WITH_CUDA: false + # WITH_CUDNN: false + - BUILD_NAME="default-make" +# - BUILD_NAME="python3-make" WITH_PYTHON3=true + - BUILD_NAME="no-io-make" WITH_IO=false + - BUILD_NAME="cuda-make" WITH_CUDA=true + - BUILD_NAME="cudnn-make" WITH_CUDA=true WITH_CUDNN=true -language: cpp + - BUILD_NAME="default-cmake" WITH_CMAKE=true + - BUILD_NAME="python3-cmake" WITH_CMAKE=true WITH_PYTHON3=true + - BUILD_NAME="no-io-cmake" WITH_CMAKE=true WITH_IO=false + - BUILD_NAME="cuda-cmake" WITH_CMAKE=true WITH_CUDA=true + - BUILD_NAME="cudnn-cmake" WITH_CMAKE=true WITH_CUDA=true WITH_CUDNN=true -# Cache Ubuntu apt packages. 
cache: apt: true - directories: - - /home/travis/miniconda - - /home/travis/miniconda2 - - /home/travis/miniconda3 - -compiler: gcc before_install: - - export NUM_THREADS=4 - - export SCRIPTS=./scripts/travis - - export CONDA_DIR="/home/travis/miniconda$PYTHON_VERSION" + - source ./scripts/travis/defaults.sh install: - - sudo -E $SCRIPTS/travis_install.sh + - sudo -E ./scripts/travis/install-deps.sh + - ./scripts/travis/setup-venv.sh ~/venv + - source ~/venv/bin/activate + - ./scripts/travis/install-python-deps.sh before_script: - - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib:/usr/local/cuda/lib64:$CONDA_DIR/lib - - export PATH=$CONDA_DIR/bin:$PATH - - if ! $WITH_CMAKE; then $SCRIPTS/travis_setup_makefile_config.sh; fi + - ./scripts/travis/configure.sh -script: $SCRIPTS/travis_build_and_test.sh +script: + - ./scripts/travis/build.sh + - ./scripts/travis/test.sh notifications: # Emails are sent to the committer's git-configured email address by default, diff --git a/scripts/travis/build.sh b/scripts/travis/build.sh new file mode 100755 index 00000000000..bb9406f046c --- /dev/null +++ b/scripts/travis/build.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# build the project + +BASEDIR=$(dirname $0) +source $BASEDIR/defaults.sh + +if ! 
$WITH_CMAKE ; then + make --jobs $NUM_THREADS all test pycaffe warn +else + cd build + make --jobs $NUM_THREADS all test.testbin +fi +make lint diff --git a/scripts/travis/configure-cmake.sh b/scripts/travis/configure-cmake.sh new file mode 100644 index 00000000000..772f1e2ce8d --- /dev/null +++ b/scripts/travis/configure-cmake.sh @@ -0,0 +1,32 @@ +# CMake configuration + +mkdir -p build +cd build + +ARGS="-DCMAKE_BUILD_TYPE=Release -DBLAS=Open" + +if $WITH_PYTHON3 ; then + ARGS="$ARGS -Dpython_version=3" +fi + +if $WITH_IO ; then + ARGS="$ARGS -DUSE_OPENCV=On -DUSE_LMDB=On -DUSE_LEVELDB=On" +else + ARGS="$ARGS -DUSE_OPENCV=Off -DUSE_LMDB=Off -DUSE_LEVELDB=Off" +fi + +if $WITH_CUDA ; then + # Only build SM50 + ARGS="$ARGS -DCPU_ONLY=Off -DCUDA_ARCH_NAME=Manual -DCUDA_ARCH_BIN=\"50\" -DCUDA_ARCH_PTX=\"\"" +else + ARGS="$ARGS -DCPU_ONLY=On" +fi + +if $WITH_CUDNN ; then + ARGS="$ARGS -DUSE_CUDNN=On" +else + ARGS="$ARGS -DUSE_CUDNN=Off" +fi + +cmake .. $ARGS + diff --git a/scripts/travis/configure-make.sh b/scripts/travis/configure-make.sh new file mode 100644 index 00000000000..ddc40fffa9d --- /dev/null +++ b/scripts/travis/configure-make.sh @@ -0,0 +1,36 @@ +# raw Makefile configuration + +LINE () { + echo "$@" >> Makefile.config +} + +cp Makefile.config.example Makefile.config + +LINE "BLAS := open" +LINE "WITH_PYTHON_LAYER := 1" + +if $WITH_PYTHON3 ; then + # TODO(lukeyeager) this path is currently disabled because of test errors like: + # ImportError: dynamic module does not define init function (PyInit__caffe) + LINE "PYTHON_LIBRARIES := python3.4m boost_python-py34" + LINE "PYTHON_INCLUDE := /usr/include/python3.4 /usr/lib/python3/dist-packages/numpy/core/include" + LINE "INCLUDE_DIRS := \$(INCLUDE_DIRS) \$(PYTHON_INCLUDE)" +fi + +if ! 
$WITH_IO ; then + LINE "USE_OPENCV := 0" + LINE "USE_LEVELDB := 0" + LINE "USE_LMDB := 0" +fi + +if $WITH_CUDA ; then + # Only build SM50 + LINE "CUDA_ARCH := -gencode arch=compute_50,code=sm_50" +else + LINE "CPU_ONLY := 1" +fi + +if $WITH_CUDNN ; then + LINE "USE_CUDNN := 1" +fi + diff --git a/scripts/travis/configure.sh b/scripts/travis/configure.sh new file mode 100755 index 00000000000..ef740c8982e --- /dev/null +++ b/scripts/travis/configure.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# configure the project + +BASEDIR=$(dirname $0) +source $BASEDIR/defaults.sh + +if ! $WITH_CMAKE ; then + source $BASEDIR/configure-make.sh +else + source $BASEDIR/configure-cmake.sh +fi diff --git a/scripts/travis/defaults.sh b/scripts/travis/defaults.sh new file mode 100755 index 00000000000..d69c0a7d964 --- /dev/null +++ b/scripts/travis/defaults.sh @@ -0,0 +1,10 @@ +#!/bin/bash +# set default environment variables + +set -e + +WITH_CMAKE=${WITH_CMAKE:-false} +WITH_PYTHON3=${WITH_PYTHON3:-false} +WITH_IO=${WITH_IO:-true} +WITH_CUDA=${WITH_CUDA:-false} +WITH_CUDNN=${WITH_CUDNN:-false} diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh new file mode 100755 index 00000000000..f7bfe4c4df9 --- /dev/null +++ b/scripts/travis/install-deps.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# install dependencies +# (this script must be run as root) + +BASEDIR=$(dirname $0) +source $BASEDIR/defaults.sh + +apt-get -y update +apt-get install -y --no-install-recommends \ + build-essential \ + libboost-filesystem-dev \ + libboost-python-dev \ + libboost-system-dev \ + libboost-thread-dev \ + libgflags-dev \ + libgoogle-glog-dev \ + libhdf5-serial-dev \ + libopenblas-dev \ + python-virtualenv \ + wget + +if $WITH_CMAKE ; then + apt-get install -y --no-install-recommends cmake +fi + +if ! 
$WITH_PYTHON3 ; then + # Python2 + apt-get install -y --no-install-recommends \ + libprotobuf-dev \ + protobuf-compiler \ + python-dev \ + python-numpy \ + python-protobuf \ + python-skimage +else + # Python3 + apt-get install -y --no-install-recommends \ + python3-dev \ + python3-numpy \ + python3-skimage + + # build Protobuf3 since it's needed for Python3 + echo "Building protobuf3 from source ..." + pushd . + PROTOBUF3_DIR=~/protobuf3-build + rm -rf $PROTOBUF3_DIR + mkdir $PROTOBUF3_DIR + + # install some more dependencies required to build protobuf3 + apt-get install -y --no-install-recommends \ + curl \ + dh-autoreconf \ + unzip + + wget https://github.com/google/protobuf/archive/v3.0.0-beta-3.tar.gz -O protobuf3.tar.gz + tar -xzf protobuf3.tar.gz -C $PROTOBUF3_DIR --strip 1 + rm protobuf3.tar.gz + cd $PROTOBUF3_DIR + ./autogen.sh + ./configure --prefix=/usr + make --jobs=$NUM_THREADS + make install + popd +fi + +if $WITH_IO ; then + apt-get install -y --no-install-recommends \ + libleveldb-dev \ + liblmdb-dev \ + libopencv-dev \ + libsnappy-dev +fi + +if $WITH_CUDA ; then + # install repo packages + CUDA_REPO_PKG=cuda-repo-ubuntu1404_7.5-18_amd64.deb + wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1404/x86_64/$CUDA_REPO_PKG + dpkg -i $CUDA_REPO_PKG + rm $CUDA_REPO_PKG + + if $WITH_CUDNN ; then + ML_REPO_PKG=nvidia-machine-learning-repo_4.0-2_amd64.deb + wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/$ML_REPO_PKG + dpkg -i $ML_REPO_PKG + fi + + # update package lists + apt-get -y update + + # install packages + CUDA_PKG_VERSION="7-5" + CUDA_VERSION="7.5" + apt-get install -y --no-install-recommends \ + cuda-core-$CUDA_PKG_VERSION \ + cuda-cudart-dev-$CUDA_PKG_VERSION \ + cuda-cublas-dev-$CUDA_PKG_VERSION \ + cuda-curand-dev-$CUDA_PKG_VERSION + # manually create CUDA symlink + ln -s /usr/local/cuda-$CUDA_VERSION /usr/local/cuda + + if $WITH_CUDNN ; then + apt-get install -y --no-install-recommends 
libcudnn5-dev + fi +fi + diff --git a/scripts/travis/install-python-deps.sh b/scripts/travis/install-python-deps.sh new file mode 100755 index 00000000000..eeec302791f --- /dev/null +++ b/scripts/travis/install-python-deps.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# install extra Python dependencies +# (must come after setup-venv) + +BASEDIR=$(dirname $0) +source $BASEDIR/defaults.sh + +if ! $WITH_PYTHON3 ; then + # Python2 + : +else + # Python3 + pip install --pre protobuf==3.0.0b3 +fi diff --git a/scripts/travis/setup-venv.sh b/scripts/travis/setup-venv.sh new file mode 100755 index 00000000000..81245f146da --- /dev/null +++ b/scripts/travis/setup-venv.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# setup a Python virtualenv +# (must come after install-deps) + +BASEDIR=$(dirname $0) +source $BASEDIR/defaults.sh + +VENV_DIR=${1:-~/venv} + +# setup our own virtualenv +if $WITH_PYTHON3; then + PYTHON_EXE='/usr/bin/python3' +else + PYTHON_EXE='/usr/bin/python2' +fi + +# use --system-site-packages so that Python will use deb packages +virtualenv $VENV_DIR -p $PYTHON_EXE --system-site-packages diff --git a/scripts/travis/test.sh b/scripts/travis/test.sh new file mode 100755 index 00000000000..fedd7e6b56e --- /dev/null +++ b/scripts/travis/test.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# test the project + +BASEDIR=$(dirname $0) +source $BASEDIR/defaults.sh + +if $WITH_CUDA ; then + echo "Skipping tests for CUDA build" + exit 0 +fi + +if ! $WITH_CMAKE ; then + make runtest + make pytest +else + cd build + make runtest + make pytest +fi diff --git a/scripts/travis/travis_build_and_test.sh b/scripts/travis/travis_build_and_test.sh deleted file mode 100755 index 174f1ee5a0a..00000000000 --- a/scripts/travis/travis_build_and_test.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash -# Script called by Travis to build and test Caffe. -# Travis CI tests are CPU-only for lack of compatible hardware. 
- -set -e -MAKE="make --jobs=$NUM_THREADS --keep-going" - -if $WITH_CMAKE; then - mkdir build - cd build - CPU_ONLY=" -DCPU_ONLY=ON" - if ! $WITH_CUDA; then - CPU_ONLY=" -DCPU_ONLY=OFF" - fi - PYTHON_ARGS="" - if [ "$PYTHON_VERSION" = "3" ]; then - PYTHON_ARGS="$PYTHON_ARGS -Dpython_version=3 -DBOOST_LIBRARYDIR=$CONDA_DIR/lib/" - fi - if $WITH_IO; then - IO_ARGS="-DUSE_OPENCV=ON -DUSE_LMDB=ON -DUSE_LEVELDB=ON" - else - IO_ARGS="-DUSE_OPENCV=OFF -DUSE_LMDB=OFF -DUSE_LEVELDB=OFF" - fi - cmake -DBUILD_python=ON -DCMAKE_BUILD_TYPE=Release $CPU_ONLY $PYTHON_ARGS -DCMAKE_INCLUDE_PATH="$CONDA_DIR/include/" -DCMAKE_LIBRARY_PATH="$CONDA_DIR/lib/" $IO_ARGS .. - $MAKE - $MAKE pytest - if ! $WITH_CUDA; then - $MAKE runtest - $MAKE lint - fi - $MAKE clean - cd - -else - if ! $WITH_CUDA; then - export CPU_ONLY=1 - fi - if $WITH_IO; then - export USE_LMDB=1 - export USE_LEVELDB=1 - export USE_OPENCV=1 - fi - $MAKE all test pycaffe warn lint || true - if ! $WITH_CUDA; then - $MAKE runtest - fi - $MAKE all - $MAKE test - $MAKE pycaffe - $MAKE pytest - $MAKE warn - if ! $WITH_CUDA; then - $MAKE lint - fi -fi diff --git a/scripts/travis/travis_install.sh b/scripts/travis/travis_install.sh deleted file mode 100755 index 091e92431f0..00000000000 --- a/scripts/travis/travis_install.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/bin/bash -# This script must be run with sudo. - -set -e - -MAKE="make --jobs=$NUM_THREADS" -# Install apt packages where the Ubuntu 12.04 default and ppa works for Caffe - -# This ppa is for gflags and glog -add-apt-repository -y ppa:tuleu/precise-backports -apt-get -y update -apt-get install \ - wget git curl \ - python-dev python-numpy python3-dev\ - libleveldb-dev libsnappy-dev libopencv-dev \ - libprotobuf-dev protobuf-compiler \ - libatlas-dev libatlas-base-dev \ - libhdf5-serial-dev libgflags-dev libgoogle-glog-dev \ - bc - -# Add a special apt-repository to install CMake 2.8.9 for CMake Caffe build, -# if needed. 
By default, Aptitude in Ubuntu 12.04 installs CMake 2.8.7, but -# Caffe requires a minimum CMake version of 2.8.8. -if $WITH_CMAKE; then - # cmake 3 will make sure that the python interpreter and libraries match - wget --no-check-certificate http://www.cmake.org/files/v3.2/cmake-3.2.3-Linux-x86_64.sh -O cmake3.sh - chmod +x cmake3.sh - ./cmake3.sh --prefix=/usr/ --skip-license --exclude-subdir -fi - -# Install CUDA, if needed -if $WITH_CUDA; then - CUDA_URL=http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1204/x86_64/cuda-repo-ubuntu1204_6.5-14_amd64.deb - CUDA_FILE=/tmp/cuda_install.deb - curl $CUDA_URL -o $CUDA_FILE - dpkg -i $CUDA_FILE - rm -f $CUDA_FILE - apt-get -y update - # Install the minimal CUDA subpackages required to test Caffe build. - # For a full CUDA installation, add 'cuda' to the list of packages. - apt-get -y install cuda-core-6-5 cuda-cublas-6-5 cuda-cublas-dev-6-5 cuda-cudart-6-5 cuda-cudart-dev-6-5 cuda-curand-6-5 cuda-curand-dev-6-5 - # Create CUDA symlink at /usr/local/cuda - # (This would normally be created by the CUDA installer, but we create it - # manually since we did a partial installation.) - ln -s /usr/local/cuda-6.5 /usr/local/cuda -fi - -# Install LMDB -LMDB_URL=https://github.com/LMDB/lmdb/archive/LMDB_0.9.14.tar.gz -LMDB_FILE=/tmp/lmdb.tar.gz -pushd . -wget $LMDB_URL -O $LMDB_FILE -tar -C /tmp -xzvf $LMDB_FILE -cd /tmp/lmdb*/libraries/liblmdb/ -$MAKE -$MAKE install -popd -rm -f $LMDB_FILE - -# Install the Python runtime dependencies via miniconda (this is much faster -# than using pip for everything). -export PATH=$CONDA_DIR/bin:$PATH -# clear any cached conda (see #3786) -rm -rf $CONDA_DIR -if [ ! 
-d $CONDA_DIR ]; then - if [ "$PYTHON_VERSION" -eq "3" ]; then - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - else - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh - fi - chmod +x miniconda.sh - ./miniconda.sh -b -p $CONDA_DIR - - conda update --yes conda - # The version of boost we're using for Python 3 depends on 3.4 for now. - if [ "$PYTHON_VERSION" -eq "3" ]; then - conda install --yes python=3.4 - fi - conda install --yes numpy scipy matplotlib scikit-image pip - # Let conda install boost (so that boost_python matches) - conda install --yes -c https://conda.binstar.org/menpo boost=1.56.0 -fi - -# install protobuf 3 (just use the miniconda3 directory to avoid having to setup the path again) -if [ "$PYTHON_VERSION" -eq "3" ] && [ ! -e "$CONDA_DIR/bin/protoc" ]; then - pushd . - wget https://github.com/google/protobuf/archive/v3.0.0-alpha-3.1.tar.gz -O protobuf-3.tar.gz - tar -C /tmp -xzvf protobuf-3.tar.gz - cd /tmp/protobuf-3*/ - ./autogen.sh - ./configure --prefix=$CONDA_DIR - $MAKE - $MAKE install - popd -fi - -if [ "$PYTHON_VERSION" -eq "3" ]; then - pip install --pre protobuf==3.0.0b2 -else - pip install protobuf -fi diff --git a/scripts/travis/travis_setup_makefile_config.sh b/scripts/travis/travis_setup_makefile_config.sh deleted file mode 100755 index 83aacf11fb0..00000000000 --- a/scripts/travis/travis_setup_makefile_config.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -set -e - -mv Makefile.config.example Makefile.config - -if $WITH_CUDA; then - # Only generate compute_50. 
- GENCODE="-gencode arch=compute_50,code=sm_50" - GENCODE="$GENCODE -gencode arch=compute_50,code=compute_50" - echo "CUDA_ARCH := $GENCODE" >> Makefile.config -fi - -# Remove IO library settings from Makefile.config -# to avoid conflicts with CI configuration -sed -i -e '/USE_LMDB/d' Makefile.config -sed -i -e '/USE_LEVELDB/d' Makefile.config -sed -i -e '/USE_OPENCV/d' Makefile.config - -cat << 'EOF' >> Makefile.config -# Travis' nvcc doesn't like newer boost versions -NVCCFLAGS := -Xcudafe --diag_suppress=cc_clobber_ignored -Xcudafe --diag_suppress=useless_using_declaration -Xcudafe --diag_suppress=set_but_not_used -ANACONDA_HOME := $(CONDA_DIR) -PYTHON_INCLUDE := $(ANACONDA_HOME)/include \ - $(ANACONDA_HOME)/include/python2.7 \ - $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include -PYTHON_LIB := $(ANACONDA_HOME)/lib -INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include -LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib -WITH_PYTHON_LAYER := 1 -EOF From 26879320898aacfcb5236c725938e259788c10fc Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Wed, 25 May 2016 16:39:55 -0700 Subject: [PATCH 063/264] Remove misleading comment from a test file --- src/caffe/test/test_caffe_main.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/caffe/test/test_caffe_main.cpp b/src/caffe/test/test_caffe_main.cpp index fccf6f1613b..6473b74d0a6 100644 --- a/src/caffe/test/test_caffe_main.cpp +++ b/src/caffe/test/test_caffe_main.cpp @@ -1,6 +1,3 @@ -// The main caffe test code. Your test cpp code should include this hpp -// to allow a main function to be compiled into the binary. - #include "caffe/caffe.hpp" #include "caffe/test/test_caffe_main.hpp" From a355f9c9d0bf28ac81552ddb4873b01d09581fb3 Mon Sep 17 00:00:00 2001 From: Siddarth Malreddy Date: Thu, 26 May 2016 23:31:31 +0530 Subject: [PATCH 064/264] Check for non-empty ImageData filelist. 
--- src/caffe/layers/image_data_layer.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp index 56d354655dc..7ee7dc40714 100644 --- a/src/caffe/layers/image_data_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -46,6 +46,8 @@ void ImageDataLayer::DataLayerSetUp(const vector*>& bottom, lines_.push_back(std::make_pair(line.substr(0, pos), label)); } + CHECK(!lines_.empty()) << "File is empty"; + if (this->layer_param_.image_data_param().shuffle()) { // randomly shuffle data LOG(INFO) << "Shuffling data"; From 09546dbe9130789f0571a76a36b0fc265cd81fe3 Mon Sep 17 00:00:00 2001 From: Lumin Zhou Date: Mon, 30 May 2016 04:14:42 +0000 Subject: [PATCH 065/264] fix spelling error in memory_data_layer.cpp --- src/caffe/layers/memory_data_layer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/layers/memory_data_layer.cpp b/src/caffe/layers/memory_data_layer.cpp index 82909874054..975f4841723 100644 --- a/src/caffe/layers/memory_data_layer.cpp +++ b/src/caffe/layers/memory_data_layer.cpp @@ -107,7 +107,7 @@ void MemoryDataLayer::set_batch_size(int new_size) { template void MemoryDataLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { - CHECK(data_) << "MemoryDataLayer needs to be initalized by calling Reset"; + CHECK(data_) << "MemoryDataLayer needs to be initialized by calling Reset"; top[0]->Reshape(batch_size_, channels_, height_, width_); top[1]->Reshape(batch_size_, 1, 1, 1); top[0]->set_cpu_data(data_ + pos_ * size_); From 5d7a71ae108f86c05bc03eb542155b30bd28ca74 Mon Sep 17 00:00:00 2001 From: Lumin Zhou Date: Mon, 30 May 2016 04:19:16 +0000 Subject: [PATCH 066/264] using GNUInstallDirs in root cmake file --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index da7142c9b3c..c765889e99c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ 
add_definitions(-DCAFFE_VERSION=${CAFFE_TARGET_VERSION}) list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules) include(ExternalProject) +include(GNUInstallDirs) include(cmake/Utils.cmake) include(cmake/Targets.cmake) From 90b98ce76fe8613d345932f47a6250dc772f7b8f Mon Sep 17 00:00:00 2001 From: Lumin Zhou Date: Mon, 30 May 2016 04:21:27 +0000 Subject: [PATCH 067/264] fix install path with GNUInstallDir support --- src/caffe/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/caffe/CMakeLists.txt b/src/caffe/CMakeLists.txt index 8a80c940488..5a1b73f7493 100644 --- a/src/caffe/CMakeLists.txt +++ b/src/caffe/CMakeLists.txt @@ -29,9 +29,9 @@ set_target_properties(caffe PROPERTIES add_subdirectory(test) # ---[ Install -install(DIRECTORY ${Caffe_INCLUDE_DIR}/caffe DESTINATION include) -install(FILES ${proto_hdrs} DESTINATION include/caffe/proto) -install(TARGETS caffe proto EXPORT CaffeTargets DESTINATION lib) +install(DIRECTORY ${Caffe_INCLUDE_DIR}/caffe DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +install(FILES ${proto_hdrs} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/caffe/proto) +install(TARGETS caffe proto EXPORT CaffeTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}) file(WRITE ${PROJECT_BINARY_DIR}/__init__.py) list(APPEND proto_python ${PROJECT_BINARY_DIR}/__init__.py) From 581650b18d7580df726d1d6d54d83c397d1379bb Mon Sep 17 00:00:00 2001 From: Lumin Zhou Date: Mon, 30 May 2016 04:22:42 +0000 Subject: [PATCH 068/264] fix install path with GNUInstallDir support --- tools/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 02fbd5cadd8..3789450555e 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -25,5 +25,6 @@ foreach(source ${srcs}) endif() # Install - install(TARGETS ${name} DESTINATION bin) + install(TARGETS ${name} DESTINATION ${CMAKE_INSTALL_BINDIR}) + endforeach(source) From f710ef5e89d3ec22891b24099c66b7a6e9f06c45 Mon Sep 17 
00:00:00 2001 From: Lumin Zhou Date: Mon, 30 May 2016 04:24:13 +0000 Subject: [PATCH 069/264] fix install path with GNUInstallDir support --- examples/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 663d7360b7d..2a2300332ad 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -19,7 +19,8 @@ foreach(source_file ${examples_srcs}) caffe_set_solution_folder(${name} examples) # install - install(TARGETS ${name} DESTINATION bin) + install(TARGETS ${name} DESTINATION ${CMAKE_INSTALL_BINDIR}) + if(UNIX OR APPLE) # Funny command to make tutorials work From 918d9994e4b2e9d82bd7929b0ef1d90393f68b31 Mon Sep 17 00:00:00 2001 From: Josh Klontz Date: Tue, 31 May 2016 18:08:04 -0600 Subject: [PATCH 070/264] Fix vecLib search order for clients with both the old vecLib framework and the new Accelerate framework --- cmake/Modules/FindvecLib.cmake | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cmake/Modules/FindvecLib.cmake b/cmake/Modules/FindvecLib.cmake index 9600da43647..46043367362 100644 --- a/cmake/Modules/FindvecLib.cmake +++ b/cmake/Modules/FindvecLib.cmake @@ -14,9 +14,10 @@ set(__veclib_include_suffix "Frameworks/vecLib.framework/Versions/Current/Header find_path(vecLib_INCLUDE_DIR vecLib.h DOC "vecLib include directory" - PATHS /System/Library/${__veclib_include_suffix} - /System/Library/Frameworks/Accelerate.framework/Versions/Current/${__veclib_include_suffix} - /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/) + PATHS /System/Library/Frameworks/Accelerate.framework/Versions/Current/${__veclib_include_suffix} + /System/Library/${__veclib_include_suffix} + 
/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ + NO_DEFAULT_PATH) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(vecLib DEFAULT_MSG vecLib_INCLUDE_DIR) From 994a033a725c23811dc50e4b2874450a45f2ecd1 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Wed, 1 Jun 2016 10:37:14 -0700 Subject: [PATCH 071/264] Cache protobuf3 build in TravisCI --- .travis.yml | 3 +++ scripts/travis/install-deps.sh | 37 +++++++++++++++++++--------------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/.travis.yml b/.travis.yml index 92d740cd88b..4849a7ac289 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,10 @@ env: - BUILD_NAME="cudnn-cmake" WITH_CMAKE=true WITH_CUDA=true WITH_CUDNN=true cache: + timeout: 604800 # 1 week apt: true + directories: + - ~/protobuf3 before_install: - source ./scripts/travis/defaults.sh diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh index f7bfe4c4df9..ee16d36a7fc 100755 --- a/scripts/travis/install-deps.sh +++ b/scripts/travis/install-deps.sh @@ -40,25 +40,30 @@ else python3-skimage # build Protobuf3 since it's needed for Python3 - echo "Building protobuf3 from source ..." + PROTOBUF3_DIR=~/protobuf3 pushd . - PROTOBUF3_DIR=~/protobuf3-build - rm -rf $PROTOBUF3_DIR - mkdir $PROTOBUF3_DIR + if [ -d "$PROTOBUF3_DIR" ] && [ -e "$PROTOBUF3_DIR/src/protoc" ]; then + echo "Using cached protobuf3 build ..." + cd $PROTOBUF3_DIR + else + echo "Building protobuf3 from source ..." 
+ rm -rf $PROTOBUF3_DIR + mkdir $PROTOBUF3_DIR - # install some more dependencies required to build protobuf3 - apt-get install -y --no-install-recommends \ - curl \ - dh-autoreconf \ - unzip + # install some more dependencies required to build protobuf3 + apt-get install -y --no-install-recommends \ + curl \ + dh-autoreconf \ + unzip - wget https://github.com/google/protobuf/archive/v3.0.0-beta-3.tar.gz -O protobuf3.tar.gz - tar -xzf protobuf3.tar.gz -C $PROTOBUF3_DIR --strip 1 - rm protobuf3.tar.gz - cd $PROTOBUF3_DIR - ./autogen.sh - ./configure --prefix=/usr - make --jobs=$NUM_THREADS + wget https://github.com/google/protobuf/archive/v3.0.0-beta-3.tar.gz -O protobuf3.tar.gz + tar -xzf protobuf3.tar.gz -C $PROTOBUF3_DIR --strip 1 + rm protobuf3.tar.gz + cd $PROTOBUF3_DIR + ./autogen.sh + ./configure --prefix=/usr + make --jobs=$NUM_THREADS + fi make install popd fi From 5f2d845fafc8883aa16b437b79fa52b39f8a0ddb Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Sun, 15 Feb 2015 14:28:01 -0800 Subject: [PATCH 072/264] Add RecurrentLayer: an abstract superclass for other recurrent layer types --- include/caffe/layers/recurrent_layer.hpp | 187 ++++++++++++++ src/caffe/layers/recurrent_layer.cpp | 295 +++++++++++++++++++++++ src/caffe/layers/recurrent_layer.cu | 44 ++++ src/caffe/proto/caffe.proto | 22 +- 4 files changed, 547 insertions(+), 1 deletion(-) create mode 100644 include/caffe/layers/recurrent_layer.hpp create mode 100644 src/caffe/layers/recurrent_layer.cpp create mode 100644 src/caffe/layers/recurrent_layer.cu diff --git a/include/caffe/layers/recurrent_layer.hpp b/include/caffe/layers/recurrent_layer.hpp new file mode 100644 index 00000000000..ca17371b994 --- /dev/null +++ b/include/caffe/layers/recurrent_layer.hpp @@ -0,0 +1,187 @@ +#ifndef CAFFE_RECURRENT_LAYER_HPP_ +#define CAFFE_RECURRENT_LAYER_HPP_ + +#include +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/layer.hpp" +#include "caffe/net.hpp" +#include 
"caffe/proto/caffe.pb.h" +#include "caffe/util/format.hpp" + +namespace caffe { + +template class RecurrentLayer; + +/** + * @brief An abstract class for implementing recurrent behavior inside of an + * unrolled network. This Layer type cannot be instantiated -- instead, + * you should use one of its implementations which defines the recurrent + * architecture, such as RNNLayer or LSTMLayer. + */ +template +class RecurrentLayer : public Layer { + public: + explicit RecurrentLayer(const LayerParameter& param) + : Layer(param) {} + virtual void LayerSetUp(const vector*>& bottom, + const vector*>& top); + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + virtual void Reset(); + + virtual inline const char* type() const { return "Recurrent"; } + virtual inline int MinBottomBlobs() const { + int min_bottoms = 2; + if (this->layer_param_.recurrent_param().expose_hidden()) { + vector inputs; + this->RecurrentInputBlobNames(&inputs); + min_bottoms += inputs.size(); + } + return min_bottoms; + } + virtual inline int MaxBottomBlobs() const { return MinBottomBlobs() + 1; } + virtual inline int ExactNumTopBlobs() const { + int num_tops = 1; + if (this->layer_param_.recurrent_param().expose_hidden()) { + vector outputs; + this->RecurrentOutputBlobNames(&outputs); + num_tops += outputs.size(); + } + return num_tops; + } + + virtual inline bool AllowForceBackward(const int bottom_index) const { + // Can't propagate to sequence continuation indicators. + return bottom_index != 1; + } + + protected: + /** + * @brief Fills net_param with the recurrent network architecture. Subclasses + * should define this -- see RNNLayer and LSTMLayer for examples. + */ + virtual void FillUnrolledNet(NetParameter* net_param) const = 0; + + /** + * @brief Fills names with the names of the 0th timestep recurrent input + * Blob&s. Subclasses should define this -- see RNNLayer and LSTMLayer + * for examples. 
+ */ + virtual void RecurrentInputBlobNames(vector* names) const = 0; + + /** + * @brief Fills shapes with the shapes of the recurrent input Blob&s. + * Subclasses should define this -- see RNNLayer and LSTMLayer + * for examples. + */ + virtual void RecurrentInputShapes(vector* shapes) const = 0; + + /** + * @brief Fills names with the names of the Tth timestep recurrent output + * Blob&s. Subclasses should define this -- see RNNLayer and LSTMLayer + * for examples. + */ + virtual void RecurrentOutputBlobNames(vector* names) const = 0; + + /** + * @brief Fills names with the names of the output blobs, concatenated across + * all timesteps. Should return a name for each top Blob. + * Subclasses should define this -- see RNNLayer and LSTMLayer for + * examples. + */ + virtual void OutputBlobNames(vector* names) const = 0; + + /** + * @param bottom input Blob vector (length 2-3) + * + * -# @f$ (T \times N \times ...) @f$ + * the time-varying input @f$ x @f$. After the first two axes, whose + * dimensions must correspond to the number of timesteps @f$ T @f$ and + * the number of independent streams @f$ N @f$, respectively, its + * dimensions may be arbitrary. Note that the ordering of dimensions -- + * @f$ (T \times N \times ...) @f$, rather than + * @f$ (N \times T \times ...) @f$ -- means that the @f$ N @f$ + * independent input streams must be "interleaved". + * + * -# @f$ (T \times N) @f$ + * the sequence continuation indicators @f$ \delta @f$. 
+ * These inputs should be binary (0 or 1) indicators, where + * @f$ \delta_{t,n} = 0 @f$ means that timestep @f$ t @f$ of stream + * @f$ n @f$ is the beginning of a new sequence, and hence the previous + * hidden state @f$ h_{t-1} @f$ is multiplied by @f$ \delta_t = 0 @f$ + * and has no effect on the cell's output at timestep @f$ t @f$, and + * a value of @f$ \delta_{t,n} = 1 @f$ means that timestep @f$ t @f$ of + * stream @f$ n @f$ is a continuation from the previous timestep + * @f$ t-1 @f$, and the previous hidden state @f$ h_{t-1} @f$ affects the + * updated hidden state and output. + * + * -# @f$ (N \times ...) @f$ (optional) + * the static (non-time-varying) input @f$ x_{static} @f$. + * After the first axis, whose dimension must be the number of + * independent streams, its dimensions may be arbitrary. + * This is mathematically equivalent to using a time-varying input of + * @f$ x'_t = [x_t; x_{static}] @f$ -- i.e., tiling the static input + * across the @f$ T @f$ timesteps and concatenating with the time-varying + * input. Note that if this input is used, all timesteps in a single + * batch within a particular one of the @f$ N @f$ streams must share the + * same static input, even if the sequence continuation indicators + * suggest that difference sequences are ending and beginning within a + * single batch. This may require padding and/or truncation for uniform + * length. + * + * @param top output Blob vector (length 1) + * -# @f$ (T \times N \times D) @f$ + * the time-varying output @f$ y @f$, where @f$ D @f$ is + * recurrent_param.num_output(). + * Refer to documentation for particular RecurrentLayer implementations + * (such as RNNLayer and LSTMLayer) for the definition of @f$ y @f$. 
+ */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + /// @brief A Net to implement the Recurrent functionality. + shared_ptr > unrolled_net_; + + /// @brief The number of independent streams to process simultaneously. + int N_; + + /** + * @brief The number of timesteps in the layer's input, and the number of + * timesteps over which to backpropagate through time. + */ + int T_; + + /// @brief Whether the layer has a "static" input copied across all timesteps. + bool static_input_; + + /** + * @brief The last layer to run in the network. (Any later layers are losses + * added to force the recurrent net to do backprop.) + */ + int last_layer_index_; + + /** + * @brief Whether the layer's hidden state at the first and last timesteps + * are layer inputs and outputs, respectively. + */ + bool expose_hidden_; + + vector* > recur_input_blobs_; + vector* > recur_output_blobs_; + vector* > output_blobs_; + Blob* x_input_blob_; + Blob* x_static_input_blob_; + Blob* cont_input_blob_; +}; + +} // namespace caffe + +#endif // CAFFE_RECURRENT_LAYER_HPP_ diff --git a/src/caffe/layers/recurrent_layer.cpp b/src/caffe/layers/recurrent_layer.cpp new file mode 100644 index 00000000000..e0c82773392 --- /dev/null +++ b/src/caffe/layers/recurrent_layer.cpp @@ -0,0 +1,295 @@ +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/recurrent_layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void RecurrentLayer::LayerSetUp(const vector*>& bottom, + const vector*>& top) { + CHECK_GE(bottom[0]->num_axes(), 2) + << "bottom[0] must have at least 2 axes -- (#timesteps, #streams, ...)"; + T_ = bottom[0]->shape(0); + N_ = bottom[0]->shape(1); + 
LOG(INFO) << "Initializing recurrent layer: assuming input batch contains " + << T_ << " timesteps of " << N_ << " independent streams."; + + CHECK_EQ(bottom[1]->num_axes(), 2) + << "bottom[1] must have exactly 2 axes -- (#timesteps, #streams)"; + CHECK_EQ(T_, bottom[1]->shape(0)); + CHECK_EQ(N_, bottom[1]->shape(1)); + + // If expose_hidden is set, we take as input and produce as output + // the hidden state blobs at the first and last timesteps. + expose_hidden_ = this->layer_param_.recurrent_param().expose_hidden(); + + // Get (recurrent) input/output names. + vector output_names; + OutputBlobNames(&output_names); + vector recur_input_names; + RecurrentInputBlobNames(&recur_input_names); + vector recur_output_names; + RecurrentOutputBlobNames(&recur_output_names); + const int num_recur_blobs = recur_input_names.size(); + CHECK_EQ(num_recur_blobs, recur_output_names.size()); + + // If provided, bottom[2] is a static input to the recurrent net. + const int num_hidden_exposed = expose_hidden_ * num_recur_blobs; + static_input_ = (bottom.size() > 2 + num_hidden_exposed); + if (static_input_) { + CHECK_GE(bottom[2]->num_axes(), 1); + CHECK_EQ(N_, bottom[2]->shape(0)); + } + + // Create a NetParameter; setup the inputs that aren't unique to particular + // recurrent architectures. 
+ NetParameter net_param; + + LayerParameter* input_layer_param = net_param.add_layer(); + input_layer_param->set_type("Input"); + InputParameter* input_param = input_layer_param->mutable_input_param(); + input_layer_param->add_top("x"); + BlobShape input_shape; + for (int i = 0; i < bottom[0]->num_axes(); ++i) { + input_shape.add_dim(bottom[0]->shape(i)); + } + input_param->add_shape()->CopyFrom(input_shape); + + input_shape.Clear(); + for (int i = 0; i < bottom[1]->num_axes(); ++i) { + input_shape.add_dim(bottom[1]->shape(i)); + } + input_layer_param->add_top("cont"); + input_param->add_shape()->CopyFrom(input_shape); + + if (static_input_) { + input_shape.Clear(); + for (int i = 0; i < bottom[2]->num_axes(); ++i) { + input_shape.add_dim(bottom[2]->shape(i)); + } + input_layer_param->add_top("x_static"); + input_param->add_shape()->CopyFrom(input_shape); + } + + // Call the child's FillUnrolledNet implementation to specify the unrolled + // recurrent architecture. + this->FillUnrolledNet(&net_param); + + // Prepend this layer's name to the names of each layer in the unrolled net. + const string& layer_name = this->layer_param_.name(); + if (layer_name.size()) { + for (int i = 0; i < net_param.layer_size(); ++i) { + LayerParameter* layer = net_param.mutable_layer(i); + layer->set_name(layer_name + "_" + layer->name()); + } + } + + // Add "pseudo-losses" to all outputs to force backpropagation. + // (Setting force_backward is too aggressive as we may not need to backprop to + // all inputs, e.g., the sequence continuation indicators.) + vector pseudo_losses(output_names.size()); + for (int i = 0; i < output_names.size(); ++i) { + LayerParameter* layer = net_param.add_layer(); + pseudo_losses[i] = output_names[i] + "_pseudoloss"; + layer->set_name(pseudo_losses[i]); + layer->set_type("Reduction"); + layer->add_bottom(output_names[i]); + layer->add_top(pseudo_losses[i]); + layer->add_loss_weight(1); + } + + // Create the unrolled net. 
+ unrolled_net_.reset(new Net(net_param)); + unrolled_net_->set_debug_info( + this->layer_param_.recurrent_param().debug_info()); + + // Setup pointers to the inputs. + x_input_blob_ = CHECK_NOTNULL(unrolled_net_->blob_by_name("x").get()); + cont_input_blob_ = CHECK_NOTNULL(unrolled_net_->blob_by_name("cont").get()); + if (static_input_) { + x_static_input_blob_ = + CHECK_NOTNULL(unrolled_net_->blob_by_name("x_static").get()); + } + + // Setup pointers to paired recurrent inputs/outputs. + recur_input_blobs_.resize(num_recur_blobs); + recur_output_blobs_.resize(num_recur_blobs); + for (int i = 0; i < recur_input_names.size(); ++i) { + recur_input_blobs_[i] = + CHECK_NOTNULL(unrolled_net_->blob_by_name(recur_input_names[i]).get()); + recur_output_blobs_[i] = + CHECK_NOTNULL(unrolled_net_->blob_by_name(recur_output_names[i]).get()); + } + + // Setup pointers to outputs. + CHECK_EQ(top.size() - num_hidden_exposed, output_names.size()) + << "OutputBlobNames must provide an output blob name for each top."; + output_blobs_.resize(output_names.size()); + for (int i = 0; i < output_names.size(); ++i) { + output_blobs_[i] = + CHECK_NOTNULL(unrolled_net_->blob_by_name(output_names[i]).get()); + } + + // We should have 2 inputs (x and cont), plus a number of recurrent inputs, + // plus maybe a static input. + CHECK_EQ(2 + num_recur_blobs + static_input_, + unrolled_net_->input_blobs().size()); + + // This layer's parameters are any parameters in the layers of the unrolled + // net. We only want one copy of each parameter, so check that the parameter + // is "owned" by the layer, rather than shared with another. 
+ this->blobs_.clear(); + for (int i = 0; i < unrolled_net_->params().size(); ++i) { + if (unrolled_net_->param_owners()[i] == -1) { + LOG(INFO) << "Adding parameter " << i << ": " + << unrolled_net_->param_display_names()[i]; + this->blobs_.push_back(unrolled_net_->params()[i]); + } + } + // Check that param_propagate_down is set for all of the parameters in the + // unrolled net; set param_propagate_down to true in this layer. + for (int i = 0; i < unrolled_net_->layers().size(); ++i) { + for (int j = 0; j < unrolled_net_->layers()[i]->blobs().size(); ++j) { + CHECK(unrolled_net_->layers()[i]->param_propagate_down(j)) + << "param_propagate_down not set for layer " << i << ", param " << j; + } + } + this->param_propagate_down_.clear(); + this->param_propagate_down_.resize(this->blobs_.size(), true); + + // Set the diffs of recurrent outputs to 0 -- we can't backpropagate across + // batches. + for (int i = 0; i < recur_output_blobs_.size(); ++i) { + caffe_set(recur_output_blobs_[i]->count(), Dtype(0), + recur_output_blobs_[i]->mutable_cpu_diff()); + } + + // Check that the last output_names.size() layers are the pseudo-losses; + // set last_layer_index so that we don't actually run these layers. 
+ const vector& layer_names = unrolled_net_->layer_names(); + last_layer_index_ = layer_names.size() - 1 - pseudo_losses.size(); + for (int i = last_layer_index_ + 1, j = 0; i < layer_names.size(); ++i, ++j) { + CHECK_EQ(layer_names[i], pseudo_losses[j]); + } +} + +template +void RecurrentLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + CHECK_GE(bottom[0]->num_axes(), 2) + << "bottom[0] must have at least 2 axes -- (#timesteps, #streams, ...)"; + CHECK_EQ(T_, bottom[0]->shape(0)) << "input number of timesteps changed"; + N_ = bottom[0]->shape(1); + CHECK_EQ(bottom[1]->num_axes(), 2) + << "bottom[1] must have exactly 2 axes -- (#timesteps, #streams)"; + CHECK_EQ(T_, bottom[1]->shape(0)); + CHECK_EQ(N_, bottom[1]->shape(1)); + x_input_blob_->ReshapeLike(*bottom[0]); + vector cont_shape = bottom[1]->shape(); + cont_input_blob_->Reshape(cont_shape); + if (static_input_) { + x_static_input_blob_->ReshapeLike(*bottom[2]); + } + vector recur_input_shapes; + RecurrentInputShapes(&recur_input_shapes); + CHECK_EQ(recur_input_shapes.size(), recur_input_blobs_.size()); + for (int i = 0; i < recur_input_shapes.size(); ++i) { + recur_input_blobs_[i]->Reshape(recur_input_shapes[i]); + } + unrolled_net_->Reshape(); + x_input_blob_->ShareData(*bottom[0]); + x_input_blob_->ShareDiff(*bottom[0]); + cont_input_blob_->ShareData(*bottom[1]); + if (static_input_) { + x_static_input_blob_->ShareData(*bottom[2]); + x_static_input_blob_->ShareDiff(*bottom[2]); + } + if (expose_hidden_) { + const int bottom_offset = 2 + static_input_; + for (int i = bottom_offset, j = 0; i < bottom.size(); ++i, ++j) { + CHECK(recur_input_blobs_[j]->shape() == bottom[i]->shape()) + << "bottom[" << i << "] shape must match hidden state input shape: " + << recur_input_blobs_[j]->shape_string(); + recur_input_blobs_[j]->ShareData(*bottom[i]); + } + } + for (int i = 0; i < output_blobs_.size(); ++i) { + top[i]->ReshapeLike(*output_blobs_[i]); + top[i]->ShareData(*output_blobs_[i]); + 
top[i]->ShareDiff(*output_blobs_[i]); + } + if (expose_hidden_) { + const int top_offset = output_blobs_.size(); + for (int i = top_offset, j = 0; i < top.size(); ++i, ++j) { + top[i]->ReshapeLike(*recur_output_blobs_[j]); + } + } +} + +template +void RecurrentLayer::Reset() { + // "Reset" the hidden state of the net by zeroing out all recurrent outputs. + for (int i = 0; i < recur_output_blobs_.size(); ++i) { + caffe_set(recur_output_blobs_[i]->count(), Dtype(0), + recur_output_blobs_[i]->mutable_cpu_data()); + } +} + +template +void RecurrentLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + // Hacky fix for test time: reshare all the internal shared blobs, which may + // currently point to a stale owner blob that was dropped when Solver::Test + // called test_net->ShareTrainedLayersWith(net_.get()). + // TODO: somehow make this work non-hackily. + if (this->phase_ == TEST) { + unrolled_net_->ShareWeights(); + } + + DCHECK_EQ(recur_input_blobs_.size(), recur_output_blobs_.size()); + if (!expose_hidden_) { + for (int i = 0; i < recur_input_blobs_.size(); ++i) { + const int count = recur_input_blobs_[i]->count(); + DCHECK_EQ(count, recur_output_blobs_[i]->count()); + const Dtype* timestep_T_data = recur_output_blobs_[i]->cpu_data(); + Dtype* timestep_0_data = recur_input_blobs_[i]->mutable_cpu_data(); + caffe_copy(count, timestep_T_data, timestep_0_data); + } + } + + unrolled_net_->ForwardTo(last_layer_index_); + + if (expose_hidden_) { + const int top_offset = output_blobs_.size(); + for (int i = top_offset, j = 0; i < top.size(); ++i, ++j) { + top[i]->ShareData(*recur_output_blobs_[j]); + } + } +} + +template +void RecurrentLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + CHECK(!propagate_down[1]) << "Cannot backpropagate to sequence indicators."; + + // TODO: skip backpropagation to inputs and parameters inside the unrolled + // net according to propagate_down[0] and propagate_down[2]. 
For now just + // backprop to inputs and parameters unconditionally, as either the inputs or + // the parameters do need backward (or Net would have set + // layer_needs_backward_[i] == false for this layer). + unrolled_net_->BackwardFrom(last_layer_index_); +} + +#ifdef CPU_ONLY +STUB_GPU_FORWARD(RecurrentLayer, Forward); +#endif + +INSTANTIATE_CLASS(RecurrentLayer); + +} // namespace caffe diff --git a/src/caffe/layers/recurrent_layer.cu b/src/caffe/layers/recurrent_layer.cu new file mode 100644 index 00000000000..4dd2b0e2165 --- /dev/null +++ b/src/caffe/layers/recurrent_layer.cu @@ -0,0 +1,44 @@ +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/recurrent_layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void RecurrentLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + // Hacky fix for test time... reshare all the shared blobs. + // TODO: somehow make this work non-hackily. 
+ if (this->phase_ == TEST) { + unrolled_net_->ShareWeights(); + } + + DCHECK_EQ(recur_input_blobs_.size(), recur_output_blobs_.size()); + if (!expose_hidden_) { + for (int i = 0; i < recur_input_blobs_.size(); ++i) { + const int count = recur_input_blobs_[i]->count(); + DCHECK_EQ(count, recur_output_blobs_[i]->count()); + const Dtype* timestep_T_data = recur_output_blobs_[i]->gpu_data(); + Dtype* timestep_0_data = recur_input_blobs_[i]->mutable_gpu_data(); + caffe_copy(count, timestep_T_data, timestep_0_data); + } + } + + unrolled_net_->ForwardTo(last_layer_index_); + + if (expose_hidden_) { + const int top_offset = output_blobs_.size(); + for (int i = top_offset, j = 0; i < top.size(); ++i, ++j) { + top[i]->ShareData(*recur_output_blobs_[j]); + } + } +} + +INSTANTIATE_LAYER_GPU_FORWARD(RecurrentLayer); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 15810718631..1556781cbc2 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -306,7 +306,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. 
// -// LayerParameter next available layer-specific ID: 146 (last added: parameter_param) +// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -390,6 +390,7 @@ message LayerParameter { optional PowerParameter power_param = 122; optional PReLUParameter prelu_param = 131; optional PythonParameter python_param = 130; + optional RecurrentParameter recurrent_param = 146; optional ReductionParameter reduction_param = 136; optional ReLUParameter relu_param = 123; optional ReshapeParameter reshape_param = 133; @@ -928,6 +929,25 @@ message PythonParameter { optional bool share_in_parallel = 4 [default = false]; } +// Message that stores parameters used by RecurrentLayer +message RecurrentParameter { + // The dimension of the output (and usually hidden state) representation -- + // must be explicitly set to non-zero. + optional uint32 num_output = 1 [default = 0]; + + optional FillerParameter weight_filler = 2; // The filler for the weight + optional FillerParameter bias_filler = 3; // The filler for the bias + + // Whether to enable displaying debug_info in the unrolled recurrent net. + optional bool debug_info = 4 [default = false]; + + // Whether to add as additional inputs (bottoms) the initial hidden state + // blobs, and add as additional outputs (tops) the final timestep hidden state + // blobs. The number of additional bottom/top blobs required depends on the + // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs. 
+ optional bool expose_hidden = 5 [default = false]; +} + // Message that stores parameters used by ReductionLayer message ReductionParameter { enum ReductionOp { From cf5f369574dd51045c1c92625c0fe6694a031f2a Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Sun, 15 Feb 2015 14:56:50 -0800 Subject: [PATCH 073/264] Add RNNLayer, with tests --- include/caffe/layers/rnn_layer.hpp | 47 ++++++ src/caffe/layers/rnn_layer.cpp | 236 +++++++++++++++++++++++++++++ src/caffe/test/test_rnn_layer.cpp | 217 ++++++++++++++++++++++++++ 3 files changed, 500 insertions(+) create mode 100644 include/caffe/layers/rnn_layer.hpp create mode 100644 src/caffe/layers/rnn_layer.cpp create mode 100644 src/caffe/test/test_rnn_layer.cpp diff --git a/include/caffe/layers/rnn_layer.hpp b/include/caffe/layers/rnn_layer.hpp new file mode 100644 index 00000000000..6dce238ae17 --- /dev/null +++ b/include/caffe/layers/rnn_layer.hpp @@ -0,0 +1,47 @@ +#ifndef CAFFE_RNN_LAYER_HPP_ +#define CAFFE_RNN_LAYER_HPP_ + +#include +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/recurrent_layer.hpp" +#include "caffe/net.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +template class RecurrentLayer; + +/** + * @brief Processes time-varying inputs using a simple recurrent neural network + * (RNN). Implemented as a network unrolling the RNN computation in time. + * + * Given time-varying inputs @f$ x_t @f$, computes hidden state @f$ + * h_t := \tanh[ W_{hh} h_{t_1} + W_{xh} x_t + b_h ] + * @f$, and outputs @f$ + * o_t := \tanh[ W_{ho} h_t + b_o ] + * @f$. 
+ */ +template +class RNNLayer : public RecurrentLayer { + public: + explicit RNNLayer(const LayerParameter& param) + : RecurrentLayer(param) {} + + virtual inline const char* type() const { return "RNN"; } + + protected: + virtual void FillUnrolledNet(NetParameter* net_param) const; + virtual void RecurrentInputBlobNames(vector* names) const; + virtual void RecurrentOutputBlobNames(vector* names) const; + virtual void RecurrentInputShapes(vector* shapes) const; + virtual void OutputBlobNames(vector* names) const; +}; + +} // namespace caffe + +#endif // CAFFE_RNN_LAYER_HPP_ diff --git a/src/caffe/layers/rnn_layer.cpp b/src/caffe/layers/rnn_layer.cpp new file mode 100644 index 00000000000..f62ae8c77de --- /dev/null +++ b/src/caffe/layers/rnn_layer.cpp @@ -0,0 +1,236 @@ +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/rnn_layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void RNNLayer::RecurrentInputBlobNames(vector* names) const { + names->resize(1); + (*names)[0] = "h_0"; +} + +template +void RNNLayer::RecurrentOutputBlobNames(vector* names) const { + names->resize(1); + (*names)[0] = "h_" + format_int(this->T_); +} + +template +void RNNLayer::RecurrentInputShapes(vector* shapes) const { + const int num_output = this->layer_param_.recurrent_param().num_output(); + shapes->resize(1); + (*shapes)[0].Clear(); + (*shapes)[0].add_dim(1); // a single timestep + (*shapes)[0].add_dim(this->N_); + (*shapes)[0].add_dim(num_output); +} + +template +void RNNLayer::OutputBlobNames(vector* names) const { + names->resize(1); + (*names)[0] = "o"; +} + +template +void RNNLayer::FillUnrolledNet(NetParameter* net_param) const { + const int num_output = this->layer_param_.recurrent_param().num_output(); + CHECK_GT(num_output, 0) << "num_output must be positive"; + const FillerParameter& weight_filler = + 
this->layer_param_.recurrent_param().weight_filler(); + const FillerParameter& bias_filler = + this->layer_param_.recurrent_param().bias_filler(); + + // Add generic LayerParameter's (without bottoms/tops) of layer types we'll + // use to save redundant code. + LayerParameter hidden_param; + hidden_param.set_type("InnerProduct"); + hidden_param.mutable_inner_product_param()->set_num_output(num_output); + hidden_param.mutable_inner_product_param()->set_bias_term(false); + hidden_param.mutable_inner_product_param()->set_axis(2); + hidden_param.mutable_inner_product_param()-> + mutable_weight_filler()->CopyFrom(weight_filler); + + LayerParameter biased_hidden_param(hidden_param); + biased_hidden_param.mutable_inner_product_param()->set_bias_term(true); + biased_hidden_param.mutable_inner_product_param()-> + mutable_bias_filler()->CopyFrom(bias_filler); + + LayerParameter sum_param; + sum_param.set_type("Eltwise"); + sum_param.mutable_eltwise_param()->set_operation( + EltwiseParameter_EltwiseOp_SUM); + + LayerParameter tanh_param; + tanh_param.set_type("TanH"); + + LayerParameter scale_param; + scale_param.set_type("Scale"); + scale_param.mutable_scale_param()->set_axis(0); + + LayerParameter slice_param; + slice_param.set_type("Slice"); + slice_param.mutable_slice_param()->set_axis(0); + + vector input_shapes; + RecurrentInputShapes(&input_shapes); + CHECK_EQ(1, input_shapes.size()); + + LayerParameter* input_layer_param = net_param->add_layer(); + input_layer_param->set_type("Input"); + InputParameter* input_param = input_layer_param->mutable_input_param(); + input_layer_param->add_top("h_0"); + input_param->add_shape()->CopyFrom(input_shapes[0]); + + LayerParameter* cont_slice_param = net_param->add_layer(); + cont_slice_param->CopyFrom(slice_param); + cont_slice_param->set_name("cont_slice"); + cont_slice_param->add_bottom("cont"); + cont_slice_param->mutable_slice_param()->set_axis(0); + + // Add layer to transform all timesteps of x to the hidden state dimension. 
+ // W_xh_x = W_xh * x + b_h + { + LayerParameter* x_transform_param = net_param->add_layer(); + x_transform_param->CopyFrom(biased_hidden_param); + x_transform_param->set_name("x_transform"); + x_transform_param->add_param()->set_name("W_xh"); + x_transform_param->add_param()->set_name("b_h"); + x_transform_param->add_bottom("x"); + x_transform_param->add_top("W_xh_x"); + x_transform_param->add_propagate_down(true); + } + + if (this->static_input_) { + // Add layer to transform x_static to the hidden state dimension. + // W_xh_x_static = W_xh_static * x_static + LayerParameter* x_static_transform_param = net_param->add_layer(); + x_static_transform_param->CopyFrom(hidden_param); + x_static_transform_param->mutable_inner_product_param()->set_axis(1); + x_static_transform_param->set_name("W_xh_x_static"); + x_static_transform_param->add_param()->set_name("W_xh_static"); + x_static_transform_param->add_bottom("x_static"); + x_static_transform_param->add_top("W_xh_x_static_preshape"); + x_static_transform_param->add_propagate_down(true); + + LayerParameter* reshape_param = net_param->add_layer(); + reshape_param->set_type("Reshape"); + BlobShape* new_shape = + reshape_param->mutable_reshape_param()->mutable_shape(); + new_shape->add_dim(1); // One timestep. + // Should infer this->N as the dimension so we can reshape on batch size. 
+ new_shape->add_dim(-1); + new_shape->add_dim( + x_static_transform_param->inner_product_param().num_output()); + reshape_param->set_name("W_xh_x_static_reshape"); + reshape_param->add_bottom("W_xh_x_static_preshape"); + reshape_param->add_top("W_xh_x_static"); + } + + LayerParameter* x_slice_param = net_param->add_layer(); + x_slice_param->CopyFrom(slice_param); + x_slice_param->set_name("W_xh_x_slice"); + x_slice_param->add_bottom("W_xh_x"); + + LayerParameter output_concat_layer; + output_concat_layer.set_name("o_concat"); + output_concat_layer.set_type("Concat"); + output_concat_layer.add_top("o"); + output_concat_layer.mutable_concat_param()->set_axis(0); + + for (int t = 1; t <= this->T_; ++t) { + string tm1s = format_int(t - 1); + string ts = format_int(t); + + cont_slice_param->add_top("cont_" + ts); + x_slice_param->add_top("W_xh_x_" + ts); + + // Add layer to flush the hidden state when beginning a new sequence, + // as indicated by cont_t. + // h_conted_{t-1} := cont_t * h_{t-1} + // + // Normally, cont_t is binary (i.e., 0 or 1), so: + // h_conted_{t-1} := h_{t-1} if cont_t == 1 + // 0 otherwise + { + LayerParameter* cont_h_param = net_param->add_layer(); + cont_h_param->CopyFrom(scale_param); + cont_h_param->set_name("h_conted_" + tm1s); + cont_h_param->add_bottom("h_" + tm1s); + cont_h_param->add_bottom("cont_" + ts); + cont_h_param->add_top("h_conted_" + tm1s); + } + + // Add layer to compute + // W_hh_h_{t-1} := W_hh * h_conted_{t-1} + { + LayerParameter* w_param = net_param->add_layer(); + w_param->CopyFrom(hidden_param); + w_param->set_name("W_hh_h_" + tm1s); + w_param->add_param()->set_name("W_hh"); + w_param->add_bottom("h_conted_" + tm1s); + w_param->add_top("W_hh_h_" + tm1s); + w_param->mutable_inner_product_param()->set_axis(2); + } + + // Add layers to compute + // h_t := \tanh( W_hh * h_conted_{t-1} + W_xh * x_t + b_h ) + // = \tanh( W_hh_h_{t-1} + W_xh_t ) + { + LayerParameter* h_input_sum_param = net_param->add_layer(); + 
h_input_sum_param->CopyFrom(sum_param); + h_input_sum_param->set_name("h_input_sum_" + ts); + h_input_sum_param->add_bottom("W_hh_h_" + tm1s); + h_input_sum_param->add_bottom("W_xh_x_" + ts); + if (this->static_input_) { + h_input_sum_param->add_bottom("W_xh_x_static"); + } + h_input_sum_param->add_top("h_neuron_input_" + ts); + } + { + LayerParameter* h_neuron_param = net_param->add_layer(); + h_neuron_param->CopyFrom(tanh_param); + h_neuron_param->set_name("h_neuron_" + ts); + h_neuron_param->add_bottom("h_neuron_input_" + ts); + h_neuron_param->add_top("h_" + ts); + } + + // Add layer to compute + // W_ho_h_t := W_ho * h_t + b_o + { + LayerParameter* w_param = net_param->add_layer(); + w_param->CopyFrom(biased_hidden_param); + w_param->set_name("W_ho_h_" + ts); + w_param->add_param()->set_name("W_ho"); + w_param->add_param()->set_name("b_o"); + w_param->add_bottom("h_" + ts); + w_param->add_top("W_ho_h_" + ts); + w_param->mutable_inner_product_param()->set_axis(2); + } + + // Add layers to compute + // o_t := \tanh( W_ho h_t + b_o) + // = \tanh( W_ho_h_t ) + { + LayerParameter* o_neuron_param = net_param->add_layer(); + o_neuron_param->CopyFrom(tanh_param); + o_neuron_param->set_name("o_neuron_" + ts); + o_neuron_param->add_bottom("W_ho_h_" + ts); + o_neuron_param->add_top("o_" + ts); + } + output_concat_layer.add_bottom("o_" + ts); + } // for (int t = 1; t <= this->T_; ++t) + + net_param->add_layer()->CopyFrom(output_concat_layer); +} + +INSTANTIATE_CLASS(RNNLayer); +REGISTER_LAYER_CLASS(RNN); + +} // namespace caffe diff --git a/src/caffe/test/test_rnn_layer.cpp b/src/caffe/test/test_rnn_layer.cpp new file mode 100644 index 00000000000..dd8952d62d6 --- /dev/null +++ b/src/caffe/test/test_rnn_layer.cpp @@ -0,0 +1,217 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layers/rnn_layer.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include 
"caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class RNNLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + RNNLayerTest() : num_output_(7) { + blob_bottom_vec_.push_back(&blob_bottom_); + blob_bottom_vec_.push_back(&blob_bottom_cont_); + blob_top_vec_.push_back(&blob_top_); + + ReshapeBlobs(1, 3); + + layer_param_.mutable_recurrent_param()->set_num_output(num_output_); + FillerParameter* weight_filler = + layer_param_.mutable_recurrent_param()->mutable_weight_filler(); + weight_filler->set_type("gaussian"); + weight_filler->set_std(0.2); + FillerParameter* bias_filler = + layer_param_.mutable_recurrent_param()->mutable_bias_filler(); + bias_filler->set_type("gaussian"); + bias_filler->set_std(0.1); + + layer_param_.set_phase(TEST); + } + + void ReshapeBlobs(int num_timesteps, int num_instances) { + blob_bottom_.Reshape(num_timesteps, num_instances, 3, 2); + blob_bottom_static_.Reshape(num_instances, 2, 3, 4); + vector shape(2); + shape[0] = num_timesteps; + shape[1] = num_instances; + blob_bottom_cont_.Reshape(shape); + + FillerParameter filler_param; + filler_param.set_min(-1); + filler_param.set_max(1); + UniformFiller filler(filler_param); + filler.Fill(&blob_bottom_); + } + + int num_output_; + LayerParameter layer_param_; + Blob blob_bottom_; + Blob blob_bottom_cont_; + Blob blob_bottom_static_; + Blob blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(RNNLayerTest, TestDtypesAndDevices); + +TYPED_TEST(RNNLayerTest, TestSetUp) { + typedef typename TypeParam::Dtype Dtype; + RNNLayer layer(this->layer_param_); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + vector expected_top_shape = this->blob_bottom_.shape(); + expected_top_shape.resize(3); + expected_top_shape[2] = this->num_output_; + EXPECT_TRUE(this->blob_top_.shape() == expected_top_shape); +} + +TYPED_TEST(RNNLayerTest, TestForward) { + typedef typename TypeParam::Dtype Dtype; + 
const int kNumTimesteps = 3; + const int num = this->blob_bottom_.shape(1); + this->ReshapeBlobs(kNumTimesteps, num); + + // Fill the cont blob with <0, 1, 1, ..., 1>, + // indicating a sequence that begins at the first timestep + // then continues for the rest of the sequence. + for (int t = 0; t < kNumTimesteps; ++t) { + for (int n = 0; n < num; ++n) { + this->blob_bottom_cont_.mutable_cpu_data()[t * num + n] = t > 0; + } + } + + // Process the full sequence in a single batch. + FillerParameter filler_param; + filler_param.set_mean(0); + filler_param.set_std(1); + GaussianFiller sequence_filler(filler_param); + sequence_filler.Fill(&this->blob_bottom_); + shared_ptr > layer(new RNNLayer(this->layer_param_)); + Caffe::set_random_seed(1701); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + LOG(INFO) << "Calling forward for full sequence RNN"; + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + + // Copy the inputs and outputs to reuse/check them later. + Blob bottom_copy(this->blob_bottom_.shape()); + bottom_copy.CopyFrom(this->blob_bottom_); + Blob top_copy(this->blob_top_.shape()); + top_copy.CopyFrom(this->blob_top_); + + // Process the batch one timestep at a time; + // check that we get the same result. 
+ this->ReshapeBlobs(1, num); + layer.reset(new RNNLayer(this->layer_param_)); + Caffe::set_random_seed(1701); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + const int bottom_count = this->blob_bottom_.count(); + const int top_count = this->blob_top_.count(); + const Dtype kEpsilon = 1e-5; + for (int t = 0; t < kNumTimesteps; ++t) { + caffe_copy(bottom_count, bottom_copy.cpu_data() + t * bottom_count, + this->blob_bottom_.mutable_cpu_data()); + for (int n = 0; n < num; ++n) { + this->blob_bottom_cont_.mutable_cpu_data()[n] = t > 0; + } + LOG(INFO) << "Calling forward for RNN timestep " << t; + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < top_count; ++i) { + ASSERT_LT(t * top_count + i, top_copy.count()); + EXPECT_NEAR(this->blob_top_.cpu_data()[i], + top_copy.cpu_data()[t * top_count + i], kEpsilon) + << "t = " << t << "; i = " << i; + } + } + + // Process the batch one timestep at a time with all cont blobs set to 0. + // Check that we get a different result, except in the first timestep. 
+ Caffe::set_random_seed(1701); + layer.reset(new RNNLayer(this->layer_param_)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + for (int t = 0; t < kNumTimesteps; ++t) { + caffe_copy(bottom_count, bottom_copy.cpu_data() + t * bottom_count, + this->blob_bottom_.mutable_cpu_data()); + for (int n = 0; n < num; ++n) { + this->blob_bottom_cont_.mutable_cpu_data()[n] = 0; + } + LOG(INFO) << "Calling forward for RNN timestep " << t; + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < top_count; ++i) { + if (t == 0) { + EXPECT_NEAR(this->blob_top_.cpu_data()[i], + top_copy.cpu_data()[t * top_count + i], kEpsilon) + << "t = " << t << "; i = " << i; + } else { + EXPECT_NE(this->blob_top_.cpu_data()[i], + top_copy.cpu_data()[t * top_count + i]) + << "t = " << t << "; i = " << i; + } + } + } +} + +TYPED_TEST(RNNLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + RNNLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); +} + +TYPED_TEST(RNNLayerTest, TestGradientNonZeroCont) { + typedef typename TypeParam::Dtype Dtype; + RNNLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + for (int i = 0; i < this->blob_bottom_cont_.count(); ++i) { + this->blob_bottom_cont_.mutable_cpu_data()[i] = i > 2; + } + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); +} + +TYPED_TEST(RNNLayerTest, TestGradientNonZeroContBufferSize2) { + typedef typename TypeParam::Dtype Dtype; + this->ReshapeBlobs(2, 2); + // fill the values + FillerParameter filler_param; + UniformFiller filler(filler_param); + filler.Fill(&this->blob_bottom_); + RNNLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + for (int i = 0; i < this->blob_bottom_cont_.count(); ++i) { + this->blob_bottom_cont_.mutable_cpu_data()[i] = i > 2; + } + 
checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); +} + +TYPED_TEST(RNNLayerTest, TestGradientNonZeroContBufferSize2WithStaticInput) { + typedef typename TypeParam::Dtype Dtype; + this->ReshapeBlobs(2, 2); + FillerParameter filler_param; + UniformFiller filler(filler_param); + filler.Fill(&this->blob_bottom_); + filler.Fill(&this->blob_bottom_static_); + this->blob_bottom_vec_.push_back(&this->blob_bottom_static_); + RNNLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + for (int i = 0; i < this->blob_bottom_cont_.count(); ++i) { + this->blob_bottom_cont_.mutable_cpu_data()[i] = i > 2; + } + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 2); +} + +} // namespace caffe From 51a68f0a0e9e376597d7cabae709ff969ad30c98 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Tue, 5 Apr 2016 09:56:04 -0700 Subject: [PATCH 074/264] Add LSTMLayer and LSTMUnitLayer, with tests --- include/caffe/layers/lstm_layer.hpp | 154 ++++++++++++++ src/caffe/layers/lstm_layer.cpp | 244 +++++++++++++++++++++++ src/caffe/layers/lstm_unit_layer.cpp | 131 ++++++++++++ src/caffe/layers/lstm_unit_layer.cu | 154 ++++++++++++++ src/caffe/test/test_lstm_layer.cpp | 288 +++++++++++++++++++++++++++ 5 files changed, 971 insertions(+) create mode 100644 include/caffe/layers/lstm_layer.hpp create mode 100644 src/caffe/layers/lstm_layer.cpp create mode 100644 src/caffe/layers/lstm_unit_layer.cpp create mode 100644 src/caffe/layers/lstm_unit_layer.cu create mode 100644 src/caffe/test/test_lstm_layer.cpp diff --git a/include/caffe/layers/lstm_layer.hpp b/include/caffe/layers/lstm_layer.hpp new file mode 100644 index 00000000000..a0e67c9d432 --- /dev/null +++ b/include/caffe/layers/lstm_layer.hpp @@ -0,0 +1,154 @@ +#ifndef CAFFE_LSTM_LAYER_HPP_ +#define CAFFE_LSTM_LAYER_HPP_ + +#include +#include +#include + +#include 
"caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/recurrent_layer.hpp" +#include "caffe/net.hpp" +#include "caffe/proto/caffe.pb.h" + +namespace caffe { + +template class RecurrentLayer; + +/** + * @brief Processes sequential inputs using a "Long Short-Term Memory" (LSTM) + * [1] style recurrent neural network (RNN). Implemented by unrolling + * the LSTM computation through time. + * + * The specific architecture used in this implementation is as described in + * "Learning to Execute" [2], reproduced below: + * i_t := \sigmoid[ W_{hi} * h_{t-1} + W_{xi} * x_t + b_i ] + * f_t := \sigmoid[ W_{hf} * h_{t-1} + W_{xf} * x_t + b_f ] + * o_t := \sigmoid[ W_{ho} * h_{t-1} + W_{xo} * x_t + b_o ] + * g_t := \tanh[ W_{hg} * h_{t-1} + W_{xg} * x_t + b_g ] + * c_t := (f_t .* c_{t-1}) + (i_t .* g_t) + * h_t := o_t .* \tanh[c_t] + * In the implementation, the i, f, o, and g computations are performed as a + * single inner product. + * + * Notably, this implementation lacks the "diagonal" gates, as used in the + * LSTM architectures described by Alex Graves [3] and others. + * + * [1] Hochreiter, Sepp, and Schmidhuber, Jürgen. "Long short-term memory." + * Neural Computation 9, no. 8 (1997): 1735-1780. + * + * [2] Zaremba, Wojciech, and Sutskever, Ilya. "Learning to execute." + * arXiv preprint arXiv:1410.4615 (2014). + * + * [3] Graves, Alex. "Generating sequences with recurrent neural networks." + * arXiv preprint arXiv:1308.0850 (2013). 
+ */ +template +class LSTMLayer : public RecurrentLayer { + public: + explicit LSTMLayer(const LayerParameter& param) + : RecurrentLayer(param) {} + + virtual inline const char* type() const { return "LSTM"; } + + protected: + virtual void FillUnrolledNet(NetParameter* net_param) const; + virtual void RecurrentInputBlobNames(vector* names) const; + virtual void RecurrentOutputBlobNames(vector* names) const; + virtual void RecurrentInputShapes(vector* shapes) const; + virtual void OutputBlobNames(vector* names) const; +}; + +/** + * @brief A helper for LSTMLayer: computes a single timestep of the + * non-linearity of the LSTM, producing the updated cell and hidden + * states. + */ +template +class LSTMUnitLayer : public Layer { + public: + explicit LSTMUnitLayer(const LayerParameter& param) + : Layer(param) {} + virtual void Reshape(const vector*>& bottom, + const vector*>& top); + + virtual inline const char* type() const { return "LSTMUnit"; } + virtual inline int ExactNumBottomBlobs() const { return 3; } + virtual inline int ExactNumTopBlobs() const { return 2; } + + virtual inline bool AllowForceBackward(const int bottom_index) const { + // Can't propagate to sequence continuation indicators. 
+ return bottom_index != 2; + } + + protected: + /** + * @param bottom input Blob vector (length 3) + * -# @f$ (1 \times N \times D) @f$ + * the previous timestep cell state @f$ c_{t-1} @f$ + * -# @f$ (1 \times N \times 4D) @f$ + * the "gate inputs" @f$ [i_t', f_t', o_t', g_t'] @f$ + * -# @f$ (1 \times N) @f$ + * the sequence continuation indicators @f$ \delta_t @f$ + * @param top output Blob vector (length 2) + * -# @f$ (1 \times N \times D) @f$ + * the updated cell state @f$ c_t @f$, computed as: + * i_t := \sigmoid[i_t'] + * f_t := \sigmoid[f_t'] + * o_t := \sigmoid[o_t'] + * g_t := \tanh[g_t'] + * c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t) + * -# @f$ (1 \times N \times D) @f$ + * the updated hidden state @f$ h_t @f$, computed as: + * h_t := o_t .* \tanh[c_t] + */ + virtual void Forward_cpu(const vector*>& bottom, + const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); + + /** + * @brief Computes the error gradient w.r.t. the LSTMUnit inputs. + * + * @param top output Blob vector (length 2), providing the error gradient with + * respect to the outputs + * -# @f$ (1 \times N \times D) @f$: + * containing error gradients @f$ \frac{\partial E}{\partial c_t} @f$ + * with respect to the updated cell state @f$ c_t @f$ + * -# @f$ (1 \times N \times D) @f$: + * containing error gradients @f$ \frac{\partial E}{\partial h_t} @f$ + * with respect to the updated cell state @f$ h_t @f$ + * @param propagate_down see Layer::Backward. + * @param bottom input Blob vector (length 3), into which the error gradients + * with respect to the LSTMUnit inputs @f$ c_{t-1} @f$ and the gate + * inputs are computed. Computatation of the error gradients w.r.t. + * the sequence indicators is not implemented. + * -# @f$ (1 \times N \times D) @f$ + * the error gradient w.r.t. the previous timestep cell state + * @f$ c_{t-1} @f$ + * -# @f$ (1 \times N \times 4D) @f$ + * the error gradient w.r.t. 
the "gate inputs" + * @f$ [ + * \frac{\partial E}{\partial i_t} + * \frac{\partial E}{\partial f_t} + * \frac{\partial E}{\partial o_t} + * \frac{\partial E}{\partial g_t} + * ] @f$ + * -# @f$ (1 \times 1 \times N) @f$ + * the gradient w.r.t. the sequence continuation indicators + * @f$ \delta_t @f$ is currently not computed. + */ + virtual void Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + + /// @brief The hidden and output dimension. + int hidden_dim_; + Blob X_acts_; +}; + +} // namespace caffe + +#endif // CAFFE_LSTM_LAYER_HPP_ diff --git a/src/caffe/layers/lstm_layer.cpp b/src/caffe/layers/lstm_layer.cpp new file mode 100644 index 00000000000..da48dba4c05 --- /dev/null +++ b/src/caffe/layers/lstm_layer.cpp @@ -0,0 +1,244 @@ +#include +#include + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layer.hpp" +#include "caffe/layers/lstm_layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +void LSTMLayer::RecurrentInputBlobNames(vector* names) const { + names->resize(2); + (*names)[0] = "h_0"; + (*names)[1] = "c_0"; +} + +template +void LSTMLayer::RecurrentOutputBlobNames(vector* names) const { + names->resize(2); + (*names)[0] = "h_" + format_int(this->T_); + (*names)[1] = "c_T"; +} + +template +void LSTMLayer::RecurrentInputShapes(vector* shapes) const { + const int num_output = this->layer_param_.recurrent_param().num_output(); + const int num_blobs = 2; + shapes->resize(num_blobs); + for (int i = 0; i < num_blobs; ++i) { + (*shapes)[i].Clear(); + (*shapes)[i].add_dim(1); // a single timestep + (*shapes)[i].add_dim(this->N_); + (*shapes)[i].add_dim(num_output); + } +} + +template +void LSTMLayer::OutputBlobNames(vector* names) const { + names->resize(1); + (*names)[0] = "h"; +} + +template +void 
LSTMLayer::FillUnrolledNet(NetParameter* net_param) const { + const int num_output = this->layer_param_.recurrent_param().num_output(); + CHECK_GT(num_output, 0) << "num_output must be positive"; + const FillerParameter& weight_filler = + this->layer_param_.recurrent_param().weight_filler(); + const FillerParameter& bias_filler = + this->layer_param_.recurrent_param().bias_filler(); + + // Add generic LayerParameter's (without bottoms/tops) of layer types we'll + // use to save redundant code. + LayerParameter hidden_param; + hidden_param.set_type("InnerProduct"); + hidden_param.mutable_inner_product_param()->set_num_output(num_output * 4); + hidden_param.mutable_inner_product_param()->set_bias_term(false); + hidden_param.mutable_inner_product_param()->set_axis(2); + hidden_param.mutable_inner_product_param()-> + mutable_weight_filler()->CopyFrom(weight_filler); + + LayerParameter biased_hidden_param(hidden_param); + biased_hidden_param.mutable_inner_product_param()->set_bias_term(true); + biased_hidden_param.mutable_inner_product_param()-> + mutable_bias_filler()->CopyFrom(bias_filler); + + LayerParameter sum_param; + sum_param.set_type("Eltwise"); + sum_param.mutable_eltwise_param()->set_operation( + EltwiseParameter_EltwiseOp_SUM); + + LayerParameter scale_param; + scale_param.set_type("Scale"); + scale_param.mutable_scale_param()->set_axis(0); + + LayerParameter slice_param; + slice_param.set_type("Slice"); + slice_param.mutable_slice_param()->set_axis(0); + + LayerParameter split_param; + split_param.set_type("Split"); + + vector input_shapes; + RecurrentInputShapes(&input_shapes); + CHECK_EQ(2, input_shapes.size()); + + LayerParameter* input_layer_param = net_param->add_layer(); + input_layer_param->set_type("Input"); + InputParameter* input_param = input_layer_param->mutable_input_param(); + + input_layer_param->add_top("c_0"); + input_param->add_shape()->CopyFrom(input_shapes[0]); + + input_layer_param->add_top("h_0"); + 
input_param->add_shape()->CopyFrom(input_shapes[1]); + + LayerParameter* cont_slice_param = net_param->add_layer(); + cont_slice_param->CopyFrom(slice_param); + cont_slice_param->set_name("cont_slice"); + cont_slice_param->add_bottom("cont"); + cont_slice_param->mutable_slice_param()->set_axis(0); + + // Add layer to transform all timesteps of x to the hidden state dimension. + // W_xc_x = W_xc * x + b_c + { + LayerParameter* x_transform_param = net_param->add_layer(); + x_transform_param->CopyFrom(biased_hidden_param); + x_transform_param->set_name("x_transform"); + x_transform_param->add_param()->set_name("W_xc"); + x_transform_param->add_param()->set_name("b_c"); + x_transform_param->add_bottom("x"); + x_transform_param->add_top("W_xc_x"); + x_transform_param->add_propagate_down(true); + } + + if (this->static_input_) { + // Add layer to transform x_static to the gate dimension. + // W_xc_x_static = W_xc_static * x_static + LayerParameter* x_static_transform_param = net_param->add_layer(); + x_static_transform_param->CopyFrom(hidden_param); + x_static_transform_param->mutable_inner_product_param()->set_axis(1); + x_static_transform_param->set_name("W_xc_x_static"); + x_static_transform_param->add_param()->set_name("W_xc_static"); + x_static_transform_param->add_bottom("x_static"); + x_static_transform_param->add_top("W_xc_x_static_preshape"); + x_static_transform_param->add_propagate_down(true); + + LayerParameter* reshape_param = net_param->add_layer(); + reshape_param->set_type("Reshape"); + BlobShape* new_shape = + reshape_param->mutable_reshape_param()->mutable_shape(); + new_shape->add_dim(1); // One timestep. + // Should infer this->N as the dimension so we can reshape on batch size. 
+ new_shape->add_dim(-1); + new_shape->add_dim( + x_static_transform_param->inner_product_param().num_output()); + reshape_param->set_name("W_xc_x_static_reshape"); + reshape_param->add_bottom("W_xc_x_static_preshape"); + reshape_param->add_top("W_xc_x_static"); + } + + LayerParameter* x_slice_param = net_param->add_layer(); + x_slice_param->CopyFrom(slice_param); + x_slice_param->add_bottom("W_xc_x"); + x_slice_param->set_name("W_xc_x_slice"); + + LayerParameter output_concat_layer; + output_concat_layer.set_name("h_concat"); + output_concat_layer.set_type("Concat"); + output_concat_layer.add_top("h"); + output_concat_layer.mutable_concat_param()->set_axis(0); + + for (int t = 1; t <= this->T_; ++t) { + string tm1s = format_int(t - 1); + string ts = format_int(t); + + cont_slice_param->add_top("cont_" + ts); + x_slice_param->add_top("W_xc_x_" + ts); + + // Add layers to flush the hidden state when beginning a new + // sequence, as indicated by cont_t. + // h_conted_{t-1} := cont_t * h_{t-1} + // + // Normally, cont_t is binary (i.e., 0 or 1), so: + // h_conted_{t-1} := h_{t-1} if cont_t == 1 + // 0 otherwise + { + LayerParameter* cont_h_param = net_param->add_layer(); + cont_h_param->CopyFrom(scale_param); + cont_h_param->set_name("h_conted_" + tm1s); + cont_h_param->add_bottom("h_" + tm1s); + cont_h_param->add_bottom("cont_" + ts); + cont_h_param->add_top("h_conted_" + tm1s); + } + + // Add layer to compute + // W_hc_h_{t-1} := W_hc * h_conted_{t-1} + { + LayerParameter* w_param = net_param->add_layer(); + w_param->CopyFrom(hidden_param); + w_param->set_name("transform_" + ts); + w_param->add_param()->set_name("W_hc"); + w_param->add_bottom("h_conted_" + tm1s); + w_param->add_top("W_hc_h_" + tm1s); + w_param->mutable_inner_product_param()->set_axis(2); + } + + // Add the outputs of the linear transformations to compute the gate input. 
+ // gate_input_t := W_hc * h_conted_{t-1} + W_xc * x_t + b_c + // = W_hc_h_{t-1} + W_xc_x_t + b_c + { + LayerParameter* input_sum_layer = net_param->add_layer(); + input_sum_layer->CopyFrom(sum_param); + input_sum_layer->set_name("gate_input_" + ts); + input_sum_layer->add_bottom("W_hc_h_" + tm1s); + input_sum_layer->add_bottom("W_xc_x_" + ts); + if (this->static_input_) { + input_sum_layer->add_bottom("W_xc_x_static"); + } + input_sum_layer->add_top("gate_input_" + ts); + } + + // Add LSTMUnit layer to compute the cell & hidden vectors c_t and h_t. + // Inputs: c_{t-1}, gate_input_t = (i_t, f_t, o_t, g_t), cont_t + // Outputs: c_t, h_t + // [ i_t' ] + // [ f_t' ] := gate_input_t + // [ o_t' ] + // [ g_t' ] + // i_t := \sigmoid[i_t'] + // f_t := \sigmoid[f_t'] + // o_t := \sigmoid[o_t'] + // g_t := \tanh[g_t'] + // c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t) + // h_t := o_t .* \tanh[c_t] + { + LayerParameter* lstm_unit_param = net_param->add_layer(); + lstm_unit_param->set_type("LSTMUnit"); + lstm_unit_param->add_bottom("c_" + tm1s); + lstm_unit_param->add_bottom("gate_input_" + ts); + lstm_unit_param->add_bottom("cont_" + ts); + lstm_unit_param->add_top("c_" + ts); + lstm_unit_param->add_top("h_" + ts); + lstm_unit_param->set_name("unit_" + ts); + } + output_concat_layer.add_bottom("h_" + ts); + } // for (int t = 1; t <= this->T_; ++t) + + { + LayerParameter* c_T_copy_param = net_param->add_layer(); + c_T_copy_param->CopyFrom(split_param); + c_T_copy_param->add_bottom("c_" + format_int(this->T_)); + c_T_copy_param->add_top("c_T"); + } + net_param->add_layer()->CopyFrom(output_concat_layer); +} + +INSTANTIATE_CLASS(LSTMLayer); +REGISTER_LAYER_CLASS(LSTM); + +} // namespace caffe diff --git a/src/caffe/layers/lstm_unit_layer.cpp b/src/caffe/layers/lstm_unit_layer.cpp new file mode 100644 index 00000000000..277c031ad15 --- /dev/null +++ b/src/caffe/layers/lstm_unit_layer.cpp @@ -0,0 +1,131 @@ +#include +#include +#include + +#include "caffe/layer.hpp" +#include 
"caffe/layers/lstm_layer.hpp" + +namespace caffe { + +template +inline Dtype sigmoid(Dtype x) { + return 1. / (1. + exp(-x)); +} + +template +inline Dtype tanh(Dtype x) { + return 2. * sigmoid(2. * x) - 1.; +} + +template +void LSTMUnitLayer::Reshape(const vector*>& bottom, + const vector*>& top) { + const int num_instances = bottom[0]->shape(1); + for (int i = 0; i < bottom.size(); ++i) { + if (i == 2) { + CHECK_EQ(2, bottom[i]->num_axes()); + } else { + CHECK_EQ(3, bottom[i]->num_axes()); + } + CHECK_EQ(1, bottom[i]->shape(0)); + CHECK_EQ(num_instances, bottom[i]->shape(1)); + } + hidden_dim_ = bottom[0]->shape(2); + CHECK_EQ(num_instances, bottom[1]->shape(1)); + CHECK_EQ(4 * hidden_dim_, bottom[1]->shape(2)); + top[0]->ReshapeLike(*bottom[0]); + top[1]->ReshapeLike(*bottom[0]); + X_acts_.ReshapeLike(*bottom[1]); +} + +template +void LSTMUnitLayer::Forward_cpu(const vector*>& bottom, + const vector*>& top) { + const int num = bottom[0]->shape(1); + const int x_dim = hidden_dim_ * 4; + const Dtype* C_prev = bottom[0]->cpu_data(); + const Dtype* X = bottom[1]->cpu_data(); + const Dtype* cont = bottom[2]->cpu_data(); + Dtype* C = top[0]->mutable_cpu_data(); + Dtype* H = top[1]->mutable_cpu_data(); + for (int n = 0; n < num; ++n) { + for (int d = 0; d < hidden_dim_; ++d) { + const Dtype i = sigmoid(X[d]); + const Dtype f = (*cont == 0) ? 
0 : + (*cont * sigmoid(X[1 * hidden_dim_ + d])); + const Dtype o = sigmoid(X[2 * hidden_dim_ + d]); + const Dtype g = tanh(X[3 * hidden_dim_ + d]); + const Dtype c_prev = C_prev[d]; + const Dtype c = f * c_prev + i * g; + C[d] = c; + const Dtype tanh_c = tanh(c); + H[d] = o * tanh_c; + } + C_prev += hidden_dim_; + X += x_dim; + C += hidden_dim_; + H += hidden_dim_; + ++cont; + } +} + +template +void LSTMUnitLayer::Backward_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + CHECK(!propagate_down[2]) << "Cannot backpropagate to sequence indicators."; + if (!propagate_down[0] && !propagate_down[1]) { return; } + + const int num = bottom[0]->shape(1); + const int x_dim = hidden_dim_ * 4; + const Dtype* C_prev = bottom[0]->cpu_data(); + const Dtype* X = bottom[1]->cpu_data(); + const Dtype* cont = bottom[2]->cpu_data(); + const Dtype* C = top[0]->cpu_data(); + const Dtype* H = top[1]->cpu_data(); + const Dtype* C_diff = top[0]->cpu_diff(); + const Dtype* H_diff = top[1]->cpu_diff(); + Dtype* C_prev_diff = bottom[0]->mutable_cpu_diff(); + Dtype* X_diff = bottom[1]->mutable_cpu_diff(); + for (int n = 0; n < num; ++n) { + for (int d = 0; d < hidden_dim_; ++d) { + const Dtype i = sigmoid(X[d]); + const Dtype f = (*cont == 0) ? 
0 : + (*cont * sigmoid(X[1 * hidden_dim_ + d])); + const Dtype o = sigmoid(X[2 * hidden_dim_ + d]); + const Dtype g = tanh(X[3 * hidden_dim_ + d]); + const Dtype c_prev = C_prev[d]; + const Dtype c = C[d]; + const Dtype tanh_c = tanh(c); + Dtype* c_prev_diff = C_prev_diff + d; + Dtype* i_diff = X_diff + d; + Dtype* f_diff = X_diff + 1 * hidden_dim_ + d; + Dtype* o_diff = X_diff + 2 * hidden_dim_ + d; + Dtype* g_diff = X_diff + 3 * hidden_dim_ + d; + const Dtype c_term_diff = + C_diff[d] + H_diff[d] * o * (1 - tanh_c * tanh_c); + *c_prev_diff = c_term_diff * f; + *i_diff = c_term_diff * g * i * (1 - i); + *f_diff = c_term_diff * c_prev * f * (1 - f); + *o_diff = H_diff[d] * tanh_c * o * (1 - o); + *g_diff = c_term_diff * i * (1 - g * g); + } + C_prev += hidden_dim_; + X += x_dim; + C += hidden_dim_; + H += hidden_dim_; + C_diff += hidden_dim_; + H_diff += hidden_dim_; + X_diff += x_dim; + C_prev_diff += hidden_dim_; + ++cont; + } +} + +#ifdef CPU_ONLY +STUB_GPU(LSTMUnitLayer); +#endif + +INSTANTIATE_CLASS(LSTMUnitLayer); +REGISTER_LAYER_CLASS(LSTMUnit); + +} // namespace caffe diff --git a/src/caffe/layers/lstm_unit_layer.cu b/src/caffe/layers/lstm_unit_layer.cu new file mode 100644 index 00000000000..15bb451d9e0 --- /dev/null +++ b/src/caffe/layers/lstm_unit_layer.cu @@ -0,0 +1,154 @@ +#include +#include +#include + +#include "caffe/layer.hpp" +#include "caffe/layers/lstm_layer.hpp" + +namespace caffe { + +template +__device__ Dtype sigmoid(const Dtype x) { + return Dtype(1) / (Dtype(1) + exp(-x)); +} + +template +__device__ Dtype tanh(const Dtype x) { + return Dtype(2) * sigmoid(Dtype(2) * x) - Dtype(1); +} + +template +__global__ void LSTMActsForward(const int nthreads, const int dim, + const Dtype* X, Dtype* X_acts) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int x_dim = 4 * dim; + const int d = index % x_dim; + if (d < 3 * dim) { + X_acts[index] = sigmoid(X[index]); + } else { + X_acts[index] = tanh(X[index]); + } + } +} + +template +__global__ void 
LSTMUnitForward(const int nthreads, const int dim, + const Dtype* C_prev, const Dtype* X, const Dtype* cont, + Dtype* C, Dtype* H) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int n = index / dim; + const int d = index % dim; + const Dtype* X_offset = X + 4 * dim * n; + const Dtype i = X_offset[d]; + const Dtype f = X_offset[1 * dim + d]; + const Dtype o = X_offset[2 * dim + d]; + const Dtype g = X_offset[3 * dim + d]; + const Dtype c_prev = C_prev[index]; + const Dtype c = cont[n] * f * c_prev + i * g; + C[index] = c; + const Dtype tanh_c = tanh(c); + H[index] = o * tanh_c; + } +} + +template +void LSTMUnitLayer::Forward_gpu(const vector*>& bottom, + const vector*>& top) { + const int count = top[1]->count(); + const Dtype* C_prev = bottom[0]->gpu_data(); + const Dtype* X = bottom[1]->gpu_data(); + const Dtype* cont = bottom[2]->gpu_data(); + Dtype* X_acts = X_acts_.mutable_gpu_data(); + Dtype* C = top[0]->mutable_gpu_data(); + Dtype* H = top[1]->mutable_gpu_data(); + const int X_count = bottom[1]->count(); + // NOLINT_NEXT_LINE(whitespace/operators) + LSTMActsForward<<>>( + X_count, hidden_dim_, X, X_acts); + CUDA_POST_KERNEL_CHECK; + // NOLINT_NEXT_LINE(whitespace/operators) + LSTMUnitForward<<>>( + count, hidden_dim_, C_prev, X_acts, cont, C, H); + CUDA_POST_KERNEL_CHECK; +} + +template +__global__ void LSTMUnitBackward(const int nthreads, const int dim, + const Dtype* C_prev, const Dtype* X, const Dtype* C, const Dtype* H, + const Dtype* cont, const Dtype* C_diff, const Dtype* H_diff, + Dtype* C_prev_diff, Dtype* X_diff) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int n = index / dim; + const int d = index % dim; + const Dtype* X_offset = X + 4 * dim * n; + const Dtype i = X_offset[d]; + const Dtype f = X_offset[1 * dim + d]; + const Dtype o = X_offset[2 * dim + d]; + const Dtype g = X_offset[3 * dim + d]; + const Dtype c_prev = C_prev[index]; + const Dtype c = C[index]; + const Dtype tanh_c = tanh(c); + Dtype* c_prev_diff = C_prev_diff + index; + 
Dtype* X_diff_offset = X_diff + 4 * dim * n; + Dtype* i_diff = X_diff_offset + d; + Dtype* f_diff = X_diff_offset + 1 * dim + d; + Dtype* o_diff = X_diff_offset + 2 * dim + d; + Dtype* g_diff = X_diff_offset + 3 * dim + d; + const Dtype c_term_diff = + C_diff[index] + H_diff[index] * o * (1 - tanh_c * tanh_c); + const Dtype cont_n = cont[n]; + *c_prev_diff = cont_n * c_term_diff * f; + *i_diff = c_term_diff * g; + *f_diff = cont_n * c_term_diff * c_prev; + *o_diff = H_diff[index] * tanh_c; + *g_diff = c_term_diff * i; + } +} + +template +__global__ void LSTMActsBackward(const int nthreads, const int dim, + const Dtype* X_acts, const Dtype* X_acts_diff, Dtype* X_diff) { + CUDA_KERNEL_LOOP(index, nthreads) { + const int x_dim = 4 * dim; + const int d = index % x_dim; + const Dtype X_act = X_acts[index]; + if (d < 3 * dim) { + X_diff[index] = X_acts_diff[index] * X_act * (Dtype(1) - X_act); + } else { + X_diff[index] = X_acts_diff[index] * (Dtype(1) - X_act * X_act); + } + } +} + +template +void LSTMUnitLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + CHECK(!propagate_down[2]) << "Cannot backpropagate to sequence indicators."; + if (!propagate_down[0] && !propagate_down[1]) { return; } + + const int count = top[1]->count(); + const Dtype* C_prev = bottom[0]->gpu_data(); + const Dtype* X_acts = X_acts_.gpu_data(); + const Dtype* cont = bottom[2]->gpu_data(); + const Dtype* C = top[0]->gpu_data(); + const Dtype* H = top[1]->gpu_data(); + const Dtype* C_diff = top[0]->gpu_diff(); + const Dtype* H_diff = top[1]->gpu_diff(); + Dtype* C_prev_diff = bottom[0]->mutable_gpu_diff(); + Dtype* X_acts_diff = X_acts_.mutable_gpu_diff(); + LSTMUnitBackward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>(count, hidden_dim_, + C_prev, X_acts, C, H, cont, C_diff, H_diff, C_prev_diff, X_acts_diff); + CUDA_POST_KERNEL_CHECK; + const int X_count = bottom[1]->count(); + Dtype* X_diff = bottom[1]->mutable_gpu_diff(); + LSTMActsBackward 
// NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + X_count, hidden_dim_, X_acts, X_acts_diff, X_diff); + CUDA_POST_KERNEL_CHECK; +} + +INSTANTIATE_LAYER_GPU_FUNCS(LSTMUnitLayer); + +} // namespace caffe diff --git a/src/caffe/test/test_lstm_layer.cpp b/src/caffe/test/test_lstm_layer.cpp new file mode 100644 index 00000000000..51905baafac --- /dev/null +++ b/src/caffe/test/test_lstm_layer.cpp @@ -0,0 +1,288 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layers/lstm_layer.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class LSTMLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + LSTMLayerTest() : num_output_(7) { + blob_bottom_vec_.push_back(&blob_bottom_); + blob_bottom_vec_.push_back(&blob_bottom_cont_); + blob_top_vec_.push_back(&blob_top_); + unit_blob_bottom_vec_.push_back(&unit_blob_bottom_c_prev_); + unit_blob_bottom_vec_.push_back(&unit_blob_bottom_x_); + unit_blob_bottom_vec_.push_back(&unit_blob_bottom_cont_); + unit_blob_top_vec_.push_back(&unit_blob_top_c_); + unit_blob_top_vec_.push_back(&unit_blob_top_h_); + + ReshapeBlobs(1, 3); + + layer_param_.mutable_recurrent_param()->set_num_output(num_output_); + FillerParameter* weight_filler = + layer_param_.mutable_recurrent_param()->mutable_weight_filler(); + weight_filler->set_type("gaussian"); + weight_filler->set_std(0.2); + FillerParameter* bias_filler = + layer_param_.mutable_recurrent_param()->mutable_bias_filler(); + bias_filler->set_type("gaussian"); + bias_filler->set_std(0.1); + + layer_param_.set_phase(TEST); + } + + void ReshapeBlobs(int num_timesteps, int num_instances) { + blob_bottom_.Reshape(num_timesteps, num_instances, 3, 2); + blob_bottom_static_.Reshape(num_instances, 2, 3, 4); + vector shape(2); + shape[0] = num_timesteps; + shape[1] = num_instances; 
+ blob_bottom_cont_.Reshape(shape); + shape.push_back(num_output_); + + shape[0] = 1; shape[1] = num_instances; shape[2] = 4 * num_output_; + unit_blob_bottom_x_.Reshape(shape); + shape[0] = 1; shape[1] = num_instances; shape[2] = num_output_; + unit_blob_bottom_c_prev_.Reshape(shape); + shape.resize(2); + shape[0] = 1; shape[1] = num_instances; + unit_blob_bottom_cont_.Reshape(shape); + + FillerParameter filler_param; + filler_param.set_min(-1); + filler_param.set_max(1); + UniformFiller filler(filler_param); + filler.Fill(&blob_bottom_); + filler.Fill(&unit_blob_bottom_c_prev_); + filler.Fill(&unit_blob_bottom_x_); + } + + int num_output_; + LayerParameter layer_param_; + Blob blob_bottom_; + Blob blob_bottom_cont_; + Blob blob_bottom_static_; + Blob blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; + + Blob unit_blob_bottom_cont_; + Blob unit_blob_bottom_c_prev_; + Blob unit_blob_bottom_x_; + Blob unit_blob_top_c_; + Blob unit_blob_top_h_; + vector*> unit_blob_bottom_vec_; + vector*> unit_blob_top_vec_; +}; + +TYPED_TEST_CASE(LSTMLayerTest, TestDtypesAndDevices); + +TYPED_TEST(LSTMLayerTest, TestSetUp) { + typedef typename TypeParam::Dtype Dtype; + LSTMLayer layer(this->layer_param_); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + vector expected_top_shape = this->blob_bottom_.shape(); + expected_top_shape.resize(3); + expected_top_shape[2] = this->num_output_; + EXPECT_TRUE(this->blob_top_.shape() == expected_top_shape); +} + +TYPED_TEST(LSTMLayerTest, TestForward) { + typedef typename TypeParam::Dtype Dtype; + const int kNumTimesteps = 3; + const int num = this->blob_bottom_.shape(1); + this->ReshapeBlobs(kNumTimesteps, num); + + // Fill the cont blob with <0, 1, 1, ..., 1>, + // indicating a sequence that begins at the first timestep + // then continues for the rest of the sequence. 
+ for (int t = 0; t < kNumTimesteps; ++t) { + for (int n = 0; n < num; ++n) { + this->blob_bottom_cont_.mutable_cpu_data()[t * num + n] = t > 0; + } + } + + // Process the full sequence in a single batch. + FillerParameter filler_param; + filler_param.set_mean(0); + filler_param.set_std(1); + GaussianFiller sequence_filler(filler_param); + Caffe::set_random_seed(1); + sequence_filler.Fill(&this->blob_bottom_); + shared_ptr > layer(new LSTMLayer(this->layer_param_)); + Caffe::set_random_seed(1701); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + LOG(INFO) << "Calling forward for full sequence LSTM"; + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + + // Copy the inputs and outputs to reuse/check them later. + Blob bottom_copy(this->blob_bottom_.shape()); + bottom_copy.CopyFrom(this->blob_bottom_); + Blob top_copy(this->blob_top_.shape()); + top_copy.CopyFrom(this->blob_top_); + + // Process the batch one timestep at a time; + // check that we get the same result. + this->ReshapeBlobs(1, num); + layer.reset(new LSTMLayer(this->layer_param_)); + Caffe::set_random_seed(1701); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + const int bottom_count = this->blob_bottom_.count(); + const int top_count = this->blob_top_.count(); + const Dtype kEpsilon = 1e-5; + for (int t = 0; t < kNumTimesteps; ++t) { + caffe_copy(bottom_count, bottom_copy.cpu_data() + t * bottom_count, + this->blob_bottom_.mutable_cpu_data()); + for (int n = 0; n < num; ++n) { + this->blob_bottom_cont_.mutable_cpu_data()[n] = t > 0; + } + LOG(INFO) << "Calling forward for LSTM timestep " << t; + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < top_count; ++i) { + ASSERT_LT(t * top_count + i, top_copy.count()); + EXPECT_NEAR(this->blob_top_.cpu_data()[i], + top_copy.cpu_data()[t * top_count + i], kEpsilon) + << "t = " << t << "; i = " << i; + } + } + + // Process the batch one timestep at a time with all cont blobs set to 0. 
+ // Check that we get a different result, except in the first timestep. + Caffe::set_random_seed(1701); + layer.reset(new LSTMLayer(this->layer_param_)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + for (int t = 0; t < kNumTimesteps; ++t) { + caffe_copy(bottom_count, bottom_copy.cpu_data() + t * bottom_count, + this->blob_bottom_.mutable_cpu_data()); + for (int n = 0; n < num; ++n) { + this->blob_bottom_cont_.mutable_cpu_data()[n] = 0; + } + LOG(INFO) << "Calling forward for LSTM timestep " << t; + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < top_count; ++i) { + if (t == 0) { + EXPECT_NEAR(this->blob_top_.cpu_data()[i], + top_copy.cpu_data()[t * top_count + i], kEpsilon) + << "t = " << t << "; i = " << i; + } else { + EXPECT_NE(this->blob_top_.cpu_data()[i], + top_copy.cpu_data()[t * top_count + i]) + << "t = " << t << "; i = " << i; + } + } + } +} + +TYPED_TEST(LSTMLayerTest, TestLSTMUnitSetUp) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + LSTMUnitLayer layer(layer_param); + layer.SetUp(this->unit_blob_bottom_vec_, this->unit_blob_top_vec_); + const int num_axes = this->unit_blob_bottom_c_prev_.num_axes(); + ASSERT_EQ(num_axes, this->unit_blob_top_c_.num_axes()); + ASSERT_EQ(num_axes, this->unit_blob_top_h_.num_axes()); + for (int i = 0; i < num_axes; ++i) { + EXPECT_EQ(this->unit_blob_bottom_c_prev_.shape(i), + this->unit_blob_top_c_.shape(i)); + EXPECT_EQ(this->unit_blob_bottom_c_prev_.shape(i), + this->unit_blob_top_h_.shape(i)); + } +} + +TYPED_TEST(LSTMLayerTest, TestLSTMUnitGradient) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + LSTMUnitLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + Dtype* cont_data = this->blob_bottom_cont_.mutable_cpu_data(); + cont_data[0] = 0; + cont_data[1] = 0; + cont_data[2] = 0; + checker.CheckGradientExhaustive(&layer, this->unit_blob_bottom_vec_, + this->unit_blob_top_vec_, 0); + 
checker.CheckGradientExhaustive(&layer, this->unit_blob_bottom_vec_, + this->unit_blob_top_vec_, 1); +} + +TYPED_TEST(LSTMLayerTest, TestLSTMUnitGradientNonZeroCont) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + LSTMUnitLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + Dtype* cont_data = this->blob_bottom_cont_.mutable_cpu_data(); + cont_data[0] = 1; + cont_data[1] = 0; + cont_data[2] = 1; + checker.CheckGradientExhaustive(&layer, this->unit_blob_bottom_vec_, + this->unit_blob_top_vec_, 0); + checker.CheckGradientExhaustive(&layer, this->unit_blob_bottom_vec_, + this->unit_blob_top_vec_, 1); +} + +TYPED_TEST(LSTMLayerTest, TestGradient) { + typedef typename TypeParam::Dtype Dtype; + LSTMLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); +} + +TYPED_TEST(LSTMLayerTest, TestGradientNonZeroCont) { + typedef typename TypeParam::Dtype Dtype; + LSTMLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + for (int i = 0; i < this->blob_bottom_cont_.count(); ++i) { + this->blob_bottom_cont_.mutable_cpu_data()[i] = i > 2; + } + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); +} + +TYPED_TEST(LSTMLayerTest, TestGradientNonZeroContBufferSize2) { + typedef typename TypeParam::Dtype Dtype; + this->ReshapeBlobs(2, 2); + FillerParameter filler_param; + UniformFiller filler(filler_param); + filler.Fill(&this->blob_bottom_); + LSTMLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + for (int i = 0; i < this->blob_bottom_cont_.count(); ++i) { + this->blob_bottom_cont_.mutable_cpu_data()[i] = i > 2; + } + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); +} + +TYPED_TEST(LSTMLayerTest, TestGradientNonZeroContBufferSize2WithStaticInput) { + typedef typename TypeParam::Dtype Dtype; + this->ReshapeBlobs(2, 
2); + FillerParameter filler_param; + UniformFiller filler(filler_param); + filler.Fill(&this->blob_bottom_); + filler.Fill(&this->blob_bottom_static_); + this->blob_bottom_vec_.push_back(&this->blob_bottom_static_); + LSTMLayer layer(this->layer_param_); + GradientChecker checker(1e-2, 1e-3); + for (int i = 0; i < this->blob_bottom_cont_.count(); ++i) { + this->blob_bottom_cont_.mutable_cpu_data()[i] = i > 2; + } + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 0); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_, 2); +} + + +} // namespace caffe From 7e7631f27e4145b9838b17d46bb1ffc42279b1e4 Mon Sep 17 00:00:00 2001 From: Chuck Cho Date: Thu, 2 Jun 2016 14:35:14 -0400 Subject: [PATCH 075/264] Fixing a typo --- tools/extract_features.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/extract_features.cpp b/tools/extract_features.cpp index 704467250a6..51c791e4021 100644 --- a/tools/extract_features.cpp +++ b/tools/extract_features.cpp @@ -130,7 +130,7 @@ int feature_extraction_pipeline(int argc, char** argv) { txns.push_back(txn); } - LOG(ERROR)<< "Extacting Features"; + LOG(ERROR)<< "Extracting Features"; Datum datum; std::vector image_indices(num_features, 0); From 742c93f31be4c874aa5fd0103f25f8a2f8d4d63d Mon Sep 17 00:00:00 2001 From: philkr Date: Mon, 23 May 2016 20:09:45 -0700 Subject: [PATCH 076/264] Exposing load_hdf5 and save_hdf5 to python --- python/caffe/_caffe.cpp | 12 +++++++++++- python/caffe/test/test_net.py | 14 ++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 32b5d921094..48a0c8f2e95 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -114,6 +114,14 @@ void Net_Save(const Net& net, string filename) { WriteProtoToBinaryFile(net_param, filename.c_str()); } +void Net_SaveHDF5(const Net& net, string filename) { + net.ToHDF5(filename); +} + +void 
Net_LoadHDF5(Net* net, string filename) { + net->CopyTrainedLayersFromHDF5(filename.c_str()); +} + void Net_SetInputArrays(Net* net, bp::object data_obj, bp::object labels_obj) { // check that this network has an input MemoryDataLayer @@ -267,7 +275,9 @@ BOOST_PYTHON_MODULE(_caffe) { bp::return_value_policy())) .def("_set_input_arrays", &Net_SetInputArrays, bp::with_custodian_and_ward<1, 2, bp::with_custodian_and_ward<1, 3> >()) - .def("save", &Net_Save); + .def("save", &Net_Save) + .def("save_hdf5", &Net_SaveHDF5) + .def("load_hdf5", &Net_LoadHDF5); BP_REGISTER_SHARED_PTR_TO_PYTHON(Net); bp::class_, shared_ptr >, boost::noncopyable>( diff --git a/python/caffe/test/test_net.py b/python/caffe/test/test_net.py index aad828aa8aa..4cacfcd05bb 100644 --- a/python/caffe/test/test_net.py +++ b/python/caffe/test/test_net.py @@ -79,3 +79,17 @@ def test_save_and_read(self): for i in range(len(self.net.params[name])): self.assertEqual(abs(self.net.params[name][i].data - net2.params[name][i].data).sum(), 0) + + def test_save_hdf5(self): + f = tempfile.NamedTemporaryFile(mode='w+', delete=False) + f.close() + self.net.save_hdf5(f.name) + net_file = simple_net_file(self.num_output) + net2 = caffe.Net(net_file, caffe.TRAIN) + net2.load_hdf5(f.name) + os.remove(net_file) + os.remove(f.name) + for name in self.net.params: + for i in range(len(self.net.params[name])): + self.assertEqual(abs(self.net.params[name][i].data + - net2.params[name][i].data).sum(), 0) From d167e61a23a54de529d51731fbe543ff4cec0d3c Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Wed, 1 Jun 2016 09:50:57 -0700 Subject: [PATCH 077/264] Add level and stages to Net constructor This internal functionality will be exposed through the various interfaces in subsequent commits Also adds C++ tests for all-in-one nets --- include/caffe/net.hpp | 1 + src/caffe/net.cpp | 11 +++- src/caffe/test/test_net.cpp | 128 ++++++++++++++++++++++++++++++++++++ 3 files changed, 139 insertions(+), 1 deletion(-) diff --git 
a/include/caffe/net.hpp b/include/caffe/net.hpp index 0addb3c2a6d..493bdf294e2 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -25,6 +25,7 @@ class Net { public: explicit Net(const NetParameter& param, const Net* root_net = NULL); explicit Net(const string& param_file, Phase phase, + const int level = 0, const vector* stages = NULL, const Net* root_net = NULL); virtual ~Net() {} diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index f0bf594936c..644cb7e97ee 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -28,11 +28,20 @@ Net::Net(const NetParameter& param, const Net* root_net) } template -Net::Net(const string& param_file, Phase phase, const Net* root_net) +Net::Net(const string& param_file, Phase phase, + const int level, const vector* stages, + const Net* root_net) : root_net_(root_net) { NetParameter param; ReadNetParamsFromTextFileOrDie(param_file, ¶m); + // Set phase, stages and level param.mutable_state()->set_phase(phase); + if (stages != NULL) { + for (int i = 0; i < stages->size(); i++) { + param.mutable_state()->add_stage((*stages)[i]); + } + } + param.mutable_state()->set_level(level); Init(param); } diff --git a/src/caffe/test/test_net.cpp b/src/caffe/test/test_net.cpp index 92fd317fee8..24b957f2acc 100644 --- a/src/caffe/test/test_net.cpp +++ b/src/caffe/test/test_net.cpp @@ -9,6 +9,7 @@ #include "caffe/common.hpp" #include "caffe/filler.hpp" #include "caffe/net.hpp" +#include "caffe/util/io.hpp" #include "caffe/util/math_functions.hpp" #include "caffe/test/test_caffe_main.hpp" @@ -29,6 +30,17 @@ class NetTest : public MultiDeviceTest { net_.reset(new Net(param)); } + virtual void InitNetFromProtoFileWithState(const string& proto, + Phase phase = caffe::TRAIN, const int level = 0, + const vector* stages = NULL) { + NetParameter param; + CHECK(google::protobuf::TextFormat::ParseFromString(proto, ¶m)); + string param_file; + MakeTempFilename(¶m_file); + WriteProtoToTextFile(param, param_file); + net_.reset(new 
Net(param_file, phase, level, stages)); + } + virtual void CopyNetBlobs(const bool copy_diff, vector > >* blobs_copy) { CHECK(net_); @@ -771,6 +783,62 @@ class NetTest : public MultiDeviceTest { InitNetFromProtoString(proto); } + virtual void InitAllInOneNet(Phase phase = caffe::TRAIN, + const int level = 0, const vector* stages = NULL) { + string proto = + "name: 'All-in-one Network'" + "layer { " + " name: 'train-data' " + " type: 'DummyData' " + " top: 'data' " + " top: 'label' " + " dummy_data_param { " + " shape { dim: 1 dim: 10 } " + " shape { dim: 1 dim: 1 } " + " } " + " include { phase: TRAIN stage: 'train' } " + "} " + "layer { " + " name: 'val-data' " + " type: 'DummyData' " + " top: 'data' " + " top: 'label' " + " dummy_data_param { " + " shape { dim: 1 dim: 10 } " + " shape { dim: 1 dim: 1 } " + " } " + " include { phase: TEST stage: 'val' } " + "} " + "layer { " + " name: 'deploy-data' " + " type: 'Input' " + " top: 'data' " + " input_param { " + " shape { dim: 1 dim: 10 } " + " } " + " include { phase: TEST stage: 'deploy' } " + "} " + "layer { " + " name: 'ip' " + " type: 'InnerProduct' " + " bottom: 'data' " + " top: 'ip' " + " inner_product_param { " + " num_output: 2 " + " } " + "} " + "layer { " + " name: 'loss' " + " type: 'SoftmaxWithLoss' " + " bottom: 'ip' " + " bottom: 'label' " + " top: 'loss' " + " include { phase: TRAIN stage: 'train' } " + " include { phase: TEST stage: 'val' } " + "} "; + InitNetFromProtoFileWithState(proto, phase, level, stages); + } + int seed_; shared_ptr > net_; }; @@ -2473,4 +2541,64 @@ TYPED_TEST(NetTest, TestForcePropagateDown) { } } +TYPED_TEST(NetTest, TestAllInOneNetTrain) { + vector stages; + stages.push_back("train"); + this->InitAllInOneNet(caffe::TRAIN, 0, &stages); + bool found_data = false; + bool found_loss = false; + for (int i = 0; i < this->net_->layers().size(); ++i) { + const string& layer_name = this->net_->layer_names()[i]; + if (layer_name == "train-data") { + found_data = true; + } else if 
(layer_name == "loss") { + found_loss = true; + } else { + ASSERT_NE(layer_name, "val-data"); + ASSERT_NE(layer_name, "deploy-data"); + } + } + ASSERT_TRUE(found_data); + ASSERT_TRUE(found_loss); +} + +TYPED_TEST(NetTest, TestAllInOneNetVal) { + vector stages; + stages.push_back("val"); + this->InitAllInOneNet(caffe::TEST, 0, &stages); + bool found_data = false; + bool found_loss = false; + for (int i = 0; i < this->net_->layers().size(); ++i) { + const string& layer_name = this->net_->layer_names()[i]; + if (layer_name == "val-data") { + found_data = true; + } else if (layer_name == "loss") { + found_loss = true; + } else { + ASSERT_NE(layer_name, "train-data"); + ASSERT_NE(layer_name, "deploy-data"); + } + } + ASSERT_TRUE(found_data); + ASSERT_TRUE(found_loss); +} + +TYPED_TEST(NetTest, TestAllInOneNetDeploy) { + vector stages; + stages.push_back("deploy"); + this->InitAllInOneNet(caffe::TEST, 0, &stages); + bool found_data = false; + for (int i = 0; i < this->net_->layers().size(); ++i) { + const string& layer_name = this->net_->layer_names()[i]; + if (layer_name == "deploy-data") { + found_data = true; + } else { + ASSERT_NE(layer_name, "train-data"); + ASSERT_NE(layer_name, "val-data"); + ASSERT_NE(layer_name, "loss"); + } + } + ASSERT_TRUE(found_data); +} + } // namespace caffe From 66e84d785a72d66511bffe30c0f016af9103deb8 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Wed, 1 Jun 2016 09:56:51 -0700 Subject: [PATCH 078/264] Add phase, level and stages to tools/caffe Adds command-line flags for phase, level and stage train -- override level and stages for test_state from solver test -- set level and stages time -- set phase, level and stages --- tools/caffe.cpp | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/tools/caffe.cpp b/tools/caffe.cpp index 5bb60eb161d..9bf4214ad93 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -34,6 +34,13 @@ DEFINE_string(solver, "", "The solver definition protocol 
buffer text file."); DEFINE_string(model, "", "The model definition protocol buffer text file."); +DEFINE_string(phase, "", + "Optional; network phase (TRAIN or TEST). Only used for 'time'."); +DEFINE_int32(level, 0, + "Optional; network level."); +DEFINE_string(stage, "", + "Optional; network stages (not to be confused with phase), " + "separated by ','."); DEFINE_string(snapshot, "", "Optional; the snapshot solver state to resume training."); DEFINE_string(weights, "", @@ -101,6 +108,25 @@ static void get_gpus(vector* gpus) { } } +// Parse phase from flags +caffe::Phase get_phase_from_flags(caffe::Phase default_value) { + if (FLAGS_phase == "") + return default_value; + if (FLAGS_phase == "TRAIN") + return caffe::TRAIN; + if (FLAGS_phase == "TEST") + return caffe::TEST; + LOG(FATAL) << "phase must be \"TRAIN\" or \"TEST\""; + return caffe::TRAIN; // Avoid warning +} + +// Parse stages from flags +vector get_stages_from_flags() { + vector stages; + boost::split(stages, FLAGS_stage, boost::is_any_of(",")); + return stages; +} + // caffe commands to call by // caffe // @@ -156,10 +182,16 @@ int train() { CHECK(!FLAGS_snapshot.size() || !FLAGS_weights.size()) << "Give a snapshot to resume training or weights to finetune " "but not both."; + vector stages = get_stages_from_flags(); caffe::SolverParameter solver_param; caffe::ReadSolverParamsFromTextFileOrDie(FLAGS_solver, &solver_param); + solver_param.mutable_train_state()->set_level(FLAGS_level); + for (int i = 0; i < stages.size(); i++) { + solver_param.mutable_train_state()->add_stage(stages[i]); + } + // If the gpus flag is not provided, allow the mode and device to be set // in the solver prototxt. 
if (FLAGS_gpu.size() == 0 @@ -229,6 +261,7 @@ RegisterBrewFunction(train); int test() { CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to score."; CHECK_GT(FLAGS_weights.size(), 0) << "Need model weights to score."; + vector stages = get_stages_from_flags(); // Set device id and mode vector gpus; @@ -247,7 +280,7 @@ int test() { Caffe::set_mode(Caffe::CPU); } // Instantiate the caffe net. - Net caffe_net(FLAGS_model, caffe::TEST); + Net caffe_net(FLAGS_model, caffe::TEST, FLAGS_level, &stages); caffe_net.CopyTrainedLayersFrom(FLAGS_weights); LOG(INFO) << "Running for " << FLAGS_iterations << " iterations."; @@ -300,6 +333,8 @@ RegisterBrewFunction(test); // Time: benchmark the execution time of a model. int time() { CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to time."; + caffe::Phase phase = get_phase_from_flags(caffe::TRAIN); + vector stages = get_stages_from_flags(); // Set device id and mode vector gpus; @@ -313,7 +348,7 @@ int time() { Caffe::set_mode(Caffe::CPU); } // Instantiate the caffe net. - Net caffe_net(FLAGS_model, caffe::TRAIN); + Net caffe_net(FLAGS_model, phase, FLAGS_level, &stages); // Do a clean forward and backward pass, so that memory allocation are done // and future iterations will be more stable. 
From 19adc7a79e3acacc777076143357cc0569781cd3 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Wed, 1 Jun 2016 10:02:41 -0700 Subject: [PATCH 079/264] Add level and stages to pycaffe Uses Boost.Python's pattern matching to differentiate between constructors Also adds Python tests for all-in-one nets --- python/caffe/_caffe.cpp | 44 +++++-- python/caffe/test/test_net.py | 228 +++++++++++++++++++++++++++++++++- 2 files changed, 263 insertions(+), 9 deletions(-) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 48a0c8f2e95..e2726286dfb 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -86,19 +86,42 @@ void CheckContiguousArray(PyArrayObject* arr, string name, } } -// Net constructor for passing phase as int -shared_ptr > Net_Init( - string param_file, int phase) { - CheckFile(param_file); +// Net constructor +shared_ptr > Net_Init(string network_file, int phase, + const int level, const bp::object& stages, + const bp::object& weights) { + CheckFile(network_file); + + // Convert stages from list to vector + vector stages_vector; + if (!stages.is_none()) { + for (int i = 0; i < len(stages); i++) { + stages_vector.push_back(bp::extract(stages[i])); + } + } + + // Initialize net + shared_ptr > net(new Net(network_file, + static_cast(phase), level, &stages_vector)); + + // Load weights + if (!weights.is_none()) { + std::string weights_file_str = bp::extract(weights); + CheckFile(weights_file_str); + net->CopyTrainedLayersFrom(weights_file_str); + } - shared_ptr > net(new Net(param_file, - static_cast(phase))); return net; } -// Net construct-and-load convenience constructor +// Legacy Net construct-and-load convenience constructor shared_ptr > Net_Init_Load( string param_file, string pretrained_param_file, int phase) { + LOG(WARNING) << "DEPRECATION WARNING - deprecated use of Python interface"; + LOG(WARNING) << "Use this instead (with the named \"weights\"" + << " parameter):"; + LOG(WARNING) << "Net('" << param_file << "', " << 
phase + << ", weights='" << pretrained_param_file << "')"; CheckFile(param_file); CheckFile(pretrained_param_file); @@ -245,7 +268,12 @@ BOOST_PYTHON_MODULE(_caffe) { bp::class_, shared_ptr >, boost::noncopyable >("Net", bp::no_init) - .def("__init__", bp::make_constructor(&Net_Init)) + // Constructor + .def("__init__", bp::make_constructor(&Net_Init, + bp::default_call_policies(), (bp::arg("network_file"), "phase", + bp::arg("level")=0, bp::arg("stages")=bp::object(), + bp::arg("weights")=bp::object()))) + // Legacy constructor .def("__init__", bp::make_constructor(&Net_Init_Load)) .def("_forward", &Net::ForwardFromTo) .def("_backward", &Net::BackwardFromTo) diff --git a/python/caffe/test/test_net.py b/python/caffe/test/test_net.py index 4cacfcd05bb..300aabdeea5 100644 --- a/python/caffe/test/test_net.py +++ b/python/caffe/test/test_net.py @@ -72,7 +72,11 @@ def test_save_and_read(self): f.close() self.net.save(f.name) net_file = simple_net_file(self.num_output) - net2 = caffe.Net(net_file, f.name, caffe.TRAIN) + # Test legacy constructor + # should print deprecation warning + caffe.Net(net_file, f.name, caffe.TRAIN) + # Test named constructor + net2 = caffe.Net(net_file, caffe.TRAIN, weights=f.name) os.remove(net_file) os.remove(f.name) for name in self.net.params: @@ -93,3 +97,225 @@ def test_save_hdf5(self): for i in range(len(self.net.params[name])): self.assertEqual(abs(self.net.params[name][i].data - net2.params[name][i].data).sum(), 0) + +class TestLevels(unittest.TestCase): + + TEST_NET = """ +layer { + name: "data" + type: "DummyData" + top: "data" + dummy_data_param { shape { dim: 1 dim: 1 dim: 10 dim: 10 } } +} +layer { + name: "NoLevel" + type: "InnerProduct" + bottom: "data" + top: "NoLevel" + inner_product_param { num_output: 1 } +} +layer { + name: "Level0Only" + type: "InnerProduct" + bottom: "data" + top: "Level0Only" + include { min_level: 0 max_level: 0 } + inner_product_param { num_output: 1 } +} +layer { + name: "Level1Only" + type: 
"InnerProduct" + bottom: "data" + top: "Level1Only" + include { min_level: 1 max_level: 1 } + inner_product_param { num_output: 1 } +} +layer { + name: "Level>=0" + type: "InnerProduct" + bottom: "data" + top: "Level>=0" + include { min_level: 0 } + inner_product_param { num_output: 1 } +} +layer { + name: "Level>=1" + type: "InnerProduct" + bottom: "data" + top: "Level>=1" + include { min_level: 1 } + inner_product_param { num_output: 1 } +} +""" + + def setUp(self): + self.f = tempfile.NamedTemporaryFile(mode='w+') + self.f.write(self.TEST_NET) + self.f.flush() + + def tearDown(self): + self.f.close() + + def check_net(self, net, blobs): + net_blobs = [b for b in net.blobs.keys() if 'data' not in b] + self.assertEqual(net_blobs, blobs) + + def test_0(self): + net = caffe.Net(self.f.name, caffe.TEST) + self.check_net(net, ['NoLevel', 'Level0Only', 'Level>=0']) + + def test_1(self): + net = caffe.Net(self.f.name, caffe.TEST, level=1) + self.check_net(net, ['NoLevel', 'Level1Only', 'Level>=0', 'Level>=1']) + + +class TestStages(unittest.TestCase): + + TEST_NET = """ +layer { + name: "data" + type: "DummyData" + top: "data" + dummy_data_param { shape { dim: 1 dim: 1 dim: 10 dim: 10 } } +} +layer { + name: "A" + type: "InnerProduct" + bottom: "data" + top: "A" + include { stage: "A" } + inner_product_param { num_output: 1 } +} +layer { + name: "B" + type: "InnerProduct" + bottom: "data" + top: "B" + include { stage: "B" } + inner_product_param { num_output: 1 } +} +layer { + name: "AorB" + type: "InnerProduct" + bottom: "data" + top: "AorB" + include { stage: "A" } + include { stage: "B" } + inner_product_param { num_output: 1 } +} +layer { + name: "AandB" + type: "InnerProduct" + bottom: "data" + top: "AandB" + include { stage: "A" stage: "B" } + inner_product_param { num_output: 1 } +} +""" + + def setUp(self): + self.f = tempfile.NamedTemporaryFile(mode='w+') + self.f.write(self.TEST_NET) + self.f.flush() + + def tearDown(self): + self.f.close() + + def 
check_net(self, net, blobs): + net_blobs = [b for b in net.blobs.keys() if 'data' not in b] + self.assertEqual(net_blobs, blobs) + + def test_A(self): + net = caffe.Net(self.f.name, caffe.TEST, stages=['A']) + self.check_net(net, ['A', 'AorB']) + + def test_B(self): + net = caffe.Net(self.f.name, caffe.TEST, stages=['B']) + self.check_net(net, ['B', 'AorB']) + + def test_AandB(self): + net = caffe.Net(self.f.name, caffe.TEST, stages=['A', 'B']) + self.check_net(net, ['A', 'B', 'AorB', 'AandB']) + + +class TestAllInOne(unittest.TestCase): + + TEST_NET = """ +layer { + name: "train_data" + type: "DummyData" + top: "data" + top: "label" + dummy_data_param { + shape { dim: 1 dim: 1 dim: 10 dim: 10 } + shape { dim: 1 dim: 1 dim: 1 dim: 1 } + } + include { phase: TRAIN stage: "train" } +} +layer { + name: "val_data" + type: "DummyData" + top: "data" + top: "label" + dummy_data_param { + shape { dim: 1 dim: 1 dim: 10 dim: 10 } + shape { dim: 1 dim: 1 dim: 1 dim: 1 } + } + include { phase: TEST stage: "val" } +} +layer { + name: "deploy_data" + type: "Input" + top: "data" + input_param { shape { dim: 1 dim: 1 dim: 10 dim: 10 } } + include { phase: TEST stage: "deploy" } +} +layer { + name: "ip" + type: "InnerProduct" + bottom: "data" + top: "ip" + inner_product_param { num_output: 2 } +} +layer { + name: "loss" + type: "SoftmaxWithLoss" + bottom: "ip" + bottom: "label" + top: "loss" + include: { phase: TRAIN stage: "train" } + include: { phase: TEST stage: "val" } +} +layer { + name: "pred" + type: "Softmax" + bottom: "ip" + top: "pred" + include: { phase: TEST stage: "deploy" } +} +""" + + def setUp(self): + self.f = tempfile.NamedTemporaryFile(mode='w+') + self.f.write(self.TEST_NET) + self.f.flush() + + def tearDown(self): + self.f.close() + + def check_net(self, net, outputs): + self.assertEqual(list(net.blobs['data'].shape), [1,1,10,10]) + self.assertEqual(net.outputs, outputs) + + def test_train(self): + net = caffe.Net(self.f.name, caffe.TRAIN, stages=['train']) + 
self.check_net(net, ['loss']) + + def test_val(self): + net = caffe.Net(self.f.name, caffe.TEST, stages=['val']) + self.check_net(net, ['loss']) + + def test_deploy(self): + net = caffe.Net(self.f.name, caffe.TEST, stages=['deploy']) + self.check_net(net, ['pred']) + From dec2381cc8d6465f0997cd29b143b3c6e13416ef Mon Sep 17 00:00:00 2001 From: philkr Date: Thu, 3 Sep 2015 14:28:55 -0700 Subject: [PATCH 080/264] Exposing solver callbacks to python --- python/caffe/_caffe.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 48a0c8f2e95..334088e8a57 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -228,6 +228,27 @@ bp::object BlobVec_add_blob(bp::tuple args, bp::dict kwargs) { return bp::object(); } +template +class PythonCallback: public Solver::Callback { + protected: + bp::object on_start_, on_gradients_ready_; + + public: + PythonCallback(bp::object on_start, bp::object on_gradients_ready) + : on_start_(on_start), on_gradients_ready_(on_gradients_ready) { } + virtual void on_gradients_ready() { + on_gradients_ready_(); + } + virtual void on_start() { + on_start_(); + } +}; +template +void Solver_add_callback(Solver * solver, bp::object on_start, + bp::object on_gradients_ready) { + solver->add_callback(new PythonCallback(on_start, on_gradients_ready)); +} + BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1); BOOST_PYTHON_MODULE(_caffe) { @@ -317,6 +338,7 @@ BOOST_PYTHON_MODULE(_caffe) { .add_property("test_nets", bp::make_function(&Solver::test_nets, bp::return_internal_reference<>())) .add_property("iter", &Solver::iter) + .def("add_callback", &Solver_add_callback) .def("solve", static_cast::*)(const char*)>( &Solver::Solve), SolveOverloads()) .def("step", &Solver::Step) From 9f1855273fa27d106b3675d32ec01acb658a80f0 Mon Sep 17 00:00:00 2001 From: Raffi Enficiaud Date: Tue, 21 Jun 2016 13:41:06 +0200 Subject: [PATCH 081/264] Fix glog upstream 
autoconf --- cmake/External/glog.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/External/glog.cmake b/cmake/External/glog.cmake index a44672f2753..f9d0549cd90 100644 --- a/cmake/External/glog.cmake +++ b/cmake/External/glog.cmake @@ -37,6 +37,7 @@ if (NOT __GLOG_INCLUDED) GIT_TAG "v0.3.4" UPDATE_COMMAND "" INSTALL_DIR ${gflags_INSTALL} + PATCH_COMMAND autoreconf -i ${glog_PREFIX}/src/glog CONFIGURE_COMMAND env "CFLAGS=${GLOG_C_FLAGS}" "CXXFLAGS=${GLOG_CXX_FLAGS}" ${glog_PREFIX}/src/glog/configure --prefix=${glog_INSTALL} --enable-shared=no --enable-static=yes --with-gflags=${GFLAGS_LIBRARY_DIRS}/.. LOG_DOWNLOAD 1 LOG_CONFIGURE 1 From b29d271b8cd679588618d502add8a4eae2beb853 Mon Sep 17 00:00:00 2001 From: Valentin Tolmer Date: Tue, 21 Jun 2016 16:22:20 -0700 Subject: [PATCH 082/264] add layer_dict to the python interface --- python/caffe/pycaffe.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index ca6d050e2bd..4f84605ba00 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -43,6 +43,16 @@ def _Net_blob_loss_weights(self): self._blob_loss_weights)) return self._blob_loss_weights_dict +@property +def _Net_layer_dict(self): + """ + An OrderedDict (bottom to top, i.e., input to output) of network + layers indexed by name + """ + if not hasattr(self, '_layer_dict'): + self._layer_dict = OrderedDict(zip(self._layer_names, self.layers)) + return self._layer_dict + @property def _Net_params(self): @@ -311,6 +321,7 @@ def __getitem__(self, name): # Attach methods to Net. 
Net.blobs = _Net_blobs Net.blob_loss_weights = _Net_blob_loss_weights +Net.layer_dict = _Net_layer_dict Net.params = _Net_params Net.forward = _Net_forward Net.backward = _Net_backward From 118c97ff5890e92b9aa603d925d947d45086b330 Mon Sep 17 00:00:00 2001 From: Valentin Tolmer Date: Tue, 21 Jun 2016 17:37:55 -0700 Subject: [PATCH 083/264] add clear_param_diffs to the python net interface --- python/caffe/_caffe.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 334088e8a57..a7fb886aa06 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -271,6 +271,7 @@ BOOST_PYTHON_MODULE(_caffe) { .def("_forward", &Net::ForwardFromTo) .def("_backward", &Net::BackwardFromTo) .def("reshape", &Net::Reshape) + .def("clear_param_diffs", &Net::ClearParamDiffs) // The cast is to select a particular overload. .def("copy_from", static_cast::*)(const string)>( &Net::CopyTrainedLayersFrom)) From 892c78dd7833f1818a76d4025076b34946200fa0 Mon Sep 17 00:00:00 2001 From: Valentin Tolmer Date: Tue, 21 Jun 2016 17:42:31 -0700 Subject: [PATCH 084/264] add unit test for clear_param_diffs --- python/caffe/test/test_net.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/caffe/test/test_net.py b/python/caffe/test/test_net.py index 4cacfcd05bb..7fb9f475d43 100644 --- a/python/caffe/test/test_net.py +++ b/python/caffe/test/test_net.py @@ -63,6 +63,17 @@ def test_forward_backward(self): self.net.forward() self.net.backward() + def test_clear_param_diffs(self): + # Run a forward/backward step to have non-zero diffs + self.net.forward() + self.net.backward() + diff = self.net.params["conv"][0].diff + # Check that we have non-zero diffs + self.assertTrue(diff.max() > 0) + self.net.clear_param_diffs() + # Check that the diffs are now 0 + self.assertTrue((diff == 0).all()) + def test_inputs_outputs(self): self.assertEqual(self.net.inputs, []) self.assertEqual(self.net.outputs, ['loss']) From 
5417f106c14c782865e2a5484020b8e45a8b2b80 Mon Sep 17 00:00:00 2001 From: Valentin Tolmer Date: Tue, 21 Jun 2016 16:39:30 -0700 Subject: [PATCH 085/264] add tests for pycaffe's layer_dict --- python/caffe/test/test_net.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/caffe/test/test_net.py b/python/caffe/test/test_net.py index 4cacfcd05bb..546bd5faa0b 100644 --- a/python/caffe/test/test_net.py +++ b/python/caffe/test/test_net.py @@ -59,6 +59,13 @@ def test_memory(self): for bl in blobs: total += bl.data.sum() + bl.diff.sum() + def test_layer_dict(self): + layer_dict = self.net.layer_dict + self.assertEqual(list(layer_dict.keys()), list(self.net._layer_names)) + for i, name in enumerate(self.net._layer_names): + self.assertEqual(layer_dict[name].type, + self.net.layers[i].type) + def test_forward_backward(self): self.net.forward() self.net.backward() From bdb94577d97da5cf5b6ec046952dbe79e9c886bf Mon Sep 17 00:00:00 2001 From: Alican Bozkurt Date: Tue, 28 Jun 2016 16:28:33 -0400 Subject: [PATCH 086/264] add default value for rms_decay --- src/caffe/proto/caffe.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 1556781cbc2..6940a705eb6 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -219,7 +219,7 @@ message SolverParameter { // RMSProp decay value // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) - optional float rms_decay = 38; + optional float rms_decay = 38 [default = 0.99]; // If true, print information about the state of the net that may help with // debugging learning problems. 
From 80f60dae071fca4457d7a439960385a4579f489d Mon Sep 17 00:00:00 2001 From: Alican Bozkurt Date: Tue, 28 Jun 2016 16:59:36 -0400 Subject: [PATCH 087/264] corrected rmsprop documentation --- docs/tutorial/solver.md | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/docs/tutorial/solver.md b/docs/tutorial/solver.md index b719f715a4b..81c626386a2 100644 --- a/docs/tutorial/solver.md +++ b/docs/tutorial/solver.md @@ -209,18 +209,11 @@ What distinguishes the method from SGD is the weight setting $$ W $$ on which we The **RMSprop** (`type: "RMSProp"`), suggested by Tieleman in a Coursera course lecture, is a gradient-based optimization method (like SGD). The update formulas are $$ -(v_t)_i = -\begin{cases} -(v_{t-1})_i + \delta, &(\nabla L(W_t))_i(\nabla L(W_{t-1}))_i > 0\\ -(v_{t-1})_i \cdot (1-\delta), & \text{else} -\end{cases} +\operatorname{MS}((W_t)_i)= \delta\operatorname{MS}((W_{t-1})_i)+ (1-\delta)(\nabla L(W_t))_i^2 \\ +(W_{t+1})_i= (W_{t})_i -\alpha\frac{(\nabla L(W_t))_i}{\sqrt{\operatorname{MS}((W_t)_i)}} $$ -$$ -(W_{t+1})_i =(W_t)_i - \alpha (v_t)_i, -$$ - -If the gradient updates results in oscillations the gradient is reduced by times $$1-\delta$$. Otherwise it will be increased by $$\delta$$. The default value of $$\delta$$ (`rms_decay`) is set to $$\delta = 0.02$$. +The default value of $$\delta$$ (`rms_decay`) is set to $$\delta=0.99$$. [1] T. Tieleman, and G. Hinton. [RMSProp: Divide the gradient by a running average of its recent magnitude](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf). 
From f0b1a9e770594f93fecda9e876faaafaede2b496 Mon Sep 17 00:00:00 2001 From: Carl Doersch Date: Sun, 3 Jul 2016 12:32:19 -0700 Subject: [PATCH 088/264] Add phase support for draw net --- python/caffe/draw.py | 32 +++++++++++++++++++++++++++----- python/draw_net.py | 15 ++++++++++++++- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/python/caffe/draw.py b/python/caffe/draw.py index 61205ca9f37..9eecf6d7b46 100644 --- a/python/caffe/draw.py +++ b/python/caffe/draw.py @@ -127,7 +127,7 @@ def choose_color_by_layertype(layertype): return color -def get_pydot_graph(caffe_net, rankdir, label_edges=True): +def get_pydot_graph(caffe_net, rankdir, label_edges=True, phase=None): """Create a data structure which represents the `caffe_net`. Parameters @@ -137,6 +137,9 @@ def get_pydot_graph(caffe_net, rankdir, label_edges=True): Direction of graph layout. label_edges : boolean, optional Label the edges (default is True). + phase : {caffe_pb2.Phase.TRAIN, caffe_pb2.Phase.TEST, None} optional + Include layers from this network phase. If None, include all layers. 
+ (the default is None) Returns ------- @@ -148,6 +151,19 @@ def get_pydot_graph(caffe_net, rankdir, label_edges=True): pydot_nodes = {} pydot_edges = [] for layer in caffe_net.layer: + if phase is not None: + included = False + if len(layer.include) == 0: + included = True + if len(layer.include) > 0 and len(layer.exclude) > 0: + raise ValueError('layer ' + layer.name + ' has both include ' + 'and exclude specified.') + for layer_phase in layer.include: + included = included or layer_phase.phase == phase + for layer_phase in layer.exclude: + included = included and not layer_phase.phase == phase + if not included: + continue node_label = get_layer_label(layer, rankdir) node_name = "%s_%s" % (layer.name, layer.type) if (len(layer.bottom) == 1 and len(layer.top) == 1 and @@ -186,7 +202,7 @@ def get_pydot_graph(caffe_net, rankdir, label_edges=True): return pydot_graph -def draw_net(caffe_net, rankdir, ext='png'): +def draw_net(caffe_net, rankdir, ext='png', phase=None): """Draws a caffe net and returns the image string encoded using the given extension. @@ -195,16 +211,19 @@ def draw_net(caffe_net, rankdir, ext='png'): caffe_net : a caffe.proto.caffe_pb2.NetParameter protocol buffer. ext : string, optional The image extension (the default is 'png'). + phase : {caffe_pb2.Phase.TRAIN, caffe_pb2.Phase.TEST, None} optional + Include layers from this network phase. If None, include all layers. + (the default is None) Returns ------- string : Postscript representation of the graph. """ - return get_pydot_graph(caffe_net, rankdir).create(format=ext) + return get_pydot_graph(caffe_net, rankdir, phase=phase).create(format=ext) -def draw_net_to_file(caffe_net, filename, rankdir='LR'): +def draw_net_to_file(caffe_net, filename, rankdir='LR', phase=None): """Draws a caffe net, and saves it to file using the format given as the file extension. Use '.raw' to output raw text that you can manually feed to graphviz to draw graphs. 
@@ -216,7 +235,10 @@ def draw_net_to_file(caffe_net, filename, rankdir='LR'): The path to a file where the networks visualization will be stored. rankdir : {'LR', 'TB', 'BT'} Direction of graph layout. + phase : {caffe_pb2.Phase.TRAIN, caffe_pb2.Phase.TEST, None} optional + Include layers from this network phase. If None, include all layers. + (the default is None) """ ext = filename[filename.rfind('.')+1:] with open(filename, 'wb') as fid: - fid.write(draw_net(caffe_net, rankdir, ext)) + fid.write(draw_net(caffe_net, rankdir, ext, phase)) diff --git a/python/draw_net.py b/python/draw_net.py index ec76a744da3..dfe70d26a71 100755 --- a/python/draw_net.py +++ b/python/draw_net.py @@ -28,6 +28,11 @@ def parse_args(): 'http://www.graphviz.org/doc/info/' 'attrs.html#k:rankdir'), default='LR') + parser.add_argument('--phase', + help=('Which network phase to draw: can be TRAIN, ' + 'TEST, or ALL. If ALL, then all layers are drawn ' + 'regardless of phase.'), + default="ALL") args = parser.parse_args() return args @@ -38,7 +43,15 @@ def main(): net = caffe_pb2.NetParameter() text_format.Merge(open(args.input_net_proto_file).read(), net) print('Drawing net to %s' % args.output_image_file) - caffe.draw.draw_net_to_file(net, args.output_image_file, args.rankdir) + phase=None; + if args.phase == "TRAIN": + phase = caffe.TRAIN + elif args.phase == "TEST": + phase = caffe.TEST + elif args.phase != "ALL": + raise ValueError("Unknown phase: " + args.phase) + caffe.draw.draw_net_to_file(net, args.output_image_file, args.rankdir, + phase) if __name__ == '__main__': From f9fd20ea3893c515b19cae6fa3693b1649fb9487 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Fri, 8 Jul 2016 12:05:17 -0700 Subject: [PATCH 089/264] Fix Python installation with CMake install target --- python/CMakeLists.txt | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index a22641401f0..bf492a24b1c 100644 --- 
a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -22,13 +22,19 @@ if(UNIX OR APPLE) endif() # ---[ Install -file(GLOB files1 *.py requirements.txt) -install(FILES ${files1} DESTINATION python) - -file(GLOB files2 caffe/*.py) -install(FILES ${files2} DESTINATION python/caffe) +# scripts +file(GLOB python_files *.py requirements.txt) +install(FILES ${python_files} DESTINATION python) + +# module +install(DIRECTORY caffe + DESTINATION python + FILES_MATCHING + PATTERN "*.py" + PATTERN "ilsvrc_2012_mean.npy" + PATTERN "test" EXCLUDE + ) + +# _caffe.so install(TARGETS pycaffe DESTINATION python/caffe) -install(DIRECTORY caffe/imagenet caffe/proto caffe/test DESTINATION python/caffe) - - From f1a8470aa21e35a5b2bb83007f8fb7680a354815 Mon Sep 17 00:00:00 2001 From: Nishidha Panpaliya Date: Tue, 17 May 2016 01:14:53 -0500 Subject: [PATCH 090/264] Fix for a random failure in this test due to floating point comparison. So, instead of exact match, used EXPECT_FLOAT_EQ that tolerates some precision while comparing two floats --- src/caffe/test/test_embed_layer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/test/test_embed_layer.cpp b/src/caffe/test/test_embed_layer.cpp index dc7f5c4aa47..13f13a878d3 100644 --- a/src/caffe/test/test_embed_layer.cpp +++ b/src/caffe/test/test_embed_layer.cpp @@ -124,7 +124,7 @@ TYPED_TEST(EmbedLayerTest, TestForwardWithBias) { top_offset[4] = 0; bias_offset[0] = 0; for (int j = 0; j < kNumOutput; ++j) { - EXPECT_EQ(layer->blobs()[0]->data_at(weight_offset) + + EXPECT_FLOAT_EQ(layer->blobs()[0]->data_at(weight_offset) + layer->blobs()[1]->data_at(bias_offset), this->blob_top_->data_at(top_offset)); ++top_offset[4]; From 35a9a075cdc65c86021dde4d11e3b1c05e27971b Mon Sep 17 00:00:00 2001 From: Valentin Tolmer Date: Wed, 22 Jun 2016 15:13:54 -0700 Subject: [PATCH 091/264] add set_random_seed to the python interface --- python/caffe/__init__.py | 2 +- python/caffe/_caffe.cpp | 3 +++ 2 files changed, 4 
insertions(+), 1 deletion(-) diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index e2881b89c1b..35868a403a3 100644 --- a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -1,5 +1,5 @@ from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver -from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list +from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed from ._caffe import __version__ from .proto.caffe_pb2 import TRAIN, TEST from .classifier import Classifier diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 334088e8a57..3db55ea4340 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -51,6 +51,8 @@ const int NPY_DTYPE = NPY_FLOAT32; void set_mode_cpu() { Caffe::set_mode(Caffe::CPU); } void set_mode_gpu() { Caffe::set_mode(Caffe::GPU); } +void set_random_seed(unsigned int seed) { Caffe::set_random_seed(seed); } + // For convenience, check that input files can be opened, and raise an // exception that boost will send to Python if not (caffe could still crash // later if the input files are disturbed before they are actually used, but @@ -260,6 +262,7 @@ BOOST_PYTHON_MODULE(_caffe) { // Caffe utility functions bp::def("set_mode_cpu", &set_mode_cpu); bp::def("set_mode_gpu", &set_mode_gpu); + bp::def("set_random_seed", &set_random_seed); bp::def("set_device", &Caffe::SetDevice); bp::def("layer_type_list", &LayerRegistry::LayerTypeList); From a64cfbd08591db0b061ad7ad39c54cd45c0e252a Mon Sep 17 00:00:00 2001 From: Alessandro Giusti Date: Mon, 11 Jul 2016 20:33:16 +0200 Subject: [PATCH 092/264] Update parse_log.py Aligned output description in docstring with actual output returned by parse_log --- tools/extra/parse_log.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/extra/parse_log.py b/tools/extra/parse_log.py index 
bb9b65ad615..375b0db73b3 100755 --- a/tools/extra/parse_log.py +++ b/tools/extra/parse_log.py @@ -16,13 +16,10 @@ def parse_log(path_to_log): """Parse log file - Returns (train_dict_list, train_dict_names, test_dict_list, test_dict_names) + Returns (train_dict_list, test_dict_list) train_dict_list and test_dict_list are lists of dicts that define the table rows - - train_dict_names and test_dict_names are ordered tuples of the column names - for the two dict_lists """ regex_iteration = re.compile('Iteration (\d+)') From 12c74460d3e7c416b869e6b4afa0e5c2e84ec29b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Malte=20St=C3=A6r=20Nissen?= Date: Tue, 12 Jul 2016 13:17:52 +0200 Subject: [PATCH 093/264] Support for spaces in directories when downloading cifar10 --- data/cifar10/get_cifar10.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/cifar10/get_cifar10.sh b/data/cifar10/get_cifar10.sh index 623c848513e..423f10989c4 100755 --- a/data/cifar10/get_cifar10.sh +++ b/data/cifar10/get_cifar10.sh @@ -2,7 +2,7 @@ # This scripts downloads the CIFAR10 (binary version) data and unzips it. DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd $DIR +cd "$DIR" echo "Downloading..." From e14b7f7ea597afe532bf1c4d4013f2c63494d7a6 Mon Sep 17 00:00:00 2001 From: Valentin Tolmer Date: Tue, 21 Jun 2016 14:58:43 -0700 Subject: [PATCH 094/264] improve top_names and bottom_names in pycaffe --- python/caffe/pycaffe.py | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index ca6d050e2bd..5bae18d9a4d 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -292,21 +292,31 @@ def _Net_batch(self, blobs): padding]) yield padded_batch - -class _Net_IdNameWrapper: - """ - A simple wrapper that allows the ids propery to be accessed as a dict - indexed by names. 
Used for top and bottom names +def _Net_get_id_name(func, field): """ - def __init__(self, net, func): - self.net, self.func = net, func + Generic property that maps func to the layer names into an OrderedDict. + + Used for top_names and bottom_names. - def __getitem__(self, name): - # Map the layer name to id - ids = self.func(self.net, list(self.net._layer_names).index(name)) - # Map the blob id to name - id_to_name = list(self.net.blobs) - return [id_to_name[i] for i in ids] + Parameters + ---------- + func: function id -> [id] + field: implementation field name (cache) + + Returns + ------ + A one-parameter function that can be set as a property. + """ + @property + def get_id_name(self): + if not hasattr(self, field): + id_to_name = list(self.blobs) + res = OrderedDict([(self._layer_names[i], + [id_to_name[j] for j in func(self, i)]) + for i in range(len(self.layers))]) + setattr(self, field, res) + return getattr(self, field) + return get_id_name # Attach methods to Net. Net.blobs = _Net_blobs @@ -320,5 +330,5 @@ def __getitem__(self, name): Net._batch = _Net_batch Net.inputs = _Net_inputs Net.outputs = _Net_outputs -Net.top_names = property(lambda n: _Net_IdNameWrapper(n, Net._top_ids)) -Net.bottom_names = property(lambda n: _Net_IdNameWrapper(n, Net._bottom_ids)) +Net.top_names = _Net_get_id_name(Net._top_ids, "_top_names") +Net.bottom_names = _Net_get_id_name(Net._bottom_ids, "_bottom_names") From 7c50a2cb87c6b044f85ced87273d302fb21394f7 Mon Sep 17 00:00:00 2001 From: Valentin Tolmer Date: Tue, 21 Jun 2016 17:17:05 -0700 Subject: [PATCH 095/264] add test for top/bottom names --- python/caffe/test/test_net.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/python/caffe/test/test_net.py b/python/caffe/test/test_net.py index 4cacfcd05bb..96821e40c1d 100644 --- a/python/caffe/test/test_net.py +++ b/python/caffe/test/test_net.py @@ -3,6 +3,7 @@ import os import numpy as np import six +from collections import OrderedDict import caffe @@ -67,6 
+68,18 @@ def test_inputs_outputs(self): self.assertEqual(self.net.inputs, []) self.assertEqual(self.net.outputs, ['loss']) + def test_top_bottom_names(self): + self.assertEqual(self.net.top_names, + OrderedDict([('data', ['data', 'label']), + ('conv', ['conv']), + ('ip', ['ip']), + ('loss', ['loss'])])) + self.assertEqual(self.net.bottom_names, + OrderedDict([('data', []), + ('conv', ['data']), + ('ip', ['conv']), + ('loss', ['ip', 'label'])])) + def test_save_and_read(self): f = tempfile.NamedTemporaryFile(mode='w+', delete=False) f.close() From d9ad2ef90a1cbaa2b22b229539a14341efe59ee6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Malte=20St=C3=A6r=20Nissen?= Date: Wed, 13 Jul 2016 11:17:54 +0200 Subject: [PATCH 096/264] Support spaces in path when downloading ILSVRC12 and MNIST --- data/ilsvrc12/get_ilsvrc_aux.sh | 2 +- data/mnist/get_mnist.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/data/ilsvrc12/get_ilsvrc_aux.sh b/data/ilsvrc12/get_ilsvrc_aux.sh index 90935f25099..dc0d0a72790 100755 --- a/data/ilsvrc12/get_ilsvrc_aux.sh +++ b/data/ilsvrc12/get_ilsvrc_aux.sh @@ -8,7 +8,7 @@ # - the training splits with labels DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd $DIR +cd "$DIR" echo "Downloading..." diff --git a/data/mnist/get_mnist.sh b/data/mnist/get_mnist.sh index 6d875219489..ecadffa44f7 100755 --- a/data/mnist/get_mnist.sh +++ b/data/mnist/get_mnist.sh @@ -2,7 +2,7 @@ # This scripts downloads the mnist data and unzips it. DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd $DIR +cd "$DIR" echo "Downloading..." 
From 93d321227f0681165b126d9ca47b211f5d2c1909 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Wed, 13 Jul 2016 15:58:29 -0700 Subject: [PATCH 097/264] Add "set -e" and $@ to example scripts --- examples/cifar10/create_cifar10.sh | 1 + examples/cifar10/train_full.sh | 7 ++++--- examples/cifar10/train_full_sigmoid.sh | 3 ++- examples/cifar10/train_full_sigmoid_bn.sh | 3 ++- examples/cifar10/train_quick.sh | 5 +++-- examples/imagenet/create_imagenet.sh | 1 + examples/imagenet/resume_training.sh | 4 +++- examples/imagenet/train_caffenet.sh | 3 ++- examples/mnist/create_mnist.sh | 1 + examples/mnist/train_lenet.sh | 3 ++- examples/mnist/train_lenet_adam.sh | 3 ++- examples/mnist/train_lenet_consolidated.sh | 3 ++- examples/mnist/train_lenet_rmsprop.sh | 4 +++- examples/mnist/train_mnist_autoencoder.sh | 3 ++- examples/mnist/train_mnist_autoencoder_adadelta.sh | 3 ++- examples/mnist/train_mnist_autoencoder_adagrad.sh | 3 ++- examples/mnist/train_mnist_autoencoder_nesterov.sh | 3 ++- examples/siamese/create_mnist_siamese.sh | 1 + examples/siamese/train_mnist_siamese.sh | 3 ++- 19 files changed, 39 insertions(+), 18 deletions(-) diff --git a/examples/cifar10/create_cifar10.sh b/examples/cifar10/create_cifar10.sh index a42725cb610..7ee1d6ad0a0 100755 --- a/examples/cifar10/create_cifar10.sh +++ b/examples/cifar10/create_cifar10.sh @@ -1,5 +1,6 @@ #!/usr/bin/env sh # This script converts the cifar data into leveldb format. 
+set -e EXAMPLE=examples/cifar10 DATA=data/cifar10 diff --git a/examples/cifar10/train_full.sh b/examples/cifar10/train_full.sh index ef112e1f6db..06ecc2dccb0 100755 --- a/examples/cifar10/train_full.sh +++ b/examples/cifar10/train_full.sh @@ -1,16 +1,17 @@ #!/usr/bin/env sh +set -e TOOLS=./build/tools $TOOLS/caffe train \ - --solver=examples/cifar10/cifar10_full_solver.prototxt + --solver=examples/cifar10/cifar10_full_solver.prototxt $@ # reduce learning rate by factor of 10 $TOOLS/caffe train \ --solver=examples/cifar10/cifar10_full_solver_lr1.prototxt \ - --snapshot=examples/cifar10/cifar10_full_iter_60000.solverstate.h5 + --snapshot=examples/cifar10/cifar10_full_iter_60000.solverstate.h5 $@ # reduce learning rate by factor of 10 $TOOLS/caffe train \ --solver=examples/cifar10/cifar10_full_solver_lr2.prototxt \ - --snapshot=examples/cifar10/cifar10_full_iter_65000.solverstate.h5 + --snapshot=examples/cifar10/cifar10_full_iter_65000.solverstate.h5 $@ diff --git a/examples/cifar10/train_full_sigmoid.sh b/examples/cifar10/train_full_sigmoid.sh index 9cff06d3e34..9b5d5213b2a 100755 --- a/examples/cifar10/train_full_sigmoid.sh +++ b/examples/cifar10/train_full_sigmoid.sh @@ -1,7 +1,8 @@ #!/usr/bin/env sh +set -e TOOLS=./build/tools $TOOLS/caffe train \ - --solver=examples/cifar10/cifar10_full_sigmoid_solver.prototxt + --solver=examples/cifar10/cifar10_full_sigmoid_solver.prototxt $@ diff --git a/examples/cifar10/train_full_sigmoid_bn.sh b/examples/cifar10/train_full_sigmoid_bn.sh index 011387c996e..05547f3a104 100755 --- a/examples/cifar10/train_full_sigmoid_bn.sh +++ b/examples/cifar10/train_full_sigmoid_bn.sh @@ -1,7 +1,8 @@ #!/usr/bin/env sh +set -e TOOLS=./build/tools $TOOLS/caffe train \ - --solver=examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt + --solver=examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt $@ diff --git a/examples/cifar10/train_quick.sh b/examples/cifar10/train_quick.sh index 6b7d228879b..d2b875340ee 100755 --- 
a/examples/cifar10/train_quick.sh +++ b/examples/cifar10/train_quick.sh @@ -1,11 +1,12 @@ #!/usr/bin/env sh +set -e TOOLS=./build/tools $TOOLS/caffe train \ - --solver=examples/cifar10/cifar10_quick_solver.prototxt + --solver=examples/cifar10/cifar10_quick_solver.prototxt $@ # reduce learning rate by factor of 10 after 8 epochs $TOOLS/caffe train \ --solver=examples/cifar10/cifar10_quick_solver_lr1.prototxt \ - --snapshot=examples/cifar10/cifar10_quick_iter_4000.solverstate.h5 + --snapshot=examples/cifar10/cifar10_quick_iter_4000.solverstate.h5 $@ diff --git a/examples/imagenet/create_imagenet.sh b/examples/imagenet/create_imagenet.sh index e912ac43cd7..1bf08b1aa8f 100755 --- a/examples/imagenet/create_imagenet.sh +++ b/examples/imagenet/create_imagenet.sh @@ -1,6 +1,7 @@ #!/usr/bin/env sh # Create the imagenet lmdb inputs # N.B. set the path to the imagenet train + val data dirs +set -e EXAMPLE=examples/imagenet DATA=data/ilsvrc12 diff --git a/examples/imagenet/resume_training.sh b/examples/imagenet/resume_training.sh index bf7945c0fd0..4aef204368e 100755 --- a/examples/imagenet/resume_training.sh +++ b/examples/imagenet/resume_training.sh @@ -1,5 +1,7 @@ #!/usr/bin/env sh +set -e ./build/tools/caffe train \ --solver=models/bvlc_reference_caffenet/solver.prototxt \ - --snapshot=models/bvlc_reference_caffenet/caffenet_train_10000.solverstate.h5 + --snapshot=models/bvlc_reference_caffenet/caffenet_train_10000.solverstate.h5 \ + $@ diff --git a/examples/imagenet/train_caffenet.sh b/examples/imagenet/train_caffenet.sh index 94558ec5466..a5094d44ae0 100755 --- a/examples/imagenet/train_caffenet.sh +++ b/examples/imagenet/train_caffenet.sh @@ -1,4 +1,5 @@ #!/usr/bin/env sh +set -e ./build/tools/caffe train \ - --solver=models/bvlc_reference_caffenet/solver.prototxt + --solver=models/bvlc_reference_caffenet/solver.prototxt $@ diff --git a/examples/mnist/create_mnist.sh b/examples/mnist/create_mnist.sh index 06ecc27de63..f5e2e7960c5 100755 --- 
a/examples/mnist/create_mnist.sh +++ b/examples/mnist/create_mnist.sh @@ -1,6 +1,7 @@ #!/usr/bin/env sh # This script converts the mnist data into lmdb/leveldb format, # depending on the value assigned to $BACKEND. +set -e EXAMPLE=examples/mnist DATA=data/mnist diff --git a/examples/mnist/train_lenet.sh b/examples/mnist/train_lenet.sh index 1b6bf7d978d..f7f9b86198d 100755 --- a/examples/mnist/train_lenet.sh +++ b/examples/mnist/train_lenet.sh @@ -1,3 +1,4 @@ #!/usr/bin/env sh +set -e -./build/tools/caffe train --solver=examples/mnist/lenet_solver.prototxt +./build/tools/caffe train --solver=examples/mnist/lenet_solver.prototxt $@ diff --git a/examples/mnist/train_lenet_adam.sh b/examples/mnist/train_lenet_adam.sh index a32ecf2d9c2..7b4e905681b 100755 --- a/examples/mnist/train_lenet_adam.sh +++ b/examples/mnist/train_lenet_adam.sh @@ -1,3 +1,4 @@ #!/usr/bin/env sh +set -e -./build/tools/caffe train --solver=examples/mnist/lenet_solver_adam.prototxt +./build/tools/caffe train --solver=examples/mnist/lenet_solver_adam.prototxt $@ diff --git a/examples/mnist/train_lenet_consolidated.sh b/examples/mnist/train_lenet_consolidated.sh index c855467897e..c5f02666822 100755 --- a/examples/mnist/train_lenet_consolidated.sh +++ b/examples/mnist/train_lenet_consolidated.sh @@ -1,4 +1,5 @@ #!/usr/bin/env sh +set -e ./build/tools/caffe train \ - --solver=examples/mnist/lenet_consolidated_solver.prototxt + --solver=examples/mnist/lenet_consolidated_solver.prototxt $@ diff --git a/examples/mnist/train_lenet_rmsprop.sh b/examples/mnist/train_lenet_rmsprop.sh index 621cab238bf..adfa7ab0fca 100755 --- a/examples/mnist/train_lenet_rmsprop.sh +++ b/examples/mnist/train_lenet_rmsprop.sh @@ -1,3 +1,5 @@ #!/usr/bin/env sh +set -e -./build/tools/caffe train --solver=examples/mnist/lenet_solver_rmsprop.prototxt +./build/tools/caffe train \ + --solver=examples/mnist/lenet_solver_rmsprop.prototxt $@ diff --git a/examples/mnist/train_mnist_autoencoder.sh 
b/examples/mnist/train_mnist_autoencoder.sh index cfd67e82fda..724a0f14a49 100755 --- a/examples/mnist/train_mnist_autoencoder.sh +++ b/examples/mnist/train_mnist_autoencoder.sh @@ -1,4 +1,5 @@ #!/usr/bin/env sh +set -e ./build/tools/caffe train \ - --solver=examples/mnist/mnist_autoencoder_solver.prototxt + --solver=examples/mnist/mnist_autoencoder_solver.prototxt $@ diff --git a/examples/mnist/train_mnist_autoencoder_adadelta.sh b/examples/mnist/train_mnist_autoencoder_adadelta.sh index 4be0ebddedc..a660dbb9ed2 100755 --- a/examples/mnist/train_mnist_autoencoder_adadelta.sh +++ b/examples/mnist/train_mnist_autoencoder_adadelta.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -e ./build/tools/caffe train \ - --solver=examples/mnist/mnist_autoencoder_solver_adadelta.prototxt + --solver=examples/mnist/mnist_autoencoder_solver_adadelta.prototxt $@ diff --git a/examples/mnist/train_mnist_autoencoder_adagrad.sh b/examples/mnist/train_mnist_autoencoder_adagrad.sh index 95fe1b17bd5..4c11dfa67ac 100755 --- a/examples/mnist/train_mnist_autoencoder_adagrad.sh +++ b/examples/mnist/train_mnist_autoencoder_adagrad.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -e ./build/tools/caffe train \ - --solver=examples/mnist/mnist_autoencoder_solver_adagrad.prototxt + --solver=examples/mnist/mnist_autoencoder_solver_adagrad.prototxt $@ diff --git a/examples/mnist/train_mnist_autoencoder_nesterov.sh b/examples/mnist/train_mnist_autoencoder_nesterov.sh index cf19ea749b3..fd0559d2488 100755 --- a/examples/mnist/train_mnist_autoencoder_nesterov.sh +++ b/examples/mnist/train_mnist_autoencoder_nesterov.sh @@ -1,4 +1,5 @@ #!/bin/bash +set -e ./build/tools/caffe train \ - --solver=examples/mnist/mnist_autoencoder_solver_nesterov.prototxt + --solver=examples/mnist/mnist_autoencoder_solver_nesterov.prototxt $@ diff --git a/examples/siamese/create_mnist_siamese.sh b/examples/siamese/create_mnist_siamese.sh index 43ad6b184a7..03adce54d9b 100755 --- a/examples/siamese/create_mnist_siamese.sh +++ 
b/examples/siamese/create_mnist_siamese.sh @@ -1,5 +1,6 @@ #!/usr/bin/env sh # This script converts the mnist data into leveldb format. +set -e EXAMPLES=./build/examples/siamese DATA=./data/mnist diff --git a/examples/siamese/train_mnist_siamese.sh b/examples/siamese/train_mnist_siamese.sh index 84a30a8ac44..e01ac2ceefd 100755 --- a/examples/siamese/train_mnist_siamese.sh +++ b/examples/siamese/train_mnist_siamese.sh @@ -1,5 +1,6 @@ #!/usr/bin/env sh +set -e TOOLS=./build/tools -$TOOLS/caffe train --solver=examples/siamese/mnist_siamese_solver.prototxt +$TOOLS/caffe train --solver=examples/siamese/mnist_siamese_solver.prototxt $@ From a110ac7c2ad9e0966a02ba360327907cd2646dd4 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Fri, 15 Jul 2016 14:12:01 -0700 Subject: [PATCH 098/264] Stop setting cache timeout in TravisCI It refers to the caching command timeout, not how long before the caches expire as I had thought. --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4849a7ac289..3297954755d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,6 @@ env: - BUILD_NAME="cudnn-cmake" WITH_CMAKE=true WITH_CUDA=true WITH_CUDNN=true cache: - timeout: 604800 # 1 week apt: true directories: - ~/protobuf3 From 9376bde1beba649e4c522b742064223ac9d2cab4 Mon Sep 17 00:00:00 2001 From: jasjuang Date: Thu, 21 Jul 2016 12:04:41 -0700 Subject: [PATCH 099/264] add in sudo make uninstall for cmake --- CMakeLists.txt | 11 +++++++++++ cmake/Uninstall.cmake.in | 26 ++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) create mode 100644 cmake/Uninstall.cmake.in diff --git a/CMakeLists.txt b/CMakeLists.txt index da7142c9b3c..7b8dab2bb24 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -85,8 +85,19 @@ if(BUILD_python) add_dependencies(pytest pycaffe) endif() +# ---[ uninstall target +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Uninstall.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/cmake/Uninstall.cmake + IMMEDIATE @ONLY) + 
+add_custom_target(uninstall + COMMAND ${CMAKE_COMMAND} -P + ${CMAKE_CURRENT_BINARY_DIR}/cmake/Uninstall.cmake) + # ---[ Configuration summary caffe_print_configuration_summary() # ---[ Export configs generation caffe_generate_export_configs() + diff --git a/cmake/Uninstall.cmake.in b/cmake/Uninstall.cmake.in new file mode 100644 index 00000000000..bb8e2964e46 --- /dev/null +++ b/cmake/Uninstall.cmake.in @@ -0,0 +1,26 @@ +if(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") + message(FATAL_ERROR "Cannot find install manifest: @CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") +endif(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") + +if (NOT DEFINED CMAKE_INSTALL_PREFIX) + set (CMAKE_INSTALL_PREFIX "@CMAKE_INSTALL_PREFIX@") +endif () + message(${CMAKE_INSTALL_PREFIX}) + +file(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) +string(REGEX REPLACE "\n" ";" files "${files}") +foreach(file ${files}) + message(STATUS "Uninstalling $ENV{DESTDIR}${file}") + if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") + exec_program( + "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\"" + OUTPUT_VARIABLE rm_out + RETURN_VALUE rm_retval + ) + if(NOT "${rm_retval}" STREQUAL 0) + message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}") + endif(NOT "${rm_retval}" STREQUAL 0) + else(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") + message(STATUS "File $ENV{DESTDIR}${file} does not exist.") + endif(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}") +endforeach(file) \ No newline at end of file From 0ad1284bf6af4ee59f782b72cdf4af0fd194af29 Mon Sep 17 00:00:00 2001 From: Ivan Shapovalov Date: Mon, 25 Jul 2016 09:01:24 +0300 Subject: [PATCH 100/264] CMake: link with ${HDF5_HL_LIBRARIES} Fixes issue #3224. 
--- cmake/Dependencies.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index c7b6a17aa69..d7eb59e333a 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -26,7 +26,7 @@ include(cmake/ProtoBuf.cmake) # ---[ HDF5 find_package(HDF5 COMPONENTS HL REQUIRED) include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) -list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES}) +list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES}) # ---[ LMDB if(USE_LMDB) From c62e06bccafa57f5b21f90b49e81a988d50a4620 Mon Sep 17 00:00:00 2001 From: Hans Gaiser Date: Tue, 26 Jul 2016 11:44:44 +0200 Subject: [PATCH 101/264] Fix search for Atlas on arch. --- cmake/Modules/FindAtlas.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/Modules/FindAtlas.cmake b/cmake/Modules/FindAtlas.cmake index 6e1564351c7..9c665a47bd5 100644 --- a/cmake/Modules/FindAtlas.cmake +++ b/cmake/Modules/FindAtlas.cmake @@ -26,9 +26,9 @@ set(Atlas_LIB_SEARCH_PATHS find_path(Atlas_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS ${Atlas_INCLUDE_SEARCH_PATHS}) find_path(Atlas_CLAPACK_INCLUDE_DIR NAMES clapack.h PATHS ${Atlas_INCLUDE_SEARCH_PATHS}) -find_library(Atlas_CBLAS_LIBRARY NAMES ptcblas_r ptcblas cblas_r cblas PATHS ${Atlas_LIB_SEARCH_PATHS}) -find_library(Atlas_BLAS_LIBRARY NAMES atlas_r atlas PATHS ${Atlas_LIB_SEARCH_PATHS}) -find_library(Atlas_LAPACK_LIBRARY NAMES alapack_r alapack lapack_atlas PATHS ${Atlas_LIB_SEARCH_PATHS}) +find_library(Atlas_CBLAS_LIBRARY NAMES ptcblas_r ptcblas cblas_r cblas PATHS ${Atlas_LIB_SEARCH_PATHS}) +find_library(Atlas_BLAS_LIBRARY NAMES atlas_r atlas PATHS ${Atlas_LIB_SEARCH_PATHS}) +find_library(Atlas_LAPACK_LIBRARY NAMES lapack alapack_r alapack lapack_atlas PATHS ${Atlas_LIB_SEARCH_PATHS}) set(LOOKED_FOR Atlas_CBLAS_INCLUDE_DIR From 61e01654d2054531133a6d154a69b872a4479099 Mon Sep 17 00:00:00 2001 From: Fisher Yu Date: Sat, 6 Aug 2016 
23:01:45 -0400 Subject: [PATCH 102/264] num in blob is deprecated --- src/caffe/layers/loss_layer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/caffe/layers/loss_layer.cpp b/src/caffe/layers/loss_layer.cpp index c0b7a862181..afb1ce94893 100644 --- a/src/caffe/layers/loss_layer.cpp +++ b/src/caffe/layers/loss_layer.cpp @@ -16,8 +16,8 @@ void LossLayer::LayerSetUp( template void LossLayer::Reshape( const vector*>& bottom, const vector*>& top) { - CHECK_EQ(bottom[0]->num(), bottom[1]->num()) - << "The data and label should have the same number."; + CHECK_EQ(bottom[0]->shape(0), bottom[1]->shape(0)) + << "The data and label should have the same first dimension."; vector loss_shape(0); // Loss layers output a scalar; 0 axes. top[0]->Reshape(loss_shape); } From d607858b90b645d8177c3970d782f0ab5c529558 Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Tue, 9 Aug 2016 15:13:47 +0000 Subject: [PATCH 103/264] Fix more float comparison precision issue With reference to this commit: f1a8470aa21e35a5b2bb83007f8fb7680a354815 This fix changes some EXPECT_EQ into EXPECT_FLOAT_EQ . 
--- src/caffe/test/test_convolution_layer.cpp | 2 +- src/caffe/test/test_gradient_based_solver.cpp | 8 ++++---- src/caffe/test/test_neuron_layer.cpp | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/caffe/test/test_convolution_layer.cpp b/src/caffe/test/test_convolution_layer.cpp index 9bb19d13592..85c10a29483 100644 --- a/src/caffe/test/test_convolution_layer.cpp +++ b/src/caffe/test/test_convolution_layer.cpp @@ -695,7 +695,7 @@ TYPED_TEST(ConvolutionLayerTest, TestNDAgainst2D) { } ASSERT_EQ(backward_result_nd.count(), backward_result_2d.count()); for (int i = 0; i < backward_result_2d.count(); ++i) { - EXPECT_EQ(backward_result_2d.cpu_diff()[i], + EXPECT_FLOAT_EQ(backward_result_2d.cpu_diff()[i], backward_result_nd.cpu_diff()[i]); } ASSERT_EQ(backward_weight_result_nd.count(), diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index 975a8f0f88a..9395f4e95c6 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -538,9 +538,9 @@ class GradientBasedSolverTest : public MultiDeviceTest { const vector*>& params = solver_->net()->learnable_params(); for (int i = 0; i < params.size(); ++i) { for (int j = 0; j < params[i]->count(); ++j) { - EXPECT_EQ(param_copies[i]->cpu_data()[j], params[i]->cpu_data()[j]) + EXPECT_FLOAT_EQ(param_copies[i]->cpu_data()[j], params[i]->cpu_data()[j]) << "param " << i << " data differed at dim " << j; - EXPECT_EQ(param_copies[i]->cpu_diff()[j], params[i]->cpu_diff()[j]) + EXPECT_FLOAT_EQ(param_copies[i]->cpu_diff()[j], params[i]->cpu_diff()[j]) << "param " << i << " diff differed at dim " << j; } } @@ -549,9 +549,9 @@ class GradientBasedSolverTest : public MultiDeviceTest { const vector > >& history = solver_->history(); for (int i = 0; i < history.size(); ++i) { for (int j = 0; j < history[i]->count(); ++j) { - EXPECT_EQ(history_copies[i]->cpu_data()[j], history[i]->cpu_data()[j]) + 
EXPECT_FLOAT_EQ(history_copies[i]->cpu_data()[j], history[i]->cpu_data()[j]) << "history blob " << i << " data differed at dim " << j; - EXPECT_EQ(history_copies[i]->cpu_diff()[j], history[i]->cpu_diff()[j]) + EXPECT_FLOAT_EQ(history_copies[i]->cpu_diff()[j], history[i]->cpu_diff()[j]) << "history blob " << i << " diff differed at dim " << j; } } diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp index 342f825cec3..57bd47b3a2e 100644 --- a/src/caffe/test/test_neuron_layer.cpp +++ b/src/caffe/test/test_neuron_layer.cpp @@ -791,16 +791,16 @@ TYPED_TEST(NeuronLayerTest, TestPReLUInPlace) { ip2.Backward(blob_middle_vec_2, propagate_down, blob_bottom_vec_2); // Check numbers for (int s = 0; s < blob_bottom_2->count(); ++s) { - EXPECT_EQ(this->blob_bottom_->cpu_diff()[s], blob_bottom_2->cpu_diff()[s]); + EXPECT_FLOAT_EQ(this->blob_bottom_->cpu_diff()[s], blob_bottom_2->cpu_diff()[s]); } for (int s = 0; s < ip.blobs()[0]->count(); ++s) { - EXPECT_EQ(ip.blobs()[0]->cpu_diff()[s], ip2.blobs()[0]->cpu_diff()[s]); + EXPECT_FLOAT_EQ(ip.blobs()[0]->cpu_diff()[s], ip2.blobs()[0]->cpu_diff()[s]); } for (int s = 0; s < ip.blobs()[1]->count(); ++s) { - EXPECT_EQ(ip.blobs()[1]->cpu_diff()[s], ip2.blobs()[1]->cpu_diff()[s]); + EXPECT_FLOAT_EQ(ip.blobs()[1]->cpu_diff()[s], ip2.blobs()[1]->cpu_diff()[s]); } for (int s = 0; s < prelu.blobs()[0]->count(); ++s) { - EXPECT_EQ(prelu.blobs()[0]->cpu_diff()[s], + EXPECT_FLOAT_EQ(prelu.blobs()[0]->cpu_diff()[s], prelu2.blobs()[0]->cpu_diff()[s]); } } From 42d20fe21eeb8067b09ef5e935bb4c235dbf9f3f Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Wed, 10 Aug 2016 14:36:33 +0000 Subject: [PATCH 104/264] Import bash completion script for caffe from Debian Package. Imported from Debian Package caffe (1.0.0~rc3+20160715-g42cd785-2). 
--- scripts/caffe | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 scripts/caffe diff --git a/scripts/caffe b/scripts/caffe new file mode 100644 index 00000000000..8a0b22af6ac --- /dev/null +++ b/scripts/caffe @@ -0,0 +1,73 @@ +# bash completion for Caffe's command line utility -*- shell-script -*- +# COPYRIGHT (C) 2015,2016 Zhou Mo +# License: BSD-2-Clause +# Originally appeard at https://github.com/BVLC/caffe/issues/3149 + +# Updated for caffe (1.0.0~rc3+20160715-g42cd785) +_caffe() +{ + local cur prev words cword + _init_completion -s || return + + local prototxts='@(prototxt)' + local caffemodels='@(caffemodel,binaryproto)' + local solverstates='@(solverstate)' + local caffefiles='@(prototxt|caffemodel|solverstate)' + + local flags='-gpu -iterations -model -snapshot -solver -weights -sighup_effect -sigint_effect -level -stage -phase' + + if [[ $cword -eq 1 ]]; then + COMPREPLY=( $( compgen -W 'train test time device_query' -- "$cur" ) ) + return 0 + fi + + if [[ $cword -eq 2 ]]; then + case ${words[1]} in + train|test|device_query|time) + COMPREPLY=( $( compgen -W "$flags" -- "$cur") ) + return 0 + ;; + *) + return 0 + ;; + esac + fi + + case $prev in + -gpu|-iterations|-version|-level|-stage) + return 0 + ;; + -solver|-model) + _filedir $prototxts + return 0 + ;; + -weights) + _filedir $caffemodels + return 0 + ;; + -snapshot) + _filedir $solverstates + return 0 + ;; + -sighup_effect|-sigint_effect) + COMPREPLY=( $( compgen -W 'snapshot stop none' -- "$cur") ) + return 0 + ;; + -phase) + COMPREPLY=( $( compgen -W 'TRAIN TEST' -- "$cur") ) + return 0 + ;; + *) + COMPREPLY=( $( compgen -W "$flags" -- "$cur") ) + return 0 + ;; + esac + + # file completion on relevant files + _filedir "$caffefiles" + + return 0 +} +complete -F _caffe caffe + +# vim From 6382d67da1d2b5d9ebe92df8a20a8ac1947366ea Mon Sep 17 00:00:00 2001 From: An Tran Date: Fri, 12 Aug 2016 16:39:11 +0800 Subject: [PATCH 105/264] small 
improments in compute_image_mean --- tools/compute_image_mean.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/compute_image_mean.cpp b/tools/compute_image_mean.cpp index 2035d515195..417f5e4c622 100644 --- a/tools/compute_image_mean.cpp +++ b/tools/compute_image_mean.cpp @@ -22,9 +22,11 @@ DEFINE_string(backend, "lmdb", "The backend {leveldb, lmdb} containing the images"); int main(int argc, char** argv) { +#ifdef USE_OPENCV ::google::InitGoogleLogging(argv[0]); + // Print output to stderr (while still logging) + FLAGS_alsologtostderr = 1; -#ifdef USE_OPENCV #ifndef GFLAGS_GFLAGS_H_ namespace gflags = google; #endif @@ -65,7 +67,7 @@ int main(int argc, char** argv) { for (int i = 0; i < size_in_datum; ++i) { sum_blob.add_data(0.); } - LOG(INFO) << "Starting Iteration"; + LOG(INFO) << "Starting iteration"; while (cursor->valid()) { Datum datum; datum.ParseFromString(cursor->value()); @@ -114,7 +116,7 @@ int main(int argc, char** argv) { for (int i = 0; i < dim; ++i) { mean_values[c] += sum_blob.data(dim * c + i); } - LOG(INFO) << "mean_value channel [" << c << "]:" << mean_values[c] / dim; + LOG(INFO) << "mean_value channel [" << c << "]: " << mean_values[c] / dim; } #else LOG(FATAL) << "This tool requires OpenCV; compile with USE_OPENCV."; From d4a413cbf56f43a9d5a6ea3a5568447117cefff0 Mon Sep 17 00:00:00 2001 From: Sungjun HONG Date: Sun, 14 Aug 2016 17:51:56 +0900 Subject: [PATCH 106/264] Correct a mistake on math notation --- examples/net_surgery.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/net_surgery.ipynb b/examples/net_surgery.ipynb index d50d503bfe0..217c2d1a742 100644 --- a/examples/net_surgery.ipynb +++ b/examples/net_surgery.ipynb @@ -5479,7 +5479,7 @@ "\n", "Let's take the standard Caffe Reference ImageNet model \"CaffeNet\" and transform it into a fully convolutional net for efficient, dense inference on large inputs. 
This model generates a classification map that covers a given input size instead of a single classification. In particular a 8 $\\times$ 8 classification map on a 451 $\\times$ 451 input gives 64x the output in only 3x the time. The computation exploits a natural efficiency of convolutional network (convnet) structure by amortizing the computation of overlapping receptive fields.\n", "\n", - "To do so we translate the `InnerProduct` matrix multiplication layers of CaffeNet into `Convolutional` layers. This is the only change: the other layer types are agnostic to spatial size. Convolution is translation-invariant, activations are elementwise operations, and so on. The `fc6` inner product when carried out as convolution by `fc6-conv` turns into a 6 \\times 6 filter with stride 1 on `pool5`. Back in image space this gives a classification for each 227 $\\times$ 227 box with stride 32 in pixels. Remember the equation for output map / receptive field size, output = (input - kernel_size) / stride + 1, and work out the indexing details for a clear understanding." + "To do so we translate the `InnerProduct` matrix multiplication layers of CaffeNet into `Convolutional` layers. This is the only change: the other layer types are agnostic to spatial size. Convolution is translation-invariant, activations are elementwise operations, and so on. The `fc6` inner product when carried out as convolution by `fc6-conv` turns into a 6 $\\times$ 6 filter with stride 1 on `pool5`. Back in image space this gives a classification for each 227 $\\times$ 227 box with stride 32 in pixels. Remember the equation for output map / receptive field size, output = (input - kernel_size) / stride + 1, and work out the indexing details for a clear understanding." 
] }, { From 5d594806aed7d44feb36cae12bacbaabfabf6fa8 Mon Sep 17 00:00:00 2001 From: Nitish Keskar Date: Mon, 15 Aug 2016 19:47:34 -0500 Subject: [PATCH 107/264] Fixing Typo In Sigmoid CIFAR-10 Examples There was a mismatch between the iterations interval in the comment and the actual code. --- examples/cifar10/cifar10_full_sigmoid_solver.prototxt | 2 +- examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cifar10/cifar10_full_sigmoid_solver.prototxt b/examples/cifar10/cifar10_full_sigmoid_solver.prototxt index 7dd3ecb9d8e..a8e5539937d 100644 --- a/examples/cifar10/cifar10_full_sigmoid_solver.prototxt +++ b/examples/cifar10/cifar10_full_sigmoid_solver.prototxt @@ -17,7 +17,7 @@ momentum: 0.9 lr_policy: "step" gamma: 1 stepsize: 5000 -# Display every 200 iterations +# Display every 100 iterations display: 100 # The maximum number of iterations max_iter: 60000 diff --git a/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt b/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt index a57b280fd1e..a4dabd67ca0 100644 --- a/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt +++ b/examples/cifar10/cifar10_full_sigmoid_solver_bn.prototxt @@ -17,7 +17,7 @@ momentum: 0.9 lr_policy: "step" gamma: 1 stepsize: 5000 -# Display every 200 iterations +# Display every 100 iterations display: 100 # The maximum number of iterations max_iter: 60000 From 9029695ee358caa82116fc192cb4d505ea936274 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 18 Aug 2016 11:03:42 -0700 Subject: [PATCH 108/264] [build] set default BLAS include for OS X 10.11 the latest hunt for the ever-elusive vecLib/Accelerate --- Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 403e00a38a1..24894062a6c 100644 --- a/Makefile +++ b/Makefile @@ -382,8 +382,11 @@ else LIBRARIES += cblas # 10.10 has accelerate while 10.9 has veclib XCODE_CLT_VER := $(shell pkgutil 
--pkg-info=com.apple.pkg.CLTools_Executables | grep 'version' | sed 's/[^0-9]*\([0-9]\).*/\1/') + XCODE_CLT_GEQ_7 := $(shell [ $(XCODE_CLT_VER) -gt 6 ] && echo 1) XCODE_CLT_GEQ_6 := $(shell [ $(XCODE_CLT_VER) -gt 5 ] && echo 1) - ifeq ($(XCODE_CLT_GEQ_6), 1) + ifeq ($(XCODE_CLT_GEQ_7), 1) + BLAS_INCLUDE ?= /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers + else ifeq ($(XCODE_CLT_GEQ_6), 1) BLAS_INCLUDE ?= /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ LDFLAGS += -framework Accelerate else From 1110d2ba7b52c35f898da8febdd53524761ecb97 Mon Sep 17 00:00:00 2001 From: Tianwei Shen Date: Tue, 26 Jul 2016 00:19:35 +0800 Subject: [PATCH 109/264] make cmake find cuDNN on Mac OS dylib instead of so on OS X --- cmake/Cuda.cmake | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake index 286a42802b4..eeeb7325ffd 100644 --- a/cmake/Cuda.cmake +++ b/cmake/Cuda.cmake @@ -174,11 +174,18 @@ function(detect_cuDNN) PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDA_TOOLKIT_INCLUDE} DOC "Path to cuDNN include directory." 
) - get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH) - find_library(CUDNN_LIBRARY NAMES libcudnn.so # libcudnn_static.a - PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} ${__libpath_hist} - DOC "Path to cuDNN library.") + # dynamic libs have different suffix in mac and linux + if(APPLE) + set(CUDNN_LIB_NAME "libcudnn.dylib") + else() + set(CUDNN_LIB_NAME "libcudnn.so") + endif() + get_filename_component(__libpath_hist ${CUDA_CUDART_LIBRARY} PATH) + find_library(CUDNN_LIBRARY NAMES ${CUDNN_LIB_NAME} + PATHS ${CUDNN_ROOT} $ENV{CUDNN_ROOT} ${CUDNN_INCLUDE} ${__libpath_hist} ${__libpath_hist}/../lib + DOC "Path to cuDNN library.") + if(CUDNN_INCLUDE AND CUDNN_LIBRARY) set(HAVE_CUDNN TRUE PARENT_SCOPE) set(CUDNN_FOUND TRUE PARENT_SCOPE) From 51c39b87738962c323c8bd05aa4c23ac97e1c030 Mon Sep 17 00:00:00 2001 From: Preston Parry Date: Sun, 28 Aug 2016 14:32:41 -0700 Subject: [PATCH 110/264] updates tense in docs "could" seems to imply for some reason that something is blocking one from calling the registered layers. "can" lays out more directly that a user can choose to do this. --- include/caffe/layer_factory.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/caffe/layer_factory.hpp b/include/caffe/layer_factory.hpp index f385afccfee..2369c132911 100644 --- a/include/caffe/layer_factory.hpp +++ b/include/caffe/layer_factory.hpp @@ -1,6 +1,6 @@ /** * @brief A layer factory that allows one to register layers. 
- * During runtime, registered layers could be called by passing a LayerParameter + * During runtime, registered layers can be called by passing a LayerParameter * protobuffer to the CreateLayer function: * * LayerRegistry::CreateLayer(param); From 8797e7b3720d97afea24ad6f78b7811c57a3919d Mon Sep 17 00:00:00 2001 From: Preston Parry Date: Sun, 28 Aug 2016 14:34:42 -0700 Subject: [PATCH 111/264] fixes typo- duplicate "a a" --- include/caffe/solver.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index 38259edad9f..eafcee32904 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -12,7 +12,7 @@ namespace caffe { /** * @brief Enumeration of actions that a client of the Solver may request by * implementing the Solver's action request function, which a - * a client may optionally provide in order to request early termination + * client may optionally provide in order to request early termination * or saving a snapshot without exiting. In the executable caffe, this * mechanism is used to allow the snapshot to be saved when stopping * execution with a SIGINT (Ctrl-C). From cd54d9e0f96df65a4972306f29d042bc34c63077 Mon Sep 17 00:00:00 2001 From: Preston Parry Date: Sun, 28 Aug 2016 14:42:57 -0700 Subject: [PATCH 112/264] changes "c++" to "C++" for consistency --- include/caffe/solver_factory.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/caffe/solver_factory.hpp b/include/caffe/solver_factory.hpp index cfff721af40..a5b160739b2 100644 --- a/include/caffe/solver_factory.hpp +++ b/include/caffe/solver_factory.hpp @@ -15,7 +15,7 @@ * and its type is its C++ class name, but without the "Solver" at the end * ("MyAwesomeSolver" -> "MyAwesome"). 
* - * If the solver is going to be created simply by its constructor, in your c++ + * If the solver is going to be created simply by its constructor, in your C++ * file, add the following line: * * REGISTER_SOLVER_CLASS(MyAwesome); From 4024b82c7c8e9f12898becf7b3947e603a4dd0bb Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Mon, 29 Aug 2016 11:14:17 -0700 Subject: [PATCH 113/264] [TravisCI] - build protobuf3 GA --- scripts/travis/install-deps.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh index ee16d36a7fc..4e86ac73915 100755 --- a/scripts/travis/install-deps.sh +++ b/scripts/travis/install-deps.sh @@ -56,7 +56,7 @@ else dh-autoreconf \ unzip - wget https://github.com/google/protobuf/archive/v3.0.0-beta-3.tar.gz -O protobuf3.tar.gz + wget https://github.com/google/protobuf/archive/3.0.0-GA.tar.gz -O protobuf3.tar.gz tar -xzf protobuf3.tar.gz -C $PROTOBUF3_DIR --strip 1 rm protobuf3.tar.gz cd $PROTOBUF3_DIR From b9c3c06c28dafce67c89603e8b73cf18057264eb Mon Sep 17 00:00:00 2001 From: Ivan Shapovalov Date: Sun, 14 Aug 2016 04:52:25 +0300 Subject: [PATCH 114/264] cmake: fix usage of INCLUDE_DIR/INCLUDE_DIRS in Dependencies.cmake --- cmake/Dependencies.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index ae9ce8e436d..bf882ce96ac 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -3,7 +3,7 @@ set(Caffe_LINKER_LIBS "") # ---[ Boost find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem) -include_directories(SYSTEM ${Boost_INCLUDE_DIR}) +include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES}) # ---[ Threads @@ -25,7 +25,7 @@ include(cmake/ProtoBuf.cmake) # ---[ HDF5 find_package(HDF5 COMPONENTS HL REQUIRED) -include_directories(SYSTEM ${HDF5_INCLUDE_DIRS} ${HDF5_HL_INCLUDE_DIR}) +include_directories(SYSTEM ${HDF5_INCLUDE_DIRS}) 
list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES}) # ---[ LMDB @@ -42,7 +42,7 @@ endif() # ---[ LevelDB if(USE_LEVELDB) find_package(LevelDB REQUIRED) - include_directories(SYSTEM ${LevelDB_INCLUDE}) + include_directories(SYSTEM ${LevelDB_INCLUDES}) list(APPEND Caffe_LINKER_LIBS ${LevelDB_LIBRARIES}) add_definitions(-DUSE_LEVELDB) endif() From a59e647117705236d8bcef46cc6d4e0c72b42804 Mon Sep 17 00:00:00 2001 From: Ivan Shapovalov Date: Mon, 15 Aug 2016 20:19:09 +0300 Subject: [PATCH 115/264] cmake/Templates: properly spell OpenCV CMake config file name --- cmake/Templates/CaffeConfig.cmake.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Templates/CaffeConfig.cmake.in b/cmake/Templates/CaffeConfig.cmake.in index 73f57ac2d74..b58124aa343 100644 --- a/cmake/Templates/CaffeConfig.cmake.in +++ b/cmake/Templates/CaffeConfig.cmake.in @@ -27,7 +27,7 @@ if(@USE_OPENCV@) if(EXISTS ${Caffe_OpenCV_CONFIG_PATH} AND NOT TARGET opencv_core) message(STATUS "Caffe: using OpenCV config from ${Caffe_OpenCV_CONFIG_PATH}") - include(${Caffe_OpenCV_CONFIG_PATH}/OpenCVModules.cmake) + include(${Caffe_OpenCV_CONFIG_PATH}/OpenCVConfig.cmake) endif() else() From ba189d907d60b17cc24b54d1a22cb68ce6c11193 Mon Sep 17 00:00:00 2001 From: Ivan Shapovalov Date: Sat, 20 Aug 2016 00:59:05 +0300 Subject: [PATCH 116/264] cmake: refactor deps detection, specify all dependencies in the exported caffe target This is the first step towards "modern" IMPORTED-targets-only CMake setup. The find_package modules still need to be rewritten and upstreamed in form of config exports where possible. 
--- CMakeLists.txt | 24 +++++++-- cmake/ConfigGen.cmake | 65 +--------------------- cmake/Cuda.cmake | 12 ++--- cmake/Dependencies.cmake | 81 +++++++++++++++------------- cmake/ProtoBuf.cmake | 4 +- cmake/Templates/CaffeConfig.cmake.in | 13 ++--- python/CMakeLists.txt | 6 +-- src/caffe/CMakeLists.txt | 13 +++-- src/gtest/CMakeLists.txt | 3 ++ 9 files changed, 94 insertions(+), 127 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index da7142c9b3c..cb25b43a458 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,8 +54,6 @@ if(USE_libstdcpp) message("-- Warning: forcing libstdc++ (controlled by USE_libstdcpp option in cmake)") endif() -add_definitions(-DGTEST_USE_OWN_TR1_TUPLE) - # ---[ Warnings caffe_warnings_disable(CMAKE_CXX_FLAGS -Wno-sign-compare -Wno-uninitialized) @@ -64,8 +62,26 @@ configure_file(cmake/Templates/caffe_config.h.in "${PROJECT_BINARY_DIR}/caffe_co # ---[ Includes set(Caffe_INCLUDE_DIR ${PROJECT_SOURCE_DIR}/include) -include_directories(${Caffe_INCLUDE_DIR} ${PROJECT_BINARY_DIR}) -include_directories(BEFORE src) # This is needed for gtest. +set(Caffe_SRC_DIR ${PROJECT_SOURCE_DIR}/src) +include_directories(${PROJECT_BINARY_DIR}) + +# ---[ Includes & defines for CUDA + +# cuda_compile() does not have per-call dependencies or include pathes +# (cuda_compile() has per-call flags, but we set them here too for clarity) +# +# list(REMOVE_ITEM ...) 
invocations remove PRIVATE and PUBLIC keywords from collected definitions and include pathes +if(HAVE_CUDA) + # pass include pathes to cuda_include_directories() + set(Caffe_ALL_INCLUDE_DIRS ${Caffe_INCLUDE_DIRS}) + list(REMOVE_ITEM Caffe_ALL_INCLUDE_DIRS PRIVATE PUBLIC) + cuda_include_directories(${Caffe_INCLUDE_DIR} ${Caffe_SRC_DIR} ${Caffe_ALL_INCLUDE_DIRS}) + + # add definitions to nvcc flags directly + set(Caffe_ALL_DEFINITIONS ${Caffe_DEFINITIONS}) + list(REMOVE_ITEM Caffe_ALL_DEFINITIONS PRIVATE PUBLIC) + list(APPEND CUDA_NVCC_FLAGS ${Caffe_ALL_DEFINITIONS}) +endif() # ---[ Subdirectories add_subdirectory(src/gtest) diff --git a/cmake/ConfigGen.cmake b/cmake/ConfigGen.cmake index 056371110b5..077d5b283d1 100644 --- a/cmake/ConfigGen.cmake +++ b/cmake/ConfigGen.cmake @@ -1,31 +1,4 @@ -################################################################################################ -# Helper function to fetch caffe includes which will be passed to dependent projects -# Usage: -# caffe_get_current_includes() -function(caffe_get_current_includes includes_variable) - get_property(current_includes DIRECTORY PROPERTY INCLUDE_DIRECTORIES) - caffe_convert_absolute_paths(current_includes) - - # remove at most one ${PROJECT_BINARY_DIR} include added for caffe_config.h - list(FIND current_includes ${PROJECT_BINARY_DIR} __index) - list(REMOVE_AT current_includes ${__index}) - - # removing numpy includes (since not required for client libs) - set(__toremove "") - foreach(__i ${current_includes}) - if(${__i} MATCHES "python") - list(APPEND __toremove ${__i}) - endif() - endforeach() - if(__toremove) - list(REMOVE_ITEM current_includes ${__toremove}) - endif() - - caffe_list_unique(current_includes) - set(${includes_variable} ${current_includes} PARENT_SCOPE) -endfunction() - ################################################################################################ # Helper function to get all list items that begin with given prefix # Usage: @@ -47,39 +20,15 @@ 
endfunction() function(caffe_generate_export_configs) set(install_cmake_suffix "share/Caffe") - # ---[ Configure build-tree CaffeConfig.cmake file ]--- - caffe_get_current_includes(Caffe_INCLUDE_DIRS) - - set(Caffe_DEFINITIONS "") if(NOT HAVE_CUDA) set(HAVE_CUDA FALSE) - list(APPEND Caffe_DEFINITIONS -DCPU_ONLY) - endif() - - if(USE_OPENCV) - list(APPEND Caffe_DEFINITIONS -DUSE_OPENCV) - endif() - - if(USE_LMDB) - list(APPEND Caffe_DEFINITIONS -DUSE_LMDB) - if (ALLOW_LMDB_NOLOCK) - list(APPEND Caffe_DEFINITIONS -DALLOW_LMDB_NOLOCK) - endif() - endif() - - if(USE_LEVELDB) - list(APPEND Caffe_DEFINITIONS -DUSE_LEVELDB) endif() if(NOT HAVE_CUDNN) set(HAVE_CUDNN FALSE) - else() - list(APPEND DEFINITIONS -DUSE_CUDNN) endif() - if(BLAS STREQUAL "MKL" OR BLAS STREQUAL "mkl") - list(APPEND Caffe_DEFINITIONS -DUSE_MKL) - endif() + # ---[ Configure build-tree CaffeConfig.cmake file ]--- configure_file("cmake/Templates/CaffeConfig.cmake.in" "${PROJECT_BINARY_DIR}/CaffeConfig.cmake" @ONLY) @@ -89,18 +38,6 @@ function(caffe_generate_export_configs) # ---[ Configure install-tree CaffeConfig.cmake file ]--- - # remove source and build dir includes - caffe_get_items_with_prefix(${PROJECT_SOURCE_DIR} Caffe_INCLUDE_DIRS __insource) - caffe_get_items_with_prefix(${PROJECT_BINARY_DIR} Caffe_INCLUDE_DIRS __inbinary) - list(REMOVE_ITEM Caffe_INCLUDE_DIRS ${__insource} ${__inbinary}) - - # add `install` include folder - set(lines - "get_filename_component(__caffe_include \"\${Caffe_CMAKE_DIR}/../../include\" ABSOLUTE)\n" - "list(APPEND Caffe_INCLUDE_DIRS \${__caffe_include})\n" - "unset(__caffe_include)\n") - string(REPLACE ";" "" Caffe_INSTALL_INCLUDE_DIR_APPEND_COMMAND ${lines}) - configure_file("cmake/Templates/CaffeConfig.cmake.in" "${PROJECT_BINARY_DIR}/cmake/CaffeConfig.cmake" @ONLY) # Install the CaffeConfig.cmake and export set to use with install-tree diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake index eeeb7325ffd..c6b0de8c759 100644 --- a/cmake/Cuda.cmake +++ 
b/cmake/Cuda.cmake @@ -238,17 +238,17 @@ endif() set(HAVE_CUDA TRUE) message(STATUS "CUDA detected: " ${CUDA_VERSION}) -include_directories(SYSTEM ${CUDA_INCLUDE_DIRS}) -list(APPEND Caffe_LINKER_LIBS ${CUDA_CUDART_LIBRARY} - ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) +list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${CUDA_INCLUDE_DIRS}) +list(APPEND Caffe_LINKER_LIBS PUBLIC ${CUDA_CUDART_LIBRARY} + ${CUDA_curand_LIBRARY} ${CUDA_CUBLAS_LIBRARIES}) # cudnn detection if(USE_CUDNN) detect_cuDNN() if(HAVE_CUDNN) - add_definitions(-DUSE_CUDNN) - include_directories(SYSTEM ${CUDNN_INCLUDE}) - list(APPEND Caffe_LINKER_LIBS ${CUDNN_LIBRARY}) + list(APPEND Caffe_DEFINITIONS PUBLIC -DUSE_CUDNN) + list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${CUDNN_INCLUDE}) + list(APPEND Caffe_LINKER_LIBS PUBLIC ${CUDNN_LIBRARY}) endif() endif() diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index bf882ce96ac..6a12759234f 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1,57 +1,67 @@ # This list is required for static linking and exported to CaffeConfig.cmake set(Caffe_LINKER_LIBS "") +set(Caffe_INCLUDE_DIRS "") +set(Caffe_DEFINITIONS "") # ---[ Boost find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem) -include_directories(SYSTEM ${Boost_INCLUDE_DIRS}) -list(APPEND Caffe_LINKER_LIBS ${Boost_LIBRARIES}) +list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${Boost_INCLUDE_DIRS}) +list(APPEND Caffe_LINKER_LIBS PUBLIC ${Boost_LIBRARIES}) # ---[ Threads find_package(Threads REQUIRED) -list(APPEND Caffe_LINKER_LIBS ${CMAKE_THREAD_LIBS_INIT}) +list(APPEND Caffe_LINKER_LIBS PRIVATE ${CMAKE_THREAD_LIBS_INIT}) + +# ---[ OpenMP +if(USE_OPENMP) + # TODO: use something exportable here + find_package(OpenMP REQUIRED) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif() # ---[ Google-glog include("cmake/External/glog.cmake") -include_directories(SYSTEM ${GLOG_INCLUDE_DIRS}) -list(APPEND 
Caffe_LINKER_LIBS ${GLOG_LIBRARIES}) +list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${GLOG_INCLUDE_DIRS}) +list(APPEND Caffe_LINKER_LIBS PUBLIC ${GLOG_LIBRARIES}) # ---[ Google-gflags include("cmake/External/gflags.cmake") -include_directories(SYSTEM ${GFLAGS_INCLUDE_DIRS}) -list(APPEND Caffe_LINKER_LIBS ${GFLAGS_LIBRARIES}) +list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${GFLAGS_INCLUDE_DIRS}) +list(APPEND Caffe_LINKER_LIBS PUBLIC ${GFLAGS_LIBRARIES}) # ---[ Google-protobuf include(cmake/ProtoBuf.cmake) # ---[ HDF5 find_package(HDF5 COMPONENTS HL REQUIRED) -include_directories(SYSTEM ${HDF5_INCLUDE_DIRS}) -list(APPEND Caffe_LINKER_LIBS ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES}) +list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${HDF5_INCLUDE_DIRS}) +list(APPEND Caffe_LINKER_LIBS PUBLIC ${HDF5_LIBRARIES} ${HDF5_HL_LIBRARIES}) # ---[ LMDB if(USE_LMDB) find_package(LMDB REQUIRED) - include_directories(SYSTEM ${LMDB_INCLUDE_DIR}) - list(APPEND Caffe_LINKER_LIBS ${LMDB_LIBRARIES}) - add_definitions(-DUSE_LMDB) + list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${LMDB_INCLUDE_DIR}) + list(APPEND Caffe_LINKER_LIBS PUBLIC ${LMDB_LIBRARIES}) + list(APPEND Caffe_DEFINITIONS PUBLIC -DUSE_LMDB) if(ALLOW_LMDB_NOLOCK) - add_definitions(-DALLOW_LMDB_NOLOCK) + list(APPEND Caffe_DEFINITIONS PRIVATE -DALLOW_LMDB_NOLOCK) endif() endif() # ---[ LevelDB if(USE_LEVELDB) find_package(LevelDB REQUIRED) - include_directories(SYSTEM ${LevelDB_INCLUDES}) - list(APPEND Caffe_LINKER_LIBS ${LevelDB_LIBRARIES}) - add_definitions(-DUSE_LEVELDB) + list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${LevelDB_INCLUDES}) + list(APPEND Caffe_LINKER_LIBS PUBLIC ${LevelDB_LIBRARIES}) + list(APPEND Caffe_DEFINITIONS PUBLIC -DUSE_LEVELDB) endif() # ---[ Snappy if(USE_LEVELDB) find_package(Snappy REQUIRED) - include_directories(SYSTEM ${Snappy_INCLUDE_DIR}) - list(APPEND Caffe_LINKER_LIBS ${Snappy_LIBRARIES}) + list(APPEND Caffe_INCLUDE_DIRS PRIVATE ${Snappy_INCLUDE_DIR}) + list(APPEND Caffe_LINKER_LIBS PRIVATE ${Snappy_LIBRARIES}) endif() # ---[ CUDA @@ 
-63,8 +73,7 @@ if(NOT HAVE_CUDA) message(WARNING "-- CUDA is not detected by cmake. Building without it...") endif() - # TODO: remove this not cross platform define in future. Use caffe_config.h instead. - add_definitions(-DCPU_ONLY) + list(APPEND Caffe_DEFINITIONS PUBLIC -DCPU_ONLY) endif() # ---[ OpenCV @@ -73,10 +82,10 @@ if(USE_OPENCV) if(NOT OpenCV_FOUND) # if not OpenCV 3.x, then imgcodecs are not found find_package(OpenCV REQUIRED COMPONENTS core highgui imgproc) endif() - include_directories(SYSTEM ${OpenCV_INCLUDE_DIRS}) - list(APPEND Caffe_LINKER_LIBS ${OpenCV_LIBS}) + list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${OpenCV_INCLUDE_DIRS}) + list(APPEND Caffe_LINKER_LIBS PUBLIC ${OpenCV_LIBS}) message(STATUS "OpenCV found (${OpenCV_CONFIG_PATH})") - add_definitions(-DUSE_OPENCV) + list(APPEND Caffe_DEFINITIONS PUBLIC -DUSE_OPENCV) endif() # ---[ BLAS @@ -86,26 +95,26 @@ if(NOT APPLE) if(BLAS STREQUAL "Atlas" OR BLAS STREQUAL "atlas") find_package(Atlas REQUIRED) - include_directories(SYSTEM ${Atlas_INCLUDE_DIR}) - list(APPEND Caffe_LINKER_LIBS ${Atlas_LIBRARIES}) + list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${Atlas_INCLUDE_DIR}) + list(APPEND Caffe_LINKER_LIBS PUBLIC ${Atlas_LIBRARIES}) elseif(BLAS STREQUAL "Open" OR BLAS STREQUAL "open") find_package(OpenBLAS REQUIRED) - include_directories(SYSTEM ${OpenBLAS_INCLUDE_DIR}) - list(APPEND Caffe_LINKER_LIBS ${OpenBLAS_LIB}) + list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${OpenBLAS_INCLUDE_DIR}) + list(APPEND Caffe_LINKER_LIBS PUBLIC ${OpenBLAS_LIB}) elseif(BLAS STREQUAL "MKL" OR BLAS STREQUAL "mkl") find_package(MKL REQUIRED) - include_directories(SYSTEM ${MKL_INCLUDE_DIR}) - list(APPEND Caffe_LINKER_LIBS ${MKL_LIBRARIES}) - add_definitions(-DUSE_MKL) + list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${MKL_INCLUDE_DIR}) + list(APPEND Caffe_LINKER_LIBS PUBLIC ${MKL_LIBRARIES}) + list(APPEND Caffe_DEFINITIONS PUBLIC -DUSE_MKL) endif() elseif(APPLE) find_package(vecLib REQUIRED) - include_directories(SYSTEM ${vecLib_INCLUDE_DIR}) - 
list(APPEND Caffe_LINKER_LIBS ${vecLib_LINKER_LIBS}) + list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${vecLib_INCLUDE_DIR}) + list(APPEND Caffe_LINKER_LIBS PUBLIC ${vecLib_LINKER_LIBS}) if(VECLIB_FOUND) if(NOT vecLib_INCLUDE_DIR MATCHES "^/System/Library/Frameworks/vecLib.framework.*") - add_definitions(-DUSE_ACCELERATE) + list(APPEND Caffe_DEFINITIONS PUBLIC -DUSE_ACCELERATE) endif() endif() endif() @@ -149,9 +158,9 @@ if(BUILD_python) if(PYTHONLIBS_FOUND AND NUMPY_FOUND AND Boost_PYTHON_FOUND) set(HAVE_PYTHON TRUE) if(BUILD_python_layer) - add_definitions(-DWITH_PYTHON_LAYER) - include_directories(SYSTEM ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR} ${Boost_INCLUDE_DIRS}) - list(APPEND Caffe_LINKER_LIBS ${PYTHON_LIBRARIES} ${Boost_LIBRARIES}) + list(APPEND Caffe_DEFINITIONS PRIVATE -DWITH_PYTHON_LAYER) + list(APPEND Caffe_INCLUDE_DIRS PRIVATE ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR} PUBLIC ${Boost_INCLUDE_DIRS}) + list(APPEND Caffe_LINKER_LIBS PRIVATE ${PYTHON_LIBRARIES} PUBLIC ${Boost_LIBRARIES}) endif() endif() endif() diff --git a/cmake/ProtoBuf.cmake b/cmake/ProtoBuf.cmake index 73f647f5fae..8005b448707 100644 --- a/cmake/ProtoBuf.cmake +++ b/cmake/ProtoBuf.cmake @@ -2,8 +2,8 @@ # the standard cmake script with version and python generation support find_package( Protobuf REQUIRED ) -include_directories(SYSTEM ${PROTOBUF_INCLUDE_DIR}) -list(APPEND Caffe_LINKER_LIBS ${PROTOBUF_LIBRARIES}) +list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${PROTOBUF_INCLUDE_DIR}) +list(APPEND Caffe_LINKER_LIBS PUBLIC ${PROTOBUF_LIBRARIES}) # As of Ubuntu 14.04 protoc is no longer a part of libprotobuf-dev package # and should be installed separately as in: sudo apt-get install protobuf-compiler diff --git a/cmake/Templates/CaffeConfig.cmake.in b/cmake/Templates/CaffeConfig.cmake.in index b58124aa343..77c4059e560 100644 --- a/cmake/Templates/CaffeConfig.cmake.in +++ b/cmake/Templates/CaffeConfig.cmake.in @@ -9,9 +9,9 @@ # After successful configuration the following variables # will be defined: # 
-# Caffe_INCLUDE_DIRS - Caffe include directories -# Caffe_LIBRARIES - libraries to link against -# Caffe_DEFINITIONS - a list of definitions to pass to compiler +# Caffe_LIBRARIES - IMPORTED targets to link against +# (There is no Caffe_INCLUDE_DIRS and Caffe_DEFINITIONS +# because they are specified in the IMPORTED target interface.) # # Caffe_HAVE_CUDA - signals about CUDA support # Caffe_HAVE_CUDNN - signals about cuDNN support @@ -39,9 +39,6 @@ endif() # Compute paths get_filename_component(Caffe_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) -set(Caffe_INCLUDE_DIRS "@Caffe_INCLUDE_DIRS@") - -@Caffe_INSTALL_INCLUDE_DIR_APPEND_COMMAND@ # Our library dependencies if(NOT TARGET caffe AND NOT caffe_BINARY_DIR) @@ -49,11 +46,9 @@ endif() # List of IMPORTED libs created by CaffeTargets.cmake +# These targets already specify all needed definitions and include paths set(Caffe_LIBRARIES caffe) -# Definitions -set(Caffe_DEFINITIONS "@Caffe_DEFINITIONS@") - # Cuda support variables set(Caffe_CPU_ONLY @CPU_ONLY@) set(Caffe_HAVE_CUDA @HAVE_CUDA@) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index bf492a24b1c..c53299d265b 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -3,13 +3,13 @@ if(NOT HAVE_PYTHON) return() endif() -include_directories(${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR} ${Boost_INCLUDE_DIRS}) file(GLOB_RECURSE python_srcs ${PROJECT_SOURCE_DIR}/python/*.cpp) add_library(pycaffe SHARED ${python_srcs}) -target_link_libraries(pycaffe ${Caffe_LINK} ${PYTHON_LIBRARIES} ${Boost_LIBRARIES}) -set_target_properties(pycaffe PROPERTIES PREFIX "" OUTPUT_NAME "_caffe") caffe_default_properties(pycaffe) +set_target_properties(pycaffe PROPERTIES PREFIX "" OUTPUT_NAME "_caffe") +target_include_directories(pycaffe PUBLIC ${PYTHON_INCLUDE_DIRS} ${NUMPY_INCLUDE_DIR}) +target_link_libraries(pycaffe PUBLIC ${Caffe_LINK} ${PYTHON_LIBRARIES}) if(UNIX OR APPLE) set(__linkname
"${PROJECT_SOURCE_DIR}/python/caffe/_caffe.so") diff --git a/src/caffe/CMakeLists.txt b/src/caffe/CMakeLists.txt index 8a80c940488..ed4d50bed5a 100644 --- a/src/caffe/CMakeLists.txt +++ b/src/caffe/CMakeLists.txt @@ -4,8 +4,11 @@ caffe_protobuf_generate_cpp_py(${proto_gen_folder} proto_srcs proto_hdrs proto_p # include python files either to force generation add_library(proto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python}) -set(Caffe_LINKER_LIBS proto ${Caffe_LINKER_LIBS}) # note, crucial to prepend! caffe_default_properties(proto) +target_link_libraries(proto PUBLIC ${PROTOBUF_LIBRARIES}) +target_include_directories(proto PUBLIC ${PROTOBUF_INCLUDE_DIR}) + +list(INSERT Caffe_LINKER_LIBS 0 PUBLIC proto) # note, crucial to prepend! # --[ Caffe library @@ -18,8 +21,13 @@ if(HAVE_CUDA) endif() add_library(caffe ${srcs}) -target_link_libraries(caffe proto ${Caffe_LINKER_LIBS}) caffe_default_properties(caffe) +target_link_libraries(caffe ${Caffe_LINKER_LIBS}) +target_include_directories(caffe ${Caffe_INCLUDE_DIRS} + PUBLIC + $ + $) +target_compile_definitions(caffe ${Caffe_DEFINITIONS}) set_target_properties(caffe PROPERTIES VERSION ${CAFFE_TARGET_VERSION} SOVERSION ${CAFFE_TARGET_SOVERSION} @@ -37,4 +45,3 @@ file(WRITE ${PROJECT_BINARY_DIR}/__init__.py) list(APPEND proto_python ${PROJECT_BINARY_DIR}/__init__.py) install(PROGRAMS ${proto_python} DESTINATION python/caffe/proto) - diff --git a/src/gtest/CMakeLists.txt b/src/gtest/CMakeLists.txt index ef7ff7ed14b..e98254af130 100644 --- a/src/gtest/CMakeLists.txt +++ b/src/gtest/CMakeLists.txt @@ -1,5 +1,8 @@ add_library(gtest STATIC EXCLUDE_FROM_ALL gtest.h gtest-all.cpp) caffe_default_properties(gtest) +target_include_directories(gtest PUBLIC ${Caffe_SRC_DIR}) +target_compile_definitions(gtest PUBLIC -DGTEST_USE_OWN_TR1_TUPLE) + #add_library(gtest_main gtest_main.cc) #target_link_libraries(gtest_main gtest) From 6200b915601e1f7b2ec6d4746dc143114722ec38 Mon Sep 17 00:00:00 2001 From: Ivan Shapovalov Date: Sat, 20 Aug 
2016 01:08:26 +0300 Subject: [PATCH 117/264] net.cpp: do not include test/test_caffe_main.hpp --- src/caffe/net.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 644cb7e97ee..a3408734c12 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -17,8 +17,6 @@ #include "caffe/util/math_functions.hpp" #include "caffe/util/upgrade_proto.hpp" -#include "caffe/test/test_caffe_main.hpp" - namespace caffe { template From f1b9da54598923c531e1a98c4f1546169165e441 Mon Sep 17 00:00:00 2001 From: Ivan Shapovalov Date: Sun, 14 Aug 2016 04:57:22 +0300 Subject: [PATCH 118/264] cmake: add option to link with OpenMP Although Caffe itself does not use OpenMP, explicitly linking to OpenMP should be done when one statically links to a BLAS library which uses OpenMP internally and does not provide proper CMake imported targets with proper dependencies (nobody does this so far). --- CMakeLists.txt | 1 + cmake/Dependencies.cmake | 17 +++++++++++++---- src/caffe/CMakeLists.txt | 3 +++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cb25b43a458..378b285c908 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,6 +38,7 @@ caffe_option(USE_OPENCV "Build with OpenCV support" ON) caffe_option(USE_LEVELDB "Build with levelDB" ON) caffe_option(USE_LMDB "Build with lmdb" ON) caffe_option(ALLOW_LMDB_NOLOCK "Allow MDB_NOLOCK when reading LMDB files (only if necessary)" OFF) +caffe_option(USE_OPENMP "Link with OpenMP (when your BLAS wants OpenMP and you get linker errors)" OFF) # ---[ Dependencies include(cmake/Dependencies.cmake) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 6a12759234f..290c161b8b9 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -1,7 +1,8 @@ -# This list is required for static linking and exported to CaffeConfig.cmake +# These lists are later turned into target properties on main caffe library target set(Caffe_LINKER_LIBS "")
set(Caffe_INCLUDE_DIRS "") set(Caffe_DEFINITIONS "") +set(Caffe_COMPILE_OPTIONS "") # ---[ Boost find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem) @@ -14,10 +15,18 @@ list(APPEND Caffe_LINKER_LIBS PRIVATE ${CMAKE_THREAD_LIBS_INIT}) # ---[ OpenMP if(USE_OPENMP) - # TODO: use something exportable here + # Ideally, this should be provided by the BLAS library IMPORTED target. However, + # nobody does this, so we need to link to OpenMP explicitly and have the maintainer + # to flick the switch manually as needed. + # + # Moreover, OpenMP package does not provide an IMPORTED target as well, and the + # suggested way of linking to OpenMP is to append to CMAKE_{C,CXX}_FLAGS. + # However, this naïve method will force any user of Caffe to add the same kludge + # into their buildsystem again, so we put these options into per-target PUBLIC + # compile options and link flags, so that they will be exported properly. find_package(OpenMP REQUIRED) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + list(APPEND Caffe_LINKER_LIBS PRIVATE ${OpenMP_CXX_FLAGS}) + list(APPEND Caffe_COMPILE_OPTIONS PRIVATE ${OpenMP_CXX_FLAGS}) endif() # ---[ Google-glog diff --git a/src/caffe/CMakeLists.txt b/src/caffe/CMakeLists.txt index ed4d50bed5a..7b25a98aa2d 100644 --- a/src/caffe/CMakeLists.txt +++ b/src/caffe/CMakeLists.txt @@ -28,6 +28,9 @@ target_include_directories(caffe ${Caffe_INCLUDE_DIRS} $ $) target_compile_definitions(caffe ${Caffe_DEFINITIONS}) +if(Caffe_COMPILE_OPTIONS) + target_compile_options(caffe ${Caffe_COMPILE_OPTIONS}) +endif() set_target_properties(caffe PROPERTIES VERSION ${CAFFE_TARGET_VERSION} SOVERSION ${CAFFE_TARGET_SOVERSION} From 6ed799cb206c6b70bdd260d62e8ff3e077f5b635 Mon Sep 17 00:00:00 2001 From: Ivan Shapovalov Date: Wed, 24 Aug 2016 06:28:41 +0300 Subject: [PATCH 119/264] cmake/Templates: remove duplicated #cmakedefines from caffe_config.h.in Rationale: these are duplicated 
in CMakeLists code, and they cannot be removed from there because many definitions need to be exported to the library clients. See issue #4625. --- cmake/Templates/caffe_config.h.in | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/cmake/Templates/caffe_config.h.in b/cmake/Templates/caffe_config.h.in index 8a31b43cabf..45465b98305 100644 --- a/cmake/Templates/caffe_config.h.in +++ b/cmake/Templates/caffe_config.h.in @@ -4,16 +4,6 @@ /* Binaries directory */ #define BINARY_FOLDER "${PROJECT_BINARY_DIR}" -/* NVIDA Cuda */ -#cmakedefine HAVE_CUDA - -/* NVIDA cuDNN */ -#cmakedefine HAVE_CUDNN -#cmakedefine USE_CUDNN - -/* NVIDA cuDNN */ -#cmakedefine CPU_ONLY - /* Test device */ #define CUDA_TEST_DEVICE ${CUDA_TEST_DEVICE} @@ -27,12 +17,3 @@ #define EXAMPLES_SOURCE_DIR "examples/" #define CMAKE_EXT "" #endif - -/* Matlab */ -#cmakedefine HAVE_MATLAB - -/* IO libraries */ -#cmakedefine USE_OPENCV -#cmakedefine USE_LEVELDB -#cmakedefine USE_LMDB -#cmakedefine ALLOW_LMDB_NOLOCK From 9bc83e32b39e2c9bbf4bf20d69d4f215d73a414e Mon Sep 17 00:00:00 2001 From: Benedikt Wilbertz Date: Fri, 12 Aug 2016 22:33:06 +0200 Subject: [PATCH 120/264] fix layerSetUp of scale_layer to not add bias blob when already present --- src/caffe/layers/scale_layer.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/caffe/layers/scale_layer.cpp b/src/caffe/layers/scale_layer.cpp index ecdbb123e31..e652dad6e10 100644 --- a/src/caffe/layers/scale_layer.cpp +++ b/src/caffe/layers/scale_layer.cpp @@ -56,9 +56,17 @@ void ScaleLayer::LayerSetUp(const vector*>& bottom, bias_bottom_vec_.resize(1); bias_bottom_vec_[0] = bottom[0]; bias_layer_->SetUp(bias_bottom_vec_, top); - bias_param_id_ = this->blobs_.size(); - this->blobs_.resize(bias_param_id_ + 1); - this->blobs_[bias_param_id_] = bias_layer_->blobs()[0]; + if (this->blobs_.size() + bottom.size() < 3) { + // case: blobs.size == 1 && bottom.size == 1 + // or blobs.size == 0 && bottom.size == 2 + 
bias_param_id_ = this->blobs_.size(); + this->blobs_.resize(bias_param_id_ + 1); + this->blobs_[bias_param_id_] = bias_layer_->blobs()[0]; + } else { + // bias param already initialized + bias_param_id_ = this->blobs_.size() - 1; + bias_layer_->blobs()[0] = this->blobs_[bias_param_id_]; + } bias_propagate_down_.resize(1, false); } this->param_propagate_down_.resize(this->blobs_.size(), true); From cdcf2e07dba951774be7feb9d486b7f84ef0c0b1 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Fri, 9 Sep 2016 12:49:35 -0700 Subject: [PATCH 121/264] Benchmarking should not impact perf until timer is read --- src/caffe/util/benchmark.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/caffe/util/benchmark.cpp b/src/caffe/util/benchmark.cpp index 1d269c351c1..d994225f97b 100644 --- a/src/caffe/util/benchmark.cpp +++ b/src/caffe/util/benchmark.cpp @@ -44,7 +44,6 @@ void Timer::Stop() { if (Caffe::mode() == Caffe::GPU) { #ifndef CPU_ONLY CUDA_CHECK(cudaEventRecord(stop_gpu_, 0)); - CUDA_CHECK(cudaEventSynchronize(stop_gpu_)); #else NO_GPU; #endif @@ -66,6 +65,7 @@ float Timer::MicroSeconds() { } if (Caffe::mode() == Caffe::GPU) { #ifndef CPU_ONLY + CUDA_CHECK(cudaEventSynchronize(stop_gpu_)); CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_, stop_gpu_)); // Cuda only measure milliseconds @@ -89,6 +89,7 @@ float Timer::MilliSeconds() { } if (Caffe::mode() == Caffe::GPU) { #ifndef CPU_ONLY + CUDA_CHECK(cudaEventSynchronize(stop_gpu_)); CUDA_CHECK(cudaEventElapsedTime(&elapsed_milliseconds_, start_gpu_, stop_gpu_)); #else From 50b5697a0e0b85921e3ea38e961984ea08f014c3 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Fri, 9 Sep 2016 12:57:09 -0700 Subject: [PATCH 122/264] Avoids missing return values during build. 
--- src/caffe/layer_factory.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/caffe/layer_factory.cpp b/src/caffe/layer_factory.cpp index e967bd6181c..f14253a510e 100644 --- a/src/caffe/layer_factory.cpp +++ b/src/caffe/layer_factory.cpp @@ -67,6 +67,7 @@ shared_ptr > GetConvolutionLayer( #endif } else { LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; + throw; // Avoids missing return warning } } @@ -104,6 +105,7 @@ shared_ptr > GetPoolingLayer(const LayerParameter& param) { #endif } else { LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; + throw; // Avoids missing return warning } } @@ -141,6 +143,7 @@ shared_ptr > GetLRNLayer(const LayerParameter& param) { #endif } else { LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; + throw; // Avoids missing return warning } } @@ -164,6 +167,7 @@ shared_ptr > GetReLULayer(const LayerParameter& param) { #endif } else { LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; + throw; // Avoids missing return warning } } @@ -187,6 +191,7 @@ shared_ptr > GetSigmoidLayer(const LayerParameter& param) { #endif } else { LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; + throw; // Avoids missing return warning } } @@ -210,6 +215,7 @@ shared_ptr > GetSoftmaxLayer(const LayerParameter& param) { #endif } else { LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; + throw; // Avoids missing return warning } } @@ -233,6 +239,7 @@ shared_ptr > GetTanHLayer(const LayerParameter& param) { #endif } else { LOG(FATAL) << "Layer " << param.name() << " has unknown engine."; + throw; // Avoids missing return warning } } From 04f9a77801af3233bacadcca178ee7d7a6406bd5 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Sat, 27 Aug 2016 20:19:00 -0700 Subject: [PATCH 123/264] [docs] clarify handling of bias and scaling by BiasLayer, ScaleLayer A bias/scaling can be applied wherever desired by defining the respective layers, and `ScaleLayer` can 
handle both as a memory optimization. --- include/caffe/layers/batch_norm_layer.hpp | 8 +++----- include/caffe/layers/bias_layer.hpp | 10 +++++----- include/caffe/layers/scale_layer.hpp | 12 +++++++----- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/caffe/layers/batch_norm_layer.hpp b/include/caffe/layers/batch_norm_layer.hpp index 9b2d5126efb..c38c8410693 100644 --- a/include/caffe/layers/batch_norm_layer.hpp +++ b/include/caffe/layers/batch_norm_layer.hpp @@ -27,11 +27,9 @@ namespace caffe { * param {lr_mult: 0} three times in the layer definition. * * Note that the original paper also included a per-channel learned bias and - * scaling factor. It is possible (though a bit cumbersome) to implement - * this in caffe using a single-channel DummyDataLayer filled with zeros, - * followed by a Convolution layer with output the same size as the current. - * This produces a channel-specific value that can be added or multiplied by - * the BatchNorm layer's output. + * scaling factor. To implement this in Caffe, define a `ScaleLayer` configured + * with `bias_term: true` after each `BatchNormLayer` to handle both the bias + * and scaling factor. * * [1] S. Ioffe and C. Szegedy, "Batch Normalization: Accelerating Deep Network * Training by Reducing Internal Covariate Shift." arXiv preprint diff --git a/include/caffe/layers/bias_layer.hpp b/include/caffe/layers/bias_layer.hpp index eedc3aaa351..9639c9cdc8a 100644 --- a/include/caffe/layers/bias_layer.hpp +++ b/include/caffe/layers/bias_layer.hpp @@ -10,13 +10,13 @@ namespace caffe { /** - * @brief Computes a sum of two input Blobs, with the shape of the - * latter Blob "broadcast" to match the shape of the former. - * Equivalent to tiling the latter Blob, then computing the elementwise - * sum. + * @brief Computes a sum of two input Blobs, with the shape of the latter Blob + * "broadcast" to match the shape of the former. 
Equivalent to tiling + * the latter Blob, then computing the elementwise sum. * * The second input may be omitted, in which case it's learned as a parameter - * of the layer. + * of the layer. Note: in case bias and scaling are desired, both operations can + * be handled by `ScaleLayer` configured with `bias_term: true`. */ template class BiasLayer : public Layer { diff --git a/include/caffe/layers/scale_layer.hpp b/include/caffe/layers/scale_layer.hpp index 924df2e51ab..45b714d4027 100644 --- a/include/caffe/layers/scale_layer.hpp +++ b/include/caffe/layers/scale_layer.hpp @@ -12,13 +12,15 @@ namespace caffe { /** - * @brief Computes a product of two input Blobs, with the shape of the - * latter Blob "broadcast" to match the shape of the former. + * @brief Computes the elementwise product of two input Blobs, with the shape of + * the latter Blob "broadcast" to match the shape of the former. * Equivalent to tiling the latter Blob, then computing the elementwise - * product. + * product. Note: for efficiency and convenience, this layer can + * additionally perform a "broadcast" sum too when `bias_term: true` + * is set. * - * The second input may be omitted, in which case it's learned as a parameter - * of the layer. + * The latter, scale input may be omitted, in which case it's learned as + * parameter of the layer (as is the bias, if it is included). 
*/ template class ScaleLayer: public Layer { From d195e605de5f6964eadeba467f5ad85d46841c87 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 9 Sep 2016 19:46:41 -0700 Subject: [PATCH 124/264] [docs] note CUDA 8 requirement for Ubuntu 16.04 --- docs/install_apt.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docs/install_apt.md b/docs/install_apt.md index 2976e3cd07c..3de5a494e0c 100644 --- a/docs/install_apt.md +++ b/docs/install_apt.md @@ -9,14 +9,19 @@ title: Installation: Ubuntu sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler sudo apt-get install --no-install-recommends libboost-all-dev -**CUDA**: Install via the NVIDIA package instead of `apt-get` to be certain of the library and driver versions. -Install the library and latest driver separately; the driver bundled with the library is usually out-of-date. +**CUDA**: Install by `apt-get` or the NVIDIA `.run` package. +The NVIDIA package tends to follow more recent library and driver versions, but the installation is more manual. +If installing from packages, install the library and latest driver separately; the driver bundled with the library is usually out-of-date. This can be skipped for CPU-only installation. **BLAS**: install ATLAS by `sudo apt-get install libatlas-base-dev` or install OpenBLAS or MKL for better CPU performance. **Python** (optional): if you use the default Python you will need to `sudo apt-get install` the `python-dev` package to have the Python headers for building the pycaffe interface. +**Compatibility notes, 16.04** + +CUDA 8 is required on Ubuntu 16.04. + **Remaining dependencies, 14.04** Everything is packaged in 14.04. 
From 3b6fd1d95b374b0484f32a4f86380714c456a293 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Sat, 27 Aug 2016 20:23:13 -0700 Subject: [PATCH 125/264] [docs] identify batch norm layer blobs --- include/caffe/layers/batch_norm_layer.hpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/include/caffe/layers/batch_norm_layer.hpp b/include/caffe/layers/batch_norm_layer.hpp index c38c8410693..a26ad1a42c7 100644 --- a/include/caffe/layers/batch_norm_layer.hpp +++ b/include/caffe/layers/batch_norm_layer.hpp @@ -13,18 +13,19 @@ namespace caffe { * @brief Normalizes the input to have 0-mean and/or unit (1) variance across * the batch. * - * This layer computes Batch Normalization described in [1]. For - * each channel in the data (i.e. axis 1), it subtracts the mean and divides - * by the variance, where both statistics are computed across both spatial - * dimensions and across the different examples in the batch. + * This layer computes Batch Normalization as described in [1]. For each channel + * in the data (i.e. axis 1), it subtracts the mean and divides by the variance, + * where both statistics are computed across both spatial dimensions and across + * the different examples in the batch. * - * By default, during training time, the network is computing global mean/ - * variance statistics via a running average, which is then used at test - * time to allow deterministic outputs for each input. You can manually - * toggle whether the network is accumulating or using the statistics via the - * use_global_stats option. IMPORTANT: for this feature to work, you MUST - * set the learning rate to zero for all three parameter blobs, i.e., - * param {lr_mult: 0} three times in the layer definition. + * By default, during training time, the network is computing global + * mean/variance statistics via a running average, which is then used at test + * time to allow deterministic outputs for each input. 
You can manually toggle + * whether the network is accumulating or using the statistics via the + * use_global_stats option. IMPORTANT: for this feature to work, you MUST set + * the learning rate to zero for all three blobs, i.e., param {lr_mult: 0} three + * times in the layer definition. For reference, these three blobs are (0) + * mean, (1) variance, and (2) the moving average factor. * * Note that the original paper also included a per-channel learned bias and * scaling factor. To implement this in Caffe, define a `ScaleLayer` configured From c8f446f640b12b0577063eca8fab004e73c0aefc Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Mon, 29 Aug 2016 23:42:58 -0700 Subject: [PATCH 126/264] batch norm: hide statistics from solver, simplifying layer definition batch norm statistics are not learnable parameters subject to solver updates, so they must be shielded from the solver. `BatchNorm` layer now masks its statistics for itself by zeroing parameter learning rates instead of relying on the layer definition. n.b. declaring `param`s for batch norm layers is no longer allowed. --- include/caffe/layers/batch_norm_layer.hpp | 6 ++---- src/caffe/layers/batch_norm_layer.cpp | 8 ++++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/caffe/layers/batch_norm_layer.hpp b/include/caffe/layers/batch_norm_layer.hpp index a26ad1a42c7..43f7b28be95 100644 --- a/include/caffe/layers/batch_norm_layer.hpp +++ b/include/caffe/layers/batch_norm_layer.hpp @@ -22,10 +22,8 @@ namespace caffe { * mean/variance statistics via a running average, which is then used at test * time to allow deterministic outputs for each input. You can manually toggle * whether the network is accumulating or using the statistics via the - * use_global_stats option. IMPORTANT: for this feature to work, you MUST set - * the learning rate to zero for all three blobs, i.e., param {lr_mult: 0} three - * times in the layer definition. 
For reference, these three blobs are (0) - * mean, (1) variance, and (2) the moving average factor. + * use_global_stats option. For reference, these statistics are kept in the + * layer's three blobs: (0) mean, (1) variance, and (2) moving average factor. * * Note that the original paper also included a per-channel learned bias and * scaling factor. To implement this in Caffe, define a `ScaleLayer` configured diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp index a69d8f99316..0b1037edc63 100644 --- a/src/caffe/layers/batch_norm_layer.cpp +++ b/src/caffe/layers/batch_norm_layer.cpp @@ -34,6 +34,14 @@ void BatchNormLayer::LayerSetUp(const vector*>& bottom, this->blobs_[i]->mutable_cpu_data()); } } + // Mask statistics from optimization by setting local learning rates + // for mean, variance, and the bias correction to zero. + CHECK_EQ(this->layer_param_.param_size(), 0) + << "Cannot configure batch normalization statistics as layer parameters."; + for (int i = 0; i < this->blobs_.size(); ++i) { + ParamSpec* fixed_param_spec = this->layer_param_.add_param(); + fixed_param_spec->set_lr_mult(0.); + } } template From a8ec123c00723df0d0ad897e1eea32a29201c81b Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 9 Sep 2016 16:49:31 -0700 Subject: [PATCH 127/264] batch norm: auto-upgrade old layer definitions w/ param messages automatically strip old batch norm layer definitions including `param` messages. the batch norm layer used to require manually masking its state from the solver by setting `param { lr_mult: 0 }` messages for each of its statistics. this is now handled automatically by the layer. 
--- include/caffe/util/upgrade_proto.hpp | 6 +++++ src/caffe/util/upgrade_proto.cpp | 34 +++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/include/caffe/util/upgrade_proto.hpp b/include/caffe/util/upgrade_proto.hpp index 14e1936a8c2..b145822af32 100644 --- a/include/caffe/util/upgrade_proto.hpp +++ b/include/caffe/util/upgrade_proto.hpp @@ -65,6 +65,12 @@ bool NetNeedsInputUpgrade(const NetParameter& net_param); // Perform all necessary transformations to upgrade input fields into layers. void UpgradeNetInput(NetParameter* net_param); +// Return true iff the Net contains batch norm layers with manual local LRs. +bool NetNeedsBatchNormUpgrade(const NetParameter& net_param); + +// Perform all necessary transformations to upgrade batch norm layers. +void UpgradeNetBatchNorm(NetParameter* net_param); + // Return true iff the solver contains any old solver_type specified as enums bool SolverNeedsTypeUpgrade(const SolverParameter& solver_param); diff --git a/src/caffe/util/upgrade_proto.cpp b/src/caffe/util/upgrade_proto.cpp index 9e186915b43..a0aacbe92f8 100644 --- a/src/caffe/util/upgrade_proto.cpp +++ b/src/caffe/util/upgrade_proto.cpp @@ -14,7 +14,8 @@ namespace caffe { bool NetNeedsUpgrade(const NetParameter& net_param) { return NetNeedsV0ToV1Upgrade(net_param) || NetNeedsV1ToV2Upgrade(net_param) - || NetNeedsDataUpgrade(net_param) || NetNeedsInputUpgrade(net_param); + || NetNeedsDataUpgrade(net_param) || NetNeedsInputUpgrade(net_param) + || NetNeedsBatchNormUpgrade(net_param); } bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param) { @@ -71,6 +72,14 @@ bool UpgradeNetAsNeeded(const string& param_file, NetParameter* param) { LOG(WARNING) << "Note that future Caffe releases will only support " << "input layers and not input fields."; } + // NetParameter uses old style batch norm layers; try to upgrade it. 
+ if (NetNeedsBatchNormUpgrade(*param)) { + LOG(INFO) << "Attempting to upgrade batch norm layers using deprecated " + << "params: " << param_file; + UpgradeNetBatchNorm(param); + LOG(INFO) << "Successfully upgraded batch norm layers using deprecated " + << "params."; + } return success; } @@ -991,6 +1000,29 @@ void UpgradeNetInput(NetParameter* net_param) { net_param->clear_input_dim(); } +bool NetNeedsBatchNormUpgrade(const NetParameter& net_param) { + for (int i = 0; i < net_param.layer_size(); ++i) { + // Check if BatchNorm layers declare three parameters, as required by + // the previous BatchNorm layer definition. + if (net_param.layer(i).type() == "BatchNorm" + && net_param.layer(i).param_size() == 3) { + return true; + } + } + return false; +} + +void UpgradeNetBatchNorm(NetParameter* net_param) { + for (int i = 0; i < net_param->layer_size(); ++i) { + // Check if BatchNorm layers declare three parameters, as required by + // the previous BatchNorm layer definition. + if (net_param->layer(i).type() == "BatchNorm" + && net_param->layer(i).param_size() == 3) { + net_param->mutable_layer(i)->clear_param(); + } + } +} + // Return true iff the solver contains any old solver_type specified as enums bool SolverNeedsTypeUpgrade(const SolverParameter& solver_param) { if (solver_param.has_solver_type()) { From fc8f3eba6fa06be2f55d1b576f46664e07f5d0a6 Mon Sep 17 00:00:00 2001 From: Youssef Kashef Date: Tue, 13 Sep 2016 15:52:39 +0200 Subject: [PATCH 128/264] fix comments in matlab classification demo --- matlab/demo/classification_demo.m | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/matlab/demo/classification_demo.m b/matlab/demo/classification_demo.m index 2b60332970b..435c077845f 100644 --- a/matlab/demo/classification_demo.m +++ b/matlab/demo/classification_demo.m @@ -8,7 +8,7 @@ % % **************************************************************************** % For detailed documentation and usage on Caffe's Matlab interface, please -% 
refer to Caffe Interface Tutorial at +% refer to the Caffe Interface Tutorial at % http://caffe.berkeleyvision.org/tutorial/interfaces.html#matlab % **************************************************************************** % @@ -24,6 +24,7 @@ % $ export LD_LIBRARY_PATH=/opt/intel/mkl/lib/intel64:/usr/local/cuda-5.5/lib64 % $ export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libstdc++.so.6 % Or the equivalent based on where things are installed on your system +% and what versions are installed. % % Usage: % im = imread('../../examples/images/cat.jpg'); @@ -39,7 +40,7 @@ % Data coming in from matlab needs to be in the order % [width, height, channels, images] % where width is the fastest dimension. -% Here is the rough matlab for putting image data into the correct +% Here is the rough matlab code for putting image data into the correct % format in W x H x C with BGR channels: % % permute channels from RGB to BGR % im_data = im(:, :, [3, 2, 1]); @@ -54,7 +55,7 @@ % If you have multiple images, cat them with cat(4, ...) -% Add caffe/matlab to you Matlab search PATH to use matcaffe +% Add caffe/matlab to your Matlab search PATH in order to use matcaffe if exist('../+caffe', 'dir') addpath('..'); else From eee3be15589e81b5385c7d0d02a151c789134905 Mon Sep 17 00:00:00 2001 From: Miguel Lloreda Date: Thu, 15 Sep 2016 17:28:02 -0400 Subject: [PATCH 129/264] Fixed typos in examples/cpp_classification/readme --- examples/cpp_classification/readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cpp_classification/readme.md b/examples/cpp_classification/readme.md index 0de2885b53c..4f683aa623f 100644 --- a/examples/cpp_classification/readme.md +++ b/examples/cpp_classification/readme.md @@ -10,7 +10,7 @@ priority: 10 Caffe, at its core, is written in C++. It is possible to use the C++ API of Caffe to implement an image classification application similar -to the Python code presented in one of the Notebook example. 
To look +to the Python code presented in one of the Notebook examples. To look at a more general-purpose example of the Caffe C++ API, you should study the source code of the command line tool `caffe` in `tools/caffe.cpp`. @@ -19,7 +19,7 @@ study the source code of the command line tool `caffe` in `tools/caffe.cpp`. A simple C++ code is proposed in `examples/cpp_classification/classification.cpp`. For the sake of simplicity, this example does not support oversampling of a single -sample nor batching of multiple independant samples. This example is +sample nor batching of multiple independent samples. This example is not trying to reach the maximum possible classification throughput on a system, but special care was given to avoid unnecessary pessimization while keeping the code readable. From 2f55f42cff9147e69b1f5dff9232058d7b654eba Mon Sep 17 00:00:00 2001 From: Rok Mandeljc Date: Mon, 29 Jun 2015 15:48:43 +0200 Subject: [PATCH 130/264] matcaffe: allow destruction of individual networks and solvers --- matlab/+caffe/Net.m | 3 +++ matlab/+caffe/Solver.m | 3 +++ matlab/+caffe/private/caffe_.cpp | 24 ++++++++++++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/matlab/+caffe/Net.m b/matlab/+caffe/Net.m index e6295bba1a4..349e060eb22 100644 --- a/matlab/+caffe/Net.m +++ b/matlab/+caffe/Net.m @@ -68,6 +68,9 @@ self.layer_names = self.attributes.layer_names; self.blob_names = self.attributes.blob_names; end + function delete (self) + caffe_('delete_net', self.hNet_self); + end function layer = layers(self, layer_name) CHECK(ischar(layer_name), 'layer_name must be a string'); layer = self.layer_vec(self.name2layer_index(layer_name)); diff --git a/matlab/+caffe/Solver.m b/matlab/+caffe/Solver.m index f8bdc4e22b2..2d3c98b2a26 100644 --- a/matlab/+caffe/Solver.m +++ b/matlab/+caffe/Solver.m @@ -36,6 +36,9 @@ self.test_nets(n) = caffe.Net(self.attributes.hNet_test_nets(n)); end end + function delete (self) + caffe_('delete_solver', self.hSolver_self); + end 
function iter = iter(self) iter = caffe_('solver_get_iter', self.hSolver_self); end diff --git a/matlab/+caffe/private/caffe_.cpp b/matlab/+caffe/private/caffe_.cpp index 1b1b2bff861..bc04f4171e4 100644 --- a/matlab/+caffe/private/caffe_.cpp +++ b/matlab/+caffe/private/caffe_.cpp @@ -197,6 +197,17 @@ static void get_solver(MEX_ARGS) { mxFree(solver_file); } +// Usage: caffe_('delete_solver', hSolver) +static void delete_solver(MEX_ARGS) { + mxCHECK(nrhs == 1 && mxIsStruct(prhs[0]), + "Usage: caffe_('delete_solver', hSolver)"); + Solver* solver = handle_to_ptr >(prhs[0]); + solvers_.erase(std::remove_if(solvers_.begin(), solvers_.end(), + [solver] (const shared_ptr< Solver > &solverPtr) { + return solverPtr.get() == solver; + }), solvers_.end()); +} + // Usage: caffe_('solver_get_attr', hSolver) static void solver_get_attr(MEX_ARGS) { mxCHECK(nrhs == 1 && mxIsStruct(prhs[0]), @@ -271,6 +282,17 @@ static void get_net(MEX_ARGS) { mxFree(phase_name); } +// Usage: caffe_('delete_solver', hSolver) +static void delete_net(MEX_ARGS) { + mxCHECK(nrhs == 1 && mxIsStruct(prhs[0]), + "Usage: caffe_('delete_solver', hNet)"); + Net* net = handle_to_ptr >(prhs[0]); + nets_.erase(std::remove_if(nets_.begin(), nets_.end(), + [net] (const shared_ptr< Net > &netPtr) { + return netPtr.get() == net; + }), nets_.end()); +} + // Usage: caffe_('net_get_attr', hNet) static void net_get_attr(MEX_ARGS) { mxCHECK(nrhs == 1 && mxIsStruct(prhs[0]), @@ -522,12 +544,14 @@ struct handler_registry { static handler_registry handlers[] = { // Public API functions { "get_solver", get_solver }, + { "delete_solver", delete_solver }, { "solver_get_attr", solver_get_attr }, { "solver_get_iter", solver_get_iter }, { "solver_restore", solver_restore }, { "solver_solve", solver_solve }, { "solver_step", solver_step }, { "get_net", get_net }, + { "delete_net", delete_net }, { "net_get_attr", net_get_attr }, { "net_forward", net_forward }, { "net_backward", net_backward }, From 
f96ccea124314d4ea1374e906fbd709d1dc43585 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Fri, 23 Sep 2016 11:22:48 -0700 Subject: [PATCH 131/264] [TravisCI] google/protobuf renamed the 3.0 branch --- scripts/travis/install-deps.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh index 4e86ac73915..daef5c4a04f 100755 --- a/scripts/travis/install-deps.sh +++ b/scripts/travis/install-deps.sh @@ -56,7 +56,7 @@ else dh-autoreconf \ unzip - wget https://github.com/google/protobuf/archive/3.0.0-GA.tar.gz -O protobuf3.tar.gz + wget https://github.com/google/protobuf/archive/3.0.x.tar.gz -O protobuf3.tar.gz tar -xzf protobuf3.tar.gz -C $PROTOBUF3_DIR --strip 1 rm protobuf3.tar.gz cd $PROTOBUF3_DIR From 79a8c5210846f70108e5a2be1bedc95d9f8aea30 Mon Sep 17 00:00:00 2001 From: Ken Yu Date: Wed, 21 Sep 2016 16:19:17 +0800 Subject: [PATCH 132/264] Ignore Visual Studio Code files. --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 53c1fb056bb..281ef3266db 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,9 @@ # PyCharm files .idea +# Visual Studio Code files +.vscode + # OSX dir files .DS_Store From ce6ac831b96725bd770eaec5c0f743e423e355fd Mon Sep 17 00:00:00 2001 From: Benedikt Wilbertz Date: Thu, 29 Sep 2016 21:55:58 +0200 Subject: [PATCH 133/264] slightly relax batch norm check --- src/caffe/layers/batch_norm_layer.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp index 0b1037edc63..e661abb11b8 100644 --- a/src/caffe/layers/batch_norm_layer.cpp +++ b/src/caffe/layers/batch_norm_layer.cpp @@ -36,11 +36,15 @@ void BatchNormLayer::LayerSetUp(const vector*>& bottom, } // Mask statistics from optimization by setting local learning rates // for mean, variance, and the bias correction to zero. 
- CHECK_EQ(this->layer_param_.param_size(), 0) - << "Cannot configure batch normalization statistics as layer parameters."; for (int i = 0; i < this->blobs_.size(); ++i) { - ParamSpec* fixed_param_spec = this->layer_param_.add_param(); - fixed_param_spec->set_lr_mult(0.); + if (this->layer_param_.param_size() == i) { + ParamSpec* fixed_param_spec = this->layer_param_.add_param(); + fixed_param_spec->set_lr_mult(0.f); + } else { + CHECK_EQ(this->layer_param_.param(i).lr_mult(), 0.f) + << "Cannot configure batch normalization statistics as layer " + << "parameters."; + } } } From 08ca70326966ad24b012ca8084c8baba5b1a23b5 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Fri, 30 Sep 2016 18:18:47 -0700 Subject: [PATCH 134/264] NV changed path to cudnn --- scripts/travis/install-deps.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh index daef5c4a04f..1900b16df54 100755 --- a/scripts/travis/install-deps.sh +++ b/scripts/travis/install-deps.sh @@ -84,7 +84,7 @@ if $WITH_CUDA ; then rm $CUDA_REPO_PKG if $WITH_CUDNN ; then - ML_REPO_PKG=nvidia-machine-learning-repo_4.0-2_amd64.deb + ML_REPO_PKG=nvidia-machine-learning-repo-ubuntu1404_4.0-2_amd64.deb wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1404/x86_64/$ML_REPO_PKG dpkg -i $ML_REPO_PKG fi From c97a964a23f0ddd455c619537e208f117ae77743 Mon Sep 17 00:00:00 2001 From: Kun Wang Date: Wed, 5 Oct 2016 18:59:07 +0800 Subject: [PATCH 135/264] fix typo in pascal_multilabel_datalayers.py --- examples/pycaffe/layers/pascal_multilabel_datalayers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/pycaffe/layers/pascal_multilabel_datalayers.py b/examples/pycaffe/layers/pascal_multilabel_datalayers.py index 68e4fa7960a..9420cb328ce 100644 --- a/examples/pycaffe/layers/pascal_multilabel_datalayers.py +++ b/examples/pycaffe/layers/pascal_multilabel_datalayers.py @@ -20,7 +20,7 @@ class 
PascalMultilabelDataLayerSync(caffe.Layer): """ - This is a simple syncronous datalayer for training a multilabel model on + This is a simple synchronous datalayer for training a multilabel model on PASCAL. """ @@ -33,7 +33,7 @@ def setup(self, bottom, top): # params is a python dictionary with layer parameters. params = eval(self.param_str) - # Check the paramameters for validity. + # Check the parameters for validity. check_params(params) # store input as class variables @@ -207,7 +207,7 @@ def check_params(params): def print_info(name, params): """ - Ouput some info regarding the class + Output some info regarding the class """ print "{} initialized for split: {}, with bs: {}, im_shape: {}.".format( name, From cdd2d0ee9ed42200b6ab8b52c0213bb5916b46c4 Mon Sep 17 00:00:00 2001 From: Vincent Date: Wed, 5 Oct 2016 13:12:04 +0100 Subject: [PATCH 136/264] Fix: docs/yum_install.md glog broken link fixes the broken glog link in yum_install.md which is currently returning a 404. --- docs/install_yum.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/install_yum.md b/docs/install_yum.md index 2104912e482..38bf7255c93 100644 --- a/docs/install_yum.md +++ b/docs/install_yum.md @@ -15,7 +15,7 @@ title: Installation: RHEL / Fedora / CentOS **Remaining dependencies, if not found** # glog - wget https://google-glog.googlecode.com/files/glog-0.3.3.tar.gz + wget https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/google-glog/glog-0.3.3.tar.gz tar zxvf glog-0.3.3.tar.gz cd glog-0.3.3 ./configure From 553a645f1d6f950bf1a36284bb13b5fc7c3bacdc Mon Sep 17 00:00:00 2001 From: Guillaume Dumont Date: Thu, 13 Oct 2016 22:29:56 -0400 Subject: [PATCH 137/264] pytest fix: Files created with NamedTemporary files cannot be opened on Windows --- python/caffe/test/test_net.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/python/caffe/test/test_net.py b/python/caffe/test/test_net.py index 
e1090934d73..a0739fbac4d 100644 --- a/python/caffe/test/test_net.py +++ b/python/caffe/test/test_net.py @@ -173,12 +173,12 @@ class TestLevels(unittest.TestCase): """ def setUp(self): - self.f = tempfile.NamedTemporaryFile(mode='w+') + self.f = tempfile.NamedTemporaryFile(mode='w+', delete=False) self.f.write(self.TEST_NET) - self.f.flush() + self.f.close() def tearDown(self): - self.f.close() + os.remove(self.f.name) def check_net(self, net, blobs): net_blobs = [b for b in net.blobs.keys() if 'data' not in b] @@ -238,12 +238,12 @@ class TestStages(unittest.TestCase): """ def setUp(self): - self.f = tempfile.NamedTemporaryFile(mode='w+') + self.f = tempfile.NamedTemporaryFile(mode='w+', delete=False) self.f.write(self.TEST_NET) - self.f.flush() + self.f.close() def tearDown(self): - self.f.close() + os.remove(self.f.name) def check_net(self, net, blobs): net_blobs = [b for b in net.blobs.keys() if 'data' not in b] @@ -320,12 +320,12 @@ class TestAllInOne(unittest.TestCase): """ def setUp(self): - self.f = tempfile.NamedTemporaryFile(mode='w+') + self.f = tempfile.NamedTemporaryFile(mode='w+', delete=False) self.f.write(self.TEST_NET) - self.f.flush() + self.f.close() def tearDown(self): - self.f.close() + os.remove(self.f.name) def check_net(self, net, outputs): self.assertEqual(list(net.blobs['data'].shape), [1,1,10,10]) From 95a436c601a04af620a0e166393d3ff695905bc4 Mon Sep 17 00:00:00 2001 From: max argus Date: Thu, 25 Aug 2016 09:20:24 +0000 Subject: [PATCH 138/264] Fix: made load_hd5 check blob dims by default. Size checks are needed for loading parameters to avoid strange bugs when loading data we continue to reshape. 
--- include/caffe/util/hdf5.hpp | 4 +-- src/caffe/layers/hdf5_data_layer.cpp | 3 +- src/caffe/test/test_hdf5_output_layer.cpp | 10 ++++--- src/caffe/test/test_hdf5data_layer.cpp | 2 +- src/caffe/util/hdf5.cpp | 34 +++++++++++++++++++---- 5 files changed, 39 insertions(+), 14 deletions(-) diff --git a/include/caffe/util/hdf5.hpp b/include/caffe/util/hdf5.hpp index ce568c5eb0d..71549c1cc02 100644 --- a/include/caffe/util/hdf5.hpp +++ b/include/caffe/util/hdf5.hpp @@ -13,12 +13,12 @@ namespace caffe { template void hdf5_load_nd_dataset_helper( hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, - Blob* blob); + Blob* blob, bool reshape); template void hdf5_load_nd_dataset( hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, - Blob* blob); + Blob* blob, bool reshape = false); template void hdf5_save_nd_dataset( diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index 2f13dc641df..0099129000c 100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -39,8 +39,9 @@ void HDF5DataLayer::LoadHDF5FileData(const char* filename) { for (int i = 0; i < top_size; ++i) { hdf_blobs_[i] = shared_ptr >(new Blob()); + // Allow reshape here, as we are loading data not params hdf5_load_nd_dataset(file_id, this->layer_param_.top(i).c_str(), - MIN_DATA_DIM, MAX_DATA_DIM, hdf_blobs_[i].get()); + MIN_DATA_DIM, MAX_DATA_DIM, hdf_blobs_[i].get(), true); } herr_t status = H5Fclose(file_id); diff --git a/src/caffe/test/test_hdf5_output_layer.cpp b/src/caffe/test/test_hdf5_output_layer.cpp index 3833ebff78e..2bc2de1e647 100644 --- a/src/caffe/test/test_hdf5_output_layer.cpp +++ b/src/caffe/test/test_hdf5_output_layer.cpp @@ -77,10 +77,12 @@ TYPED_TEST(HDF5OutputLayerTest, TestForward) { H5P_DEFAULT); ASSERT_GE(file_id, 0)<< "Failed to open HDF5 file" << this->input_file_name_; + // Allow reshape here as we are loading data not params + bool reshape = true; hdf5_load_nd_dataset(file_id, 
HDF5_DATA_DATASET_NAME, 0, 4, - this->blob_data_); + this->blob_data_, reshape); hdf5_load_nd_dataset(file_id, HDF5_DATA_LABEL_NAME, 0, 4, - this->blob_label_); + this->blob_label_, reshape); herr_t status = H5Fclose(file_id); EXPECT_GE(status, 0)<< "Failed to close HDF5 file " << this->input_file_name_; @@ -105,12 +107,12 @@ TYPED_TEST(HDF5OutputLayerTest, TestForward) { Blob* blob_data = new Blob(); hdf5_load_nd_dataset(file_id, HDF5_DATA_DATASET_NAME, 0, 4, - blob_data); + blob_data, reshape); this->CheckBlobEqual(*(this->blob_data_), *blob_data); Blob* blob_label = new Blob(); hdf5_load_nd_dataset(file_id, HDF5_DATA_LABEL_NAME, 0, 4, - blob_label); + blob_label, reshape); this->CheckBlobEqual(*(this->blob_label_), *blob_label); status = H5Fclose(file_id); diff --git a/src/caffe/test/test_hdf5data_layer.cpp b/src/caffe/test/test_hdf5data_layer.cpp index 8884ce95a23..e0fd62134c5 100644 --- a/src/caffe/test/test_hdf5data_layer.cpp +++ b/src/caffe/test/test_hdf5data_layer.cpp @@ -70,7 +70,7 @@ TYPED_TEST(HDF5DataLayerTest, TestRead) { int height = 6; int width = 5; - // Test that the layer setup got the correct parameters. + // Test that the layer setup gives correct parameters. HDF5DataLayer layer(param); layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); EXPECT_EQ(this->blob_top_data_->num(), batch_size); diff --git a/src/caffe/util/hdf5.cpp b/src/caffe/util/hdf5.cpp index 7730e76ab87..0003f1b3988 100644 --- a/src/caffe/util/hdf5.cpp +++ b/src/caffe/util/hdf5.cpp @@ -9,7 +9,7 @@ namespace caffe { template void hdf5_load_nd_dataset_helper( hid_t file_id, const char* dataset_name_, int min_dim, int max_dim, - Blob* blob) { + Blob* blob, bool reshape) { // Verify that the dataset exists. 
CHECK(H5LTfind_dataset(file_id, dataset_name_)) << "Failed to find HDF5 dataset " << dataset_name_; @@ -56,17 +56,38 @@ void hdf5_load_nd_dataset_helper( LOG(FATAL) << "Datatype class unknown"; } + vector blob_dims(dims.size()); for (int i = 0; i < dims.size(); ++i) { blob_dims[i] = dims[i]; } - blob->Reshape(blob_dims); + + if (reshape) { + blob->Reshape(blob_dims); + } else { + if (blob_dims != blob->shape()) { + // create shape string for error message + ostringstream stream; + int count = 1; + for (int i = 0; i < blob_dims.size(); ++i) { + stream << blob_dims[i] << " "; + count = count * blob_dims[i]; + } + stream << "(" << count << ")"; + string source_shape_string = stream.str(); + + CHECK(blob_dims == blob->shape()) << "Cannot load blob from hdf5; shape " + << "mismatch. Source shape is " << source_shape_string + << " target shape is " << blob->shape_string(); + } + } } template <> void hdf5_load_nd_dataset(hid_t file_id, const char* dataset_name_, - int min_dim, int max_dim, Blob* blob) { - hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob); + int min_dim, int max_dim, Blob* blob, bool reshape) { + hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob, + reshape); herr_t status = H5LTread_dataset_float( file_id, dataset_name_, blob->mutable_cpu_data()); CHECK_GE(status, 0) << "Failed to read float dataset " << dataset_name_; @@ -74,8 +95,9 @@ void hdf5_load_nd_dataset(hid_t file_id, const char* dataset_name_, template <> void hdf5_load_nd_dataset(hid_t file_id, const char* dataset_name_, - int min_dim, int max_dim, Blob* blob) { - hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob); + int min_dim, int max_dim, Blob* blob, bool reshape) { + hdf5_load_nd_dataset_helper(file_id, dataset_name_, min_dim, max_dim, blob, + reshape); herr_t status = H5LTread_dataset_double( file_id, dataset_name_, blob->mutable_cpu_data()); CHECK_GE(status, 0) << "Failed to read double dataset " << dataset_name_; 
From 197d11a0e1be7ad35714eb38d9b391e1cd39af39 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 27 Oct 2016 00:41:03 -0700 Subject: [PATCH 139/264] sigmoid cross-entropy loss: add GPU forward for full GPU mode close #3004 --- .../sigmoid_cross_entropy_loss_layer.hpp | 2 ++ .../sigmoid_cross_entropy_loss_layer.cpp | 2 +- .../sigmoid_cross_entropy_loss_layer.cu | 36 +++++++++++++++++-- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp b/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp index 598dca5ff2c..6452ea5106a 100644 --- a/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp +++ b/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp @@ -59,6 +59,8 @@ class SigmoidCrossEntropyLossLayer : public LossLayer { /// @copydoc SigmoidCrossEntropyLossLayer virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); + virtual void Forward_gpu(const vector*>& bottom, + const vector*>& top); /** * @brief Computes the sigmoid cross-entropy loss error gradient w.r.t. 
the diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp index 10ac9470832..eb77a9c2cb8 100644 --- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp +++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp @@ -68,7 +68,7 @@ void SigmoidCrossEntropyLossLayer::Backward_cpu( } #ifdef CPU_ONLY -STUB_GPU_BACKWARD(SigmoidCrossEntropyLossLayer, Backward); +STUB_GPU(SigmoidCrossEntropyLossLayer); #endif INSTANTIATE_CLASS(SigmoidCrossEntropyLossLayer); diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu index 046cb9d3a31..7cb982d2d70 100644 --- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu +++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu @@ -5,6 +5,39 @@ namespace caffe { +template +__global__ void SigmoidCrossEntropyLossForwardGPU(const int nthreads, + const Dtype* input_data, const Dtype* target, Dtype* loss) { + CUDA_KERNEL_LOOP(i, nthreads) { + loss[i] = input_data[i] * (target[i] - (input_data[i] >= 0)) - + log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0))); + } +} + +template +void SigmoidCrossEntropyLossLayer::Forward_gpu( + const vector*>& bottom, const vector*>& top) { + // The forward pass computes the sigmoid outputs. + sigmoid_bottom_vec_[0] = bottom[0]; + sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_); + // Compute the loss (negative log likelihood) + const int count = bottom[0]->count(); + const int num = bottom[0]->num(); + // Stable version of loss computation from input data + const Dtype* input_data = bottom[0]->gpu_data(); + const Dtype* target = bottom[1]->gpu_data(); + // Since this memory is not used for anything until it is overwritten + // on the backward pass, we use it here to avoid having to allocate new GPU + // memory to accumulate intermediate results in the kernel. 
+ Dtype* loss_data = bottom[0]->mutable_gpu_diff(); + // NOLINT_NEXT_LINE(whitespace/operators) + SigmoidCrossEntropyLossForwardGPU<<>>(count, input_data, target, loss_data); + Dtype loss; + caffe_gpu_asum(count, loss_data, &loss); + top[0]->mutable_cpu_data()[0] = loss / num; +} + template void SigmoidCrossEntropyLossLayer::Backward_gpu( const vector*>& top, const vector& propagate_down, @@ -28,7 +61,6 @@ void SigmoidCrossEntropyLossLayer::Backward_gpu( } } -INSTANTIATE_LAYER_GPU_BACKWARD(SigmoidCrossEntropyLossLayer); - +INSTANTIATE_LAYER_GPU_FUNCS(SigmoidCrossEntropyLossLayer); } // namespace caffe From f59dc97b090259f54801d620b6b10ad1fb1542e2 Mon Sep 17 00:00:00 2001 From: nihui Date: Tue, 1 Nov 2016 14:02:52 +0800 Subject: [PATCH 140/264] add the missing star in comment a trival commit which adds the missing star ;) --- src/caffe/layers/rnn_layer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/layers/rnn_layer.cpp b/src/caffe/layers/rnn_layer.cpp index f62ae8c77de..8c2fa22e598 100644 --- a/src/caffe/layers/rnn_layer.cpp +++ b/src/caffe/layers/rnn_layer.cpp @@ -215,7 +215,7 @@ void RNNLayer::FillUnrolledNet(NetParameter* net_param) const { } // Add layers to compute - // o_t := \tanh( W_ho h_t + b_o) + // o_t := \tanh( W_ho * h_t + b_o) // = \tanh( W_ho_h_t ) { LayerParameter* o_neuron_param = net_param->add_layer(); From 0d20df51901550f1b7eb2d56e0a84df5d6e2f029 Mon Sep 17 00:00:00 2001 From: baecchi Date: Tue, 1 Nov 2016 16:15:51 +0100 Subject: [PATCH 141/264] corrected typo in accuracy_layer.hpp: MaxTopBlos -> MaxTopBlobs --- include/caffe/layers/accuracy_layer.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/caffe/layers/accuracy_layer.hpp b/include/caffe/layers/accuracy_layer.hpp index fe2adb939e4..a9ad3225149 100644 --- a/include/caffe/layers/accuracy_layer.hpp +++ b/include/caffe/layers/accuracy_layer.hpp @@ -39,7 +39,7 @@ class AccuracyLayer : public Layer { // If there are two top blobs, then 
the second blob will contain // accuracies per class. virtual inline int MinTopBlobs() const { return 1; } - virtual inline int MaxTopBlos() const { return 2; } + virtual inline int MaxTopBlobs() const { return 2; } protected: /** From 3b443eacb30d8f4b3e551707faeebeeb15e77960 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Williford" Date: Fri, 28 Oct 2016 10:39:44 +0200 Subject: [PATCH 142/264] Add Github issue template to curb misuse. For information on Github issue templates, see: https://github.com/blog/2111-issue-and-pull-request-templates The template has been revised according to discussion with @shelhamer and @willyd on pull request BVLC/caffe#4914. --- .github/ISSUE_TEMPLATE.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE.md diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md new file mode 100644 index 00000000000..d78a3dc3455 --- /dev/null +++ b/.github/ISSUE_TEMPLATE.md @@ -0,0 +1,19 @@ +Please use the [caffe-users list](https://groups.google.com/forum/#!forum/caffe-users) for usage, installation, or modeling questions, or other requests for help. +_Do not post such requests to Issues._ Doing so interferes with the development of Caffe. + +Please read the [guidelines for contributing](https://github.com/BVLC/caffe/blob/master/CONTRIBUTING.md) before submitting this issue. + +### Issue summary + + +### Steps to reproduce + +If you are having difficulty building Caffe or training a model, please ask the caffe-users mailing list. If you are reporting a build error that seems to be due to a bug in Caffe, please attach your build configuration (either Makefile.config or CMakeCache.txt) and the output of the make (or cmake) command. 
+ +### Your system configuration +Operating system: +Compiler: +CUDA version (if applicable): +CUDNN version (if applicable): +BLAS: +Python or MATLAB version (for pycaffe and matcaffe respectively): From 20feab5771ae5cbb257cfec85e0b98da06269068 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Williford" Date: Fri, 11 Nov 2016 07:38:14 +0000 Subject: [PATCH 143/264] Put quotes around titles in YAML front matter. The colon produces errors unless the title is in quotes. This causes the minor issue of the HTML title not being set. See: https://github.com/jekyll/jekyll/issues/549 --- docs/install_apt.md | 2 +- docs/install_osx.md | 2 +- docs/install_yum.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/install_apt.md b/docs/install_apt.md index 3de5a494e0c..e95b022761b 100644 --- a/docs/install_apt.md +++ b/docs/install_apt.md @@ -1,5 +1,5 @@ --- -title: Installation: Ubuntu +title: "Installation: Ubuntu" --- # Ubuntu Installation diff --git a/docs/install_osx.md b/docs/install_osx.md index 6405d8ad046..a2da82f0fb2 100644 --- a/docs/install_osx.md +++ b/docs/install_osx.md @@ -1,5 +1,5 @@ --- -title: Installation: OS X +title: "Installation: OS X" --- # OS X Installation diff --git a/docs/install_yum.md b/docs/install_yum.md index 38bf7255c93..842fbd64177 100644 --- a/docs/install_yum.md +++ b/docs/install_yum.md @@ -1,5 +1,5 @@ --- -title: Installation: RHEL / Fedora / CentOS +title: "Installation: RHEL / Fedora / CentOS" --- # RHEL / Fedora / CentOS Installation From aaf7b6b17fdded6f6489eaf84a4d336b3344c356 Mon Sep 17 00:00:00 2001 From: davidbrai Date: Mon, 14 Nov 2016 22:10:27 +0200 Subject: [PATCH 144/264] support solver resumes in parse_log.py Currently parse_log.py skips all non timestamped lines only once. When resuming a solver and appending to the same log file, it creates more non timestamped log lines. This change allows the script to silently skip those lines. 
--- tools/extra/parse_log.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/extra/parse_log.py b/tools/extra/parse_log.py index 375b0db73b3..017306b5088 100755 --- a/tools/extra/parse_log.py +++ b/tools/extra/parse_log.py @@ -48,8 +48,13 @@ def parse_log(path_to_log): # iteration continue - time = extract_seconds.extract_datetime_from_line(line, - logfile_year) + try: + time = extract_seconds.extract_datetime_from_line(line, + logfile_year) + except ValueError: + # Skip lines with bad formatting, for example when resuming solver + continue + seconds = (time - start_time).total_seconds() learning_rate_match = regex_learning_rate.search(line) From c6ab96596d9eae01c2c403487dc8be8e3edc8fbb Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Tue, 15 Nov 2016 11:19:37 -0800 Subject: [PATCH 145/264] sigmoid cross-entropy loss: ignore selected targets by `ignore_label` sig-ce learns to ignore by zeroing out the loss/diff at targets equal to the configured `ignore_label`. n.b. as of now the loss/diff are not properly normalized when there are ignored targets. sig-ce loss should adopt the same normalization options as softmax loss. 
--- .../sigmoid_cross_entropy_loss_layer.hpp | 5 ++++ .../sigmoid_cross_entropy_loss_layer.cpp | 19 +++++++++++++ .../sigmoid_cross_entropy_loss_layer.cu | 23 +++++++++++++++ .../test_sigmoid_cross_entropy_loss_layer.cpp | 28 +++++++++++++++++++ 4 files changed, 75 insertions(+) diff --git a/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp b/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp index 6452ea5106a..a9fe33c8e08 100644 --- a/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp +++ b/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp @@ -105,6 +105,11 @@ class SigmoidCrossEntropyLossLayer : public LossLayer { vector*> sigmoid_bottom_vec_; /// top vector holder to call the underlying SigmoidLayer::Forward vector*> sigmoid_top_vec_; + + /// Whether to ignore instances with a certain label. + bool has_ignore_label_; + /// The label indicating that an instance should be ignored. + int ignore_label_; }; } // namespace caffe diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp index eb77a9c2cb8..21b64c28002 100644 --- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp +++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp @@ -14,6 +14,12 @@ void SigmoidCrossEntropyLossLayer::LayerSetUp( sigmoid_top_vec_.clear(); sigmoid_top_vec_.push_back(sigmoid_output_.get()); sigmoid_layer_->SetUp(sigmoid_bottom_vec_, sigmoid_top_vec_); + + has_ignore_label_ = + this->layer_param_.loss_param().has_ignore_label(); + if (has_ignore_label_) { + ignore_label_ = this->layer_param_.loss_param().ignore_label(); + } } template @@ -39,6 +45,10 @@ void SigmoidCrossEntropyLossLayer::Forward_cpu( const Dtype* target = bottom[1]->cpu_data(); Dtype loss = 0; for (int i = 0; i < count; ++i) { + const int target_value = static_cast(target[i]); + if (has_ignore_label_ && target_value == ignore_label_) { + continue; + } loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) - log(1 + 
exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0))); } @@ -64,6 +74,15 @@ void SigmoidCrossEntropyLossLayer::Backward_cpu( // Scale down gradient const Dtype loss_weight = top[0]->cpu_diff()[0]; caffe_scal(count, loss_weight / num, bottom_diff); + // Zero out gradient of ignored targets. + if (has_ignore_label_) { + for (int i = 0; i < count; ++i) { + const int target_value = static_cast(target[i]); + if (target_value == ignore_label_) { + bottom_diff[i] = 0; + } + } + } } } diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu index 7cb982d2d70..39eb050664b 100644 --- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu +++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu @@ -14,6 +14,17 @@ __global__ void SigmoidCrossEntropyLossForwardGPU(const int nthreads, } } +template +__global__ void SigmoidCrossEntropyLossIgnoreGPU(const int count, + const int ignore_label, const Dtype* target, Dtype* reference) { + CUDA_KERNEL_LOOP(index, count) { + const int target_value = static_cast(target[index]); + if (target_value == ignore_label) { + reference[index] = 0; + } + } +} + template void SigmoidCrossEntropyLossLayer::Forward_gpu( const vector*>& bottom, const vector*>& top) { @@ -33,6 +44,12 @@ void SigmoidCrossEntropyLossLayer::Forward_gpu( // NOLINT_NEXT_LINE(whitespace/operators) SigmoidCrossEntropyLossForwardGPU<<>>(count, input_data, target, loss_data); + // Zero out loss of ignored targets. + if (has_ignore_label_) { + // NOLINT_NEXT_LINE(whitespace/operators) + SigmoidCrossEntropyLossIgnoreGPU<<>>(count, ignore_label_, target, loss_data); + } Dtype loss; caffe_gpu_asum(count, loss_data, &loss); top[0]->mutable_cpu_data()[0] = loss / num; @@ -58,6 +75,12 @@ void SigmoidCrossEntropyLossLayer::Backward_gpu( // Scale down gradient const Dtype loss_weight = top[0]->cpu_diff()[0]; caffe_gpu_scal(count, loss_weight / num, bottom_diff); + // Zero out gradient of ignored targets. 
+ if (has_ignore_label_) { + // NOLINT_NEXT_LINE(whitespace/operators) + SigmoidCrossEntropyLossIgnoreGPU<<>>(count, ignore_label_, target, bottom_diff); + } } } diff --git a/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp index 5dfd7656db2..1bd5f93796f 100644 --- a/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp +++ b/src/caffe/test/test_sigmoid_cross_entropy_loss_layer.cpp @@ -116,5 +116,33 @@ TYPED_TEST(SigmoidCrossEntropyLossLayerTest, TestGradient) { this->blob_top_vec_, 0); } +TYPED_TEST(SigmoidCrossEntropyLossLayerTest, TestIgnoreGradient) { + typedef typename TypeParam::Dtype Dtype; + FillerParameter data_filler_param; + data_filler_param.set_std(1); + GaussianFiller data_filler(data_filler_param); + data_filler.Fill(this->blob_bottom_data_); + LayerParameter layer_param; + LossParameter* loss_param = layer_param.mutable_loss_param(); + loss_param->set_ignore_label(-1); + Dtype* target = this->blob_bottom_targets_->mutable_cpu_data(); + const int count = this->blob_bottom_targets_->count(); + // Ignore half of targets, then check that diff of this half is zero, + // while the other half is nonzero. 
+ caffe_set(count / 2, Dtype(-1), target); + SigmoidCrossEntropyLossLayer layer(layer_param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + vector propagate_down(2); + propagate_down[0] = true; + propagate_down[1] = false; + layer.Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + const Dtype* diff = this->blob_bottom_data_->cpu_diff(); + for (int i = 0; i < count / 2; ++i) { + EXPECT_FLOAT_EQ(diff[i], 0.); + EXPECT_NE(diff[i + count / 2], 0.); + } +} + } // namespace caffe From 6486a7b795b70fa9a6597e975577d3ec9cc146bd Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 16 Nov 2016 13:15:06 -0800 Subject: [PATCH 146/264] docs: Guillaume Dumont is the Windows maintainer --- docs/installation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/installation.md b/docs/installation.md index 4aac7c42d27..edfddcfbbe8 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -14,7 +14,7 @@ The official Makefile and `Makefile.config` build are complemented by a [communi - [Ubuntu installation](install_apt.html) *the standard platform* - [OS X installation](install_osx.html) - [RHEL / CentOS / Fedora installation](install_yum.html) -- [Windows](https://github.com/BVLC/caffe/tree/windows) *see the Windows branch led by Microsoft* +- [Windows](https://github.com/BVLC/caffe/tree/windows) *see the Windows branch led by Guillaume Dumont* - [OpenCL](https://github.com/BVLC/caffe/tree/opencl) *see the OpenCL branch led by Fabian Tschopp* **Overview**: From abcb973f5840c7b97ee3c7c910556ef0c5910baf Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 16 Nov 2016 13:26:37 -0800 Subject: [PATCH 147/264] docs: include AWS AMI pointer --- docs/installation.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/installation.md b/docs/installation.md index edfddcfbbe8..3254be3dfcd 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -16,6 
+16,7 @@ The official Makefile and `Makefile.config` build are complemented by a [communi - [RHEL / CentOS / Fedora installation](install_yum.html) - [Windows](https://github.com/BVLC/caffe/tree/windows) *see the Windows branch led by Guillaume Dumont* - [OpenCL](https://github.com/BVLC/caffe/tree/opencl) *see the OpenCL branch led by Fabian Tschopp* +- [AWS AMI](https://github.com/bitfusionio/amis/tree/master/awsmrkt-bfboost-ubuntu14-cuda75-caffe) *pre-configured for AWS* **Overview**: From 3d62e3cc9da66dbf3328567d0f30d5183b318d81 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 16 Nov 2016 20:39:42 -0800 Subject: [PATCH 148/264] sigmoid cross-entropy loss: normalize loss by different schemes sig-ce loss handles all the same normalizations as the softmax loss; refer to #3296 for more detail. this preserves the default normalization for sig-ce loss: batch size. --- .../sigmoid_cross_entropy_loss_layer.hpp | 11 ++++ .../sigmoid_cross_entropy_loss_layer.cpp | 60 ++++++++++++++++--- .../sigmoid_cross_entropy_loss_layer.cu | 57 +++++++++++------- src/caffe/proto/caffe.proto | 4 +- 4 files changed, 102 insertions(+), 30 deletions(-) diff --git a/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp b/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp index a9fe33c8e08..3d92524421c 100644 --- a/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp +++ b/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp @@ -97,6 +97,13 @@ class SigmoidCrossEntropyLossLayer : public LossLayer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); + /// Read the normalization mode parameter and compute the normalizer based + /// on the blob size. If normalization_mode is VALID, the count of valid + /// outputs will be read from valid_count, unless it is -1 in which case + /// all outputs are assumed to be valid. 
+ virtual Dtype get_normalizer( + LossParameter_NormalizationMode normalization_mode, int valid_count); + /// The internal SigmoidLayer used to map predictions to probabilities. shared_ptr > sigmoid_layer_; /// sigmoid_output stores the output of the SigmoidLayer. @@ -110,6 +117,10 @@ class SigmoidCrossEntropyLossLayer : public LossLayer { bool has_ignore_label_; /// The label indicating that an instance should be ignored. int ignore_label_; + /// How to normalize the loss. + LossParameter_NormalizationMode normalization_; + Dtype normalizer_; + int outer_num_, inner_num_; }; } // namespace caffe diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp index 21b64c28002..99fa3eb645a 100644 --- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp +++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp @@ -1,3 +1,4 @@ +#include #include #include "caffe/layers/sigmoid_cross_entropy_loss_layer.hpp" @@ -20,17 +21,60 @@ void SigmoidCrossEntropyLossLayer::LayerSetUp( if (has_ignore_label_) { ignore_label_ = this->layer_param_.loss_param().ignore_label(); } + if (this->layer_param_.loss_param().has_normalization()) { + normalization_ = this->layer_param_.loss_param().normalization(); + } else if (this->layer_param_.loss_param().has_normalize()) { + normalization_ = this->layer_param_.loss_param().normalize() ? 
+ LossParameter_NormalizationMode_VALID : + LossParameter_NormalizationMode_BATCH_SIZE; + } else { + normalization_ = LossParameter_NormalizationMode_BATCH_SIZE; + } } template void SigmoidCrossEntropyLossLayer::Reshape( const vector*>& bottom, const vector*>& top) { LossLayer::Reshape(bottom, top); + outer_num_ = bottom[0]->shape(0); // batch size + inner_num_ = bottom[0]->count(1); // instance size: |output| == |target| CHECK_EQ(bottom[0]->count(), bottom[1]->count()) << "SIGMOID_CROSS_ENTROPY_LOSS layer inputs must have the same count."; sigmoid_layer_->Reshape(sigmoid_bottom_vec_, sigmoid_top_vec_); } +// TODO(shelhamer) loss normalization should be pulled up into LossLayer, +// instead of duplicated here and in SoftMaxWithLossLayer +template +Dtype SigmoidCrossEntropyLossLayer::get_normalizer( + LossParameter_NormalizationMode normalization_mode, int valid_count) { + Dtype normalizer; + switch (normalization_mode) { + case LossParameter_NormalizationMode_FULL: + normalizer = Dtype(outer_num_ * inner_num_); + break; + case LossParameter_NormalizationMode_VALID: + if (valid_count == -1) { + normalizer = Dtype(outer_num_ * inner_num_); + } else { + normalizer = Dtype(valid_count); + } + break; + case LossParameter_NormalizationMode_BATCH_SIZE: + normalizer = Dtype(outer_num_); + break; + case LossParameter_NormalizationMode_NONE: + normalizer = Dtype(1); + break; + default: + LOG(FATAL) << "Unknown normalization mode: " + << LossParameter_NormalizationMode_Name(normalization_mode); + } + // Some users will have no labels for some examples in order to 'turn off' a + // particular loss in a multi-task setup. The max prevents NaNs in that case. 
+ return std::max(Dtype(1.0), normalizer); +} + template void SigmoidCrossEntropyLossLayer::Forward_cpu( const vector*>& bottom, const vector*>& top) { @@ -38,21 +82,22 @@ void SigmoidCrossEntropyLossLayer::Forward_cpu( sigmoid_bottom_vec_[0] = bottom[0]; sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_); // Compute the loss (negative log likelihood) - const int count = bottom[0]->count(); - const int num = bottom[0]->num(); // Stable version of loss computation from input data const Dtype* input_data = bottom[0]->cpu_data(); const Dtype* target = bottom[1]->cpu_data(); + int valid_count = 0; Dtype loss = 0; - for (int i = 0; i < count; ++i) { + for (int i = 0; i < bottom[0]->count(); ++i) { const int target_value = static_cast(target[i]); if (has_ignore_label_ && target_value == ignore_label_) { continue; } loss -= input_data[i] * (target[i] - (input_data[i] >= 0)) - log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0))); + ++valid_count; } - top[0]->mutable_cpu_data()[0] = loss / num; + normalizer_ = get_normalizer(normalization_, valid_count); + top[0]->mutable_cpu_data()[0] = loss / normalizer_; } template @@ -66,14 +111,10 @@ void SigmoidCrossEntropyLossLayer::Backward_cpu( if (propagate_down[0]) { // First, compute the diff const int count = bottom[0]->count(); - const int num = bottom[0]->num(); const Dtype* sigmoid_output_data = sigmoid_output_->cpu_data(); const Dtype* target = bottom[1]->cpu_data(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); caffe_sub(count, sigmoid_output_data, target, bottom_diff); - // Scale down gradient - const Dtype loss_weight = top[0]->cpu_diff()[0]; - caffe_scal(count, loss_weight / num, bottom_diff); // Zero out gradient of ignored targets. 
if (has_ignore_label_) { for (int i = 0; i < count; ++i) { @@ -83,6 +124,9 @@ void SigmoidCrossEntropyLossLayer::Backward_cpu( } } } + // Scale down gradient + Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer_; + caffe_scal(count, loss_weight, bottom_diff); } } diff --git a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu index 39eb050664b..b9877e6a3f6 100644 --- a/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu +++ b/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu @@ -5,26 +5,38 @@ namespace caffe { + template __global__ void SigmoidCrossEntropyLossForwardGPU(const int nthreads, - const Dtype* input_data, const Dtype* target, Dtype* loss) { + const Dtype* input_data, const Dtype* target, Dtype* loss, + const bool has_ignore_label_, const int ignore_label_, + Dtype* counts) { CUDA_KERNEL_LOOP(i, nthreads) { - loss[i] = input_data[i] * (target[i] - (input_data[i] >= 0)) - - log(1 + exp(input_data[i] - 2 * input_data[i] * (input_data[i] >= 0))); + const int target_value = static_cast(target[i]); + if (has_ignore_label_ && target_value == ignore_label_) { + loss[i] = 0; + counts[i] = 0; + } else { + loss[i] = input_data[i] * (target[i] - (input_data[i] >= 0)) - + log(1 + exp(input_data[i] - 2 * input_data[i] * + (input_data[i] >= 0))); + counts[i] = 1; + } } } template -__global__ void SigmoidCrossEntropyLossIgnoreGPU(const int count, - const int ignore_label, const Dtype* target, Dtype* reference) { - CUDA_KERNEL_LOOP(index, count) { - const int target_value = static_cast(target[index]); +__global__ void SigmoidCrossEntropyLossIgnoreDiffGPU(const int count, + const int ignore_label, const Dtype* target, Dtype* diff) { + CUDA_KERNEL_LOOP(i, count) { + const int target_value = static_cast(target[i]); if (target_value == ignore_label) { - reference[index] = 0; + diff[i] = 0; } } } + template void SigmoidCrossEntropyLossLayer::Forward_gpu( const vector*>& bottom, const vector*>& top) { @@ -33,7 
+45,6 @@ void SigmoidCrossEntropyLossLayer::Forward_gpu( sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_); // Compute the loss (negative log likelihood) const int count = bottom[0]->count(); - const int num = bottom[0]->num(); // Stable version of loss computation from input data const Dtype* input_data = bottom[0]->gpu_data(); const Dtype* target = bottom[1]->gpu_data(); @@ -41,18 +52,23 @@ void SigmoidCrossEntropyLossLayer::Forward_gpu( // on the backward pass, we use it here to avoid having to allocate new GPU // memory to accumulate intermediate results in the kernel. Dtype* loss_data = bottom[0]->mutable_gpu_diff(); + Dtype* count_data = bottom[1]->mutable_gpu_diff(); + Dtype valid_count; // NOLINT_NEXT_LINE(whitespace/operators) SigmoidCrossEntropyLossForwardGPU<<>>(count, input_data, target, loss_data); - // Zero out loss of ignored targets. - if (has_ignore_label_) { - // NOLINT_NEXT_LINE(whitespace/operators) - SigmoidCrossEntropyLossIgnoreGPU<<>>(count, ignore_label_, target, loss_data); + CAFFE_CUDA_NUM_THREADS>>>(count, input_data, target, loss_data, + has_ignore_label_, ignore_label_, count_data); + // Only launch another CUDA kernel if we actually need the valid count. 
+ if (normalization_ == LossParameter_NormalizationMode_VALID && + has_ignore_label_) { + caffe_gpu_asum(count, count_data, &valid_count); + } else { + valid_count = count; } Dtype loss; caffe_gpu_asum(count, loss_data, &loss); - top[0]->mutable_cpu_data()[0] = loss / num; + normalizer_ = get_normalizer(normalization_, valid_count); + top[0]->mutable_cpu_data()[0] = loss / normalizer_; } template @@ -66,21 +82,20 @@ void SigmoidCrossEntropyLossLayer::Backward_gpu( if (propagate_down[0]) { // First, compute the diff const int count = bottom[0]->count(); - const int num = bottom[0]->num(); const Dtype* sigmoid_output_data = sigmoid_output_->gpu_data(); const Dtype* target = bottom[1]->gpu_data(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); caffe_copy(count, sigmoid_output_data, bottom_diff); caffe_gpu_axpy(count, Dtype(-1), target, bottom_diff); - // Scale down gradient - const Dtype loss_weight = top[0]->cpu_diff()[0]; - caffe_gpu_scal(count, loss_weight / num, bottom_diff); // Zero out gradient of ignored targets. if (has_ignore_label_) { // NOLINT_NEXT_LINE(whitespace/operators) - SigmoidCrossEntropyLossIgnoreGPU<<<<>>(count, ignore_label_, target, bottom_diff); } + // Scale down gradient + Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer_; + caffe_gpu_scal(count, loss_weight, bottom_diff); } } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 6940a705eb6..0b2768b7708 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -434,7 +434,7 @@ message LossParameter { optional int32 ignore_label = 1; // How to normalize the loss for loss layers that aggregate across batches, // spatial dimensions, or other dimensions. Currently only implemented in - // SoftmaxWithLoss layer. + // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers. enum NormalizationMode { // Divide by the number of examples in the batch times spatial dimensions. 
// Outputs that receive the ignore label will NOT be ignored in computing @@ -448,6 +448,8 @@ message LossParameter { // Do not normalize the loss. NONE = 3; } + // For historical reasons, the default normalization for + // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID. optional NormalizationMode normalization = 3 [default = VALID]; // Deprecated. Ignored if normalization is specified. If normalization // is not specified, then setting this to false will be equivalent to From 2cf9dd3750073ce8a119f4a71cc41eeef63e0748 Mon Sep 17 00:00:00 2001 From: chenzy Date: Fri, 18 Nov 2016 10:28:13 +0800 Subject: [PATCH 149/264] Add missing spaces besides equal signs in batch_norm_layer.cpp --- src/caffe/layers/batch_norm_layer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp index e661abb11b8..0a08ed4cb07 100644 --- a/src/caffe/layers/batch_norm_layer.cpp +++ b/src/caffe/layers/batch_norm_layer.cpp @@ -27,7 +27,7 @@ void BatchNormLayer::LayerSetUp(const vector*>& bottom, sz.push_back(channels_); this->blobs_[0].reset(new Blob(sz)); this->blobs_[1].reset(new Blob(sz)); - sz[0]=1; + sz[0] = 1; this->blobs_[2].reset(new Blob(sz)); for (int i = 0; i < 3; ++i) { caffe_set(this->blobs_[i]->count(), Dtype(0), @@ -61,7 +61,7 @@ void BatchNormLayer::Reshape(const vector*>& bottom, variance_.Reshape(sz); temp_.ReshapeLike(*bottom[0]); x_norm_.ReshapeLike(*bottom[0]); - sz[0]=bottom[0]->shape(0); + sz[0] = bottom[0]->shape(0); batch_sum_multiplier_.Reshape(sz); int spatial_dim = bottom[0]->count()/(channels_*bottom[0]->shape(0)); From e52451de914312b80a83459cb160c2f72a5b4fea Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Mon, 21 Nov 2016 09:35:57 -0800 Subject: [PATCH 150/264] solver: check and set type to reconcile class and proto the solver checks its proto type (SolverParameter.type) on instantiation: - if the proto type is unspecified it's set according to the class type 
`Solver::type()` - if the proto type and class type conflict, the solver dies loudly this helps avoid accidental instantiation of a different solver type than intended when the solver def and class differ. guaranteed type information in the SolverParameter will simplify multi-solver coordination too. --- include/caffe/solver.hpp | 2 ++ src/caffe/solver.cpp | 12 ++++++++++++ src/caffe/test/test_gradient_based_solver.cpp | 5 +++++ 3 files changed, 19 insertions(+) diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index eafcee32904..ef38d6e45b5 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -108,6 +108,8 @@ class Solver { virtual void RestoreSolverStateFromBinaryProto(const string& state_file) = 0; void DisplayOutputBlobs(const int net_id); void UpdateSmoothedLoss(Dtype loss, int start_iter, int average_loss); + /// Harmonize solver class type with configured proto type. + void CheckType(SolverParameter* param); SolverParameter param_; int iter_; diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index ece3913e88a..ae6a5a364b5 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -38,9 +38,21 @@ Solver::Solver(const string& param_file, const Solver* root_solver) requested_early_exit_(false) { SolverParameter param; ReadSolverParamsFromTextFileOrDie(param_file, ¶m); + CheckType(¶m); Init(param); } +template +void Solver::CheckType(SolverParameter* param) { + // Harmonize solver class type with configured type to avoid confusion. 
+ if (param->has_type()) { + CHECK_EQ(param->type(), this->type()) + << "Solver type must agree with instantiated solver class."; + } else { + param->set_type(this->type()); + } +} + template void Solver::Init(const SolverParameter& param) { CHECK(Caffe::root_solver() || root_solver_) diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index 975a8f0f88a..e81caea2548 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -694,6 +694,11 @@ TYPED_TEST(SGDSolverTest, TestSnapshotShare) { } } +TYPED_TEST(SGDSolverTest, TestSolverType) { + this->TestLeastSquaresUpdate(); + EXPECT_NE(this->solver_->type(), string("")); + EXPECT_EQ(this->solver_->type(), this->solver_->param().type()); +} template class AdaGradSolverTest : public GradientBasedSolverTest { From 48e73c780295e56699ad71232a24c8b459c8fe01 Mon Sep 17 00:00:00 2001 From: Zylphrex Date: Mon, 21 Nov 2016 13:11:34 -0500 Subject: [PATCH 151/264] Checks inside Xcode for latest OSX SDK (#4840) OS X: build with latest SDK by default --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 24894062a6c..ccc4d8b9e6a 100644 --- a/Makefile +++ b/Makefile @@ -192,12 +192,12 @@ ifeq ($(USE_LMDB), 1) LIBRARIES += lmdb endif ifeq ($(USE_OPENCV), 1) - LIBRARIES += opencv_core opencv_highgui opencv_imgproc + LIBRARIES += opencv_core opencv_highgui opencv_imgproc ifeq ($(OPENCV_VERSION), 3) LIBRARIES += opencv_imgcodecs endif - + endif PYTHON_LIBRARIES ?= boost_python python2.7 WARNINGS := -Wall -Wno-sign-compare @@ -385,7 +385,7 @@ else XCODE_CLT_GEQ_7 := $(shell [ $(XCODE_CLT_VER) -gt 6 ] && echo 1) XCODE_CLT_GEQ_6 := $(shell [ $(XCODE_CLT_VER) -gt 5 ] && echo 1) ifeq ($(XCODE_CLT_GEQ_7), 1) - BLAS_INCLUDE ?= 
/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.11.sdk/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers + BLAS_INCLUDE ?= /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/$(shell ls /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/ | sort | tail -1)/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers else ifeq ($(XCODE_CLT_GEQ_6), 1) BLAS_INCLUDE ?= /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ LDFLAGS += -framework Accelerate From db6cf0a728cad63c93b345f2203f3ad1f5d5c2f4 Mon Sep 17 00:00:00 2001 From: Nico Galoppo Date: Mon, 21 Nov 2016 11:03:52 -0800 Subject: [PATCH 152/264] Fix Python net drawing script --- python/caffe/draw.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/caffe/draw.py b/python/caffe/draw.py index 9eecf6d7b46..e4fd7aacce7 100644 --- a/python/caffe/draw.py +++ b/python/caffe/draw.py @@ -104,11 +104,11 @@ def get_layer_label(layer, rankdir): pooling_types_dict[layer.pooling_param.pool], layer.type, separator, - layer.pooling_param.kernel_size, + layer.pooling_param.kernel_size[0] if len(layer.pooling_param.kernel_size._values) else 1, separator, - layer.pooling_param.stride, + layer.pooling_param.stride[0] if len(layer.pooling_param.stride._values) else 1, separator, - layer.pooling_param.pad) + layer.pooling_param.pad[0] if len(layer.pooling_param.pad._values) else 0) else: node_label = '"%s%s(%s)"' % (layer.name, separator, layer.type) return node_label From 2e59864d4f35bf60ddf859185f4e0d8fd940f238 Mon Sep 17 00:00:00 2001 From: hmybmny Date: Thu, 24 Nov 2016 18:17:13 +0800 Subject: [PATCH 153/264] fix error link --- docs/install_apt.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/install_apt.md 
b/docs/install_apt.md index e95b022761b..bc1566b0be9 100644 --- a/docs/install_apt.md +++ b/docs/install_apt.md @@ -33,8 +33,8 @@ Everything is packaged in 14.04. These dependencies need manual installation in 12.04. # glog - wget https://google-glog.googlecode.com/files/glog-0.3.3.tar.gz - tar zxvf glog-0.3.3.tar.gz + wget https://github.com/google/glog/archive/v0.3.3.tar.gz + tar zxvf v0.3.3.tar.gz cd glog-0.3.3 ./configure make && make install From b644a87c842702de8291c97fa0e418797092fe41 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 25 Nov 2016 12:49:53 -0800 Subject: [PATCH 154/264] Revert "solver: check and set type to reconcile class and proto" as pointed out by #5028 this does not achieve what it intended, and furthermore causes trouble with direct solver instantiation. revert commit e52451de914312b80a83459cb160c2f72a5b4fea --- include/caffe/solver.hpp | 2 -- src/caffe/solver.cpp | 12 ------------ src/caffe/test/test_gradient_based_solver.cpp | 5 ----- 3 files changed, 19 deletions(-) diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index ef38d6e45b5..eafcee32904 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -108,8 +108,6 @@ class Solver { virtual void RestoreSolverStateFromBinaryProto(const string& state_file) = 0; void DisplayOutputBlobs(const int net_id); void UpdateSmoothedLoss(Dtype loss, int start_iter, int average_loss); - /// Harmonize solver class type with configured proto type. 
- void CheckType(SolverParameter* param); SolverParameter param_; int iter_; diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index ae6a5a364b5..ece3913e88a 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -38,21 +38,9 @@ Solver::Solver(const string& param_file, const Solver* root_solver) requested_early_exit_(false) { SolverParameter param; ReadSolverParamsFromTextFileOrDie(param_file, ¶m); - CheckType(¶m); Init(param); } -template -void Solver::CheckType(SolverParameter* param) { - // Harmonize solver class type with configured type to avoid confusion. - if (param->has_type()) { - CHECK_EQ(param->type(), this->type()) - << "Solver type must agree with instantiated solver class."; - } else { - param->set_type(this->type()); - } -} - template void Solver::Init(const SolverParameter& param) { CHECK(Caffe::root_solver() || root_solver_) diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index e81caea2548..975a8f0f88a 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -694,11 +694,6 @@ TYPED_TEST(SGDSolverTest, TestSnapshotShare) { } } -TYPED_TEST(SGDSolverTest, TestSolverType) { - this->TestLeastSquaresUpdate(); - EXPECT_NE(this->solver_->type(), string("")); - EXPECT_EQ(this->solver_->type(), this->solver_->param().type()); -} template class AdaGradSolverTest : public GradientBasedSolverTest { From db6643232cc95ba79f2a21ad98ef15725ee576d6 Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Sun, 27 Nov 2016 09:13:42 +0000 Subject: [PATCH 155/264] fix many typos by using codespell --- cmake/Targets.cmake | 2 +- examples/02-fine-tuning.ipynb | 2 +- examples/mnist/train_lenet_docker.sh | 2 +- examples/pycaffe/tools.py | 4 ++-- matlab/+caffe/private/caffe_.cpp | 2 +- matlab/CMakeLists.txt | 2 +- scripts/cpp_lint.py | 6 +++--- src/caffe/layers/crop_layer.cpp | 2 +- src/caffe/layers/crop_layer.cu | 2 +- src/caffe/layers/hdf5_data_layer.cpp 
| 4 ++-- src/caffe/proto/caffe.proto | 4 ++-- src/caffe/test/CMakeLists.txt | 2 +- src/caffe/test/test_euclidean_loss_layer.cpp | 2 +- src/gtest/gtest-all.cpp | 4 ++-- src/gtest/gtest.h | 2 +- tools/extra/plot_log.gnuplot.example | 2 +- 16 files changed, 22 insertions(+), 22 deletions(-) diff --git a/cmake/Targets.cmake b/cmake/Targets.cmake index a796d00548f..2cb11584a5c 100644 --- a/cmake/Targets.cmake +++ b/cmake/Targets.cmake @@ -94,7 +94,7 @@ function(caffe_pickup_caffe_sources root) caffe_convert_absolute_paths(test_srcs) caffe_convert_absolute_paths(test_cuda) - # propogate to parent scope + # propagate to parent scope set(srcs ${srcs} PARENT_SCOPE) set(cuda ${cuda} PARENT_SCOPE) set(test_srcs ${test_srcs} PARENT_SCOPE) diff --git a/examples/02-fine-tuning.ipynb b/examples/02-fine-tuning.ipynb index 07ca8df4d74..f44eaf9a4de 100644 --- a/examples/02-fine-tuning.ipynb +++ b/examples/02-fine-tuning.ipynb @@ -1141,7 +1141,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "So we did finetuning and it is awesome. Let's take a look at what kind of results we are able to get with a longer, more complete run of the style recognition dataset. Note: the below URL might be occassionally down because it is run on a research machine.\n", + "So we did finetuning and it is awesome. Let's take a look at what kind of results we are able to get with a longer, more complete run of the style recognition dataset. Note: the below URL might be occasionally down because it is run on a research machine.\n", "\n", "http://demo.vislab.berkeleyvision.org/" ] diff --git a/examples/mnist/train_lenet_docker.sh b/examples/mnist/train_lenet_docker.sh index 32cf1c8e4a3..e946ba0f4ad 100755 --- a/examples/mnist/train_lenet_docker.sh +++ b/examples/mnist/train_lenet_docker.sh @@ -25,7 +25,7 @@ set -e # executed. 
# # In order to provide additional flexibility, the following shell (environment) -# variables can be used to controll the execution of each of the phases: +# variables can be used to control the execution of each of the phases: # # DOWNLOAD_DATA: Enable (1) or disable (0) the downloading of the MNIST dataset # CREATE_LMDB: Enable (1) or disable (0) the creation of the LMDB database diff --git a/examples/pycaffe/tools.py b/examples/pycaffe/tools.py index 88b1834af1e..7f6c2d835fb 100644 --- a/examples/pycaffe/tools.py +++ b/examples/pycaffe/tools.py @@ -26,7 +26,7 @@ def set_scale(self, scale): def preprocess(self, im): """ - preprocess() emulate the pre-processing occuring in the vgg16 caffe + preprocess() emulate the pre-processing occurring in the vgg16 caffe prototxt. """ @@ -75,7 +75,7 @@ def __init__(self, testnet_prototxt_path="testnet.prototxt", # looks: self.sp['display'] = '25' self.sp['snapshot'] = '2500' - self.sp['snapshot_prefix'] = '"snapshot"' # string withing a string! + self.sp['snapshot_prefix'] = '"snapshot"' # string within a string! 
# learning rate policy self.sp['lr_policy'] = '"fixed"' diff --git a/matlab/+caffe/private/caffe_.cpp b/matlab/+caffe/private/caffe_.cpp index 1b1b2bff861..4e466e660ff 100644 --- a/matlab/+caffe/private/caffe_.cpp +++ b/matlab/+caffe/private/caffe_.cpp @@ -44,7 +44,7 @@ void mxCHECK_FILE_EXIST(const char* file) { // The pointers to caffe::Solver and caffe::Net instances static vector > > solvers_; static vector > > nets_; -// init_key is generated at the beginning and everytime you call reset +// init_key is generated at the beginning and every time you call reset static double init_key = static_cast(caffe_rng_rand()); /** ----------------------------------------------------------------- diff --git a/matlab/CMakeLists.txt b/matlab/CMakeLists.txt index f420df8d412..987730d9b55 100644 --- a/matlab/CMakeLists.txt +++ b/matlab/CMakeLists.txt @@ -20,7 +20,7 @@ if(NOT BUILD_SHARED_LIBS AND build_using MATCHES Matlab) message(FATAL_ERROR "Matlab MEX interface (with default mex options file) can only be built if caffe is compiled as shared library. Please enable 'BUILD_SHARED_LIBS' in CMake. Aternativelly you can switch to Octave compiler.") endif() -# helper function to set proper mex file extention +# helper function to set proper mex file extension function(caffe_fetch_and_set_proper_mexext mexfile_variable) execute_process(COMMAND ${Matlab_mexext} OUTPUT_STRIP_TRAILING_WHITESPACE RESULT_VARIABLE res OUTPUT_VARIABLE ext) if(res MATCHES 0) diff --git a/scripts/cpp_lint.py b/scripts/cpp_lint.py index 14c76ecd6bf..6ec4fb76e2c 100755 --- a/scripts/cpp_lint.py +++ b/scripts/cpp_lint.py @@ -4460,7 +4460,7 @@ def UpdateIncludeState(filename, include_state, io=codecs): io: The io factory to use to read the file. Provided for testability. Returns: - True if a header was succesfully added. False otherwise. + True if a header was successfully added. False otherwise. 
""" headerfile = None try: @@ -4532,7 +4532,7 @@ def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, # Let's copy the include_state so it is only messed up within this function. include_state = include_state.copy() - # Did we find the header for this file (if any) and succesfully load it? + # Did we find the header for this file (if any) and successfully load it? header_found = False # Use the absolute path so that matching works properly. @@ -4833,7 +4833,7 @@ def ParseArguments(args): try: _valid_extensions = set(val.split(',')) except ValueError: - PrintUsage('Extensions must be comma seperated list.') + PrintUsage('Extensions must be comma separated list.') if not filenames: PrintUsage('No files were specified.') diff --git a/src/caffe/layers/crop_layer.cpp b/src/caffe/layers/crop_layer.cpp index aecdcd63194..d36b61ca029 100644 --- a/src/caffe/layers/crop_layer.cpp +++ b/src/caffe/layers/crop_layer.cpp @@ -85,7 +85,7 @@ void CropLayer::crop_copy(const vector*>& bottom, src_data, dest_data, is_forward); } } else { - // We are at the last dimensions, which is stored continously in memory + // We are at the last dimensions, which is stored continuously in memory for (int i = 0; i < top[0]->shape(cur_dim); ++i) { // prepare index vector reduced(red) and with offsets(off) std::vector ind_red(cur_dim, 0); diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu index f78cecbbeee..6ea32d21c28 100644 --- a/src/caffe/layers/crop_layer.cu +++ b/src/caffe/layers/crop_layer.cu @@ -39,7 +39,7 @@ void CropLayer::crop_copy_gpu(const vector*>& bottom, src_data, dest_data, is_forward); } } else { - // We are at the last two dimensions, which are stored continously in memory + // We are at the last two dimensions, which are stored continuously in memory // With (N,C,H,W) // (0,1,2,3) cur_dim -> H // cur_dim+1 -> W diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index 2f13dc641df..c957451ae1e 
100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -61,10 +61,10 @@ void HDF5DataLayer::LoadHDF5FileData(const char* filename) { // Shuffle if needed. if (this->layer_param_.hdf5_data_param().shuffle()) { std::random_shuffle(data_permutation_.begin(), data_permutation_.end()); - DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0) + DLOG(INFO) << "Successfully loaded " << hdf_blobs_[0]->shape(0) << " rows (shuffled)"; } else { - DLOG(INFO) << "Successully loaded " << hdf_blobs_[0]->shape(0) << " rows"; + DLOG(INFO) << "Successfully loaded " << hdf_blobs_[0]->shape(0) << " rows"; } } diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 0b2768b7708..430a0dea109 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -418,7 +418,7 @@ message TransformationParameter { optional uint32 crop_size = 3 [default = 0]; // mean_file and mean_value cannot be specified at the same time optional string mean_file = 4; - // if specified can be repeated once (would substract it from all the channels) + // if specified can be repeated once (would subtract it from all the channels) // or can be repeated the same number of times as channels // (would subtract them from the corresponding channel) repeated float mean_value = 5; @@ -1396,6 +1396,6 @@ message PReLUParameter { // Initial value of a_i. Default is a_i=0.25 for all i. optional FillerParameter filler = 1; - // Whether or not slope paramters are shared across channels. + // Whether or not slope parameters are shared across channels. 
optional bool channel_shared = 2 [default = false]; } diff --git a/src/caffe/test/CMakeLists.txt b/src/caffe/test/CMakeLists.txt index 35a803f2f41..d8afc30b76b 100644 --- a/src/caffe/test/CMakeLists.txt +++ b/src/caffe/test/CMakeLists.txt @@ -1,7 +1,7 @@ # The option allows to include in build only selected test files and exclude all others # Usage example: # cmake -DBUILD_only_tests="common,net,blob,im2col_kernel" -set(BUILD_only_tests "" CACHE STRING "Blank or comma-separated list of test files to build without 'test_' prefix and extention") +set(BUILD_only_tests "" CACHE STRING "Blank or comma-separated list of test files to build without 'test_' prefix and extension") caffe_leave_only_selected_tests(test_srcs ${BUILD_only_tests}) caffe_leave_only_selected_tests(test_cuda ${BUILD_only_tests}) diff --git a/src/caffe/test/test_euclidean_loss_layer.cpp b/src/caffe/test/test_euclidean_loss_layer.cpp index f253f9fd393..b026f5b2077 100644 --- a/src/caffe/test/test_euclidean_loss_layer.cpp +++ b/src/caffe/test/test_euclidean_loss_layer.cpp @@ -39,7 +39,7 @@ class EuclideanLossLayerTest : public MultiDeviceTest { void TestForward() { // Get the loss without a specified objective weight -- should be - // equivalent to explicitly specifiying a weight of 1. + // equivalent to explicitly specifying a weight of 1. LayerParameter layer_param; EuclideanLossLayer layer_weight_1(layer_param); layer_weight_1.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); diff --git a/src/gtest/gtest-all.cpp b/src/gtest/gtest-all.cpp index 926197419fc..81cdb578cd5 100644 --- a/src/gtest/gtest-all.cpp +++ b/src/gtest/gtest-all.cpp @@ -2697,7 +2697,7 @@ AssertionResult IsHRESULTFailure(const char* expr, long hr) { // NOLINT // Utility functions for encoding Unicode text (wide strings) in // UTF-8. 
-// A Unicode code-point can have upto 21 bits, and is encoded in UTF-8 +// A Unicode code-point can have up to 21 bits, and is encoded in UTF-8 // like this: // // Code-point length Encoding @@ -7550,7 +7550,7 @@ FilePath FilePath::RemoveExtension(const char* extension) const { return *this; } -// Returns a pointer to the last occurence of a valid path separator in +// Returns a pointer to the last occurrence of a valid path separator in // the FilePath. On Windows, for example, both '/' and '\' are valid path // separators. Returns NULL if no path separator was found. const char* FilePath::FindLastPathSeparator() const { diff --git a/src/gtest/gtest.h b/src/gtest/gtest.h index 3143bd67996..124fb2321f9 100644 --- a/src/gtest/gtest.h +++ b/src/gtest/gtest.h @@ -3395,7 +3395,7 @@ class GTEST_API_ FilePath { void Normalize(); - // Returns a pointer to the last occurence of a valid path separator in + // Returns a pointer to the last occurrence of a valid path separator in // the FilePath. On Windows, for example, both '/' and '\' are valid path // separators. Returns NULL if no path separator was found. const char* FindLastPathSeparator() const; diff --git a/tools/extra/plot_log.gnuplot.example b/tools/extra/plot_log.gnuplot.example index 748b96e6925..02c68e1d24f 100644 --- a/tools/extra/plot_log.gnuplot.example +++ b/tools/extra/plot_log.gnuplot.example @@ -4,7 +4,7 @@ # Be warned that the fields in the training log may change in the future. # You had better check the data files before designing your own plots. -# Please generate the neccessary data files with +# Please generate the necessary data files with # /path/to/caffe/tools/extra/parse_log.sh before plotting. 
# Example usage: # ./parse_log.sh mnist.log From fa7fda78661fa795e3f6d3bbe7040e5d5d02e732 Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Mon, 28 Nov 2016 01:20:58 +0000 Subject: [PATCH 156/264] Make lint happy (> 80 characters) --- src/caffe/layers/crop_layer.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu index 6ea32d21c28..9ad40126468 100644 --- a/src/caffe/layers/crop_layer.cu +++ b/src/caffe/layers/crop_layer.cu @@ -39,10 +39,10 @@ void CropLayer::crop_copy_gpu(const vector*>& bottom, src_data, dest_data, is_forward); } } else { - // We are at the last two dimensions, which are stored continuously in memory - // With (N,C,H,W) - // (0,1,2,3) cur_dim -> H - // cur_dim+1 -> W + // We are at the last two dimensions, which are stored continuously in + // memory With (N,C,H,W) + // (0,1,2,3) cur_dim -> H + // cur_dim+1 -> W const int lines = top[0]->shape(cur_dim); const int height = top[0]->shape(cur_dim); const int width = top[0]->shape(cur_dim+1); From cd681ecdd9383a0f84b854e6fefeb05966babce0 Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Mon, 28 Nov 2016 02:17:25 +0000 Subject: [PATCH 157/264] Add the missing period --- src/caffe/layers/crop_layer.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu index 9ad40126468..1ea132531cf 100644 --- a/src/caffe/layers/crop_layer.cu +++ b/src/caffe/layers/crop_layer.cu @@ -40,9 +40,9 @@ void CropLayer::crop_copy_gpu(const vector*>& bottom, } } else { // We are at the last two dimensions, which are stored continuously in - // memory With (N,C,H,W) - // (0,1,2,3) cur_dim -> H - // cur_dim+1 -> W + // memory. 
With (N,C,H,W) + // (0,1,2,3) cur_dim -> H + // cur_dim+1 -> W const int lines = top[0]->shape(cur_dim); const int height = top[0]->shape(cur_dim); const int width = top[0]->shape(cur_dim+1); From 8cd5c3df98734f4c43e1b7f43c05401fda0a94ac Mon Sep 17 00:00:00 2001 From: Max Ehrlich Date: Fri, 2 Dec 2016 10:13:50 -0500 Subject: [PATCH 158/264] Add Pascal to all cuda architectures The known gpu architectures were missing the Pascal sm_60 and sm_61 compute capabilities. When building for this GPU, but on a separate machine, like a CI server or inside a docker image, caffe would be built for at most capability sm_50 and crash when run on the Pascal GPU. --- cmake/Cuda.cmake | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake index eeeb7325ffd..7146a24450c 100644 --- a/cmake/Cuda.cmake +++ b/cmake/Cuda.cmake @@ -4,7 +4,7 @@ endif() # Known NVIDIA GPU achitectures Caffe can be compiled for. # This list will be used for CUDA_ARCH_NAME = All option -set(Caffe_known_gpu_archs "20 21(20) 30 35 50") +set(Caffe_known_gpu_archs "20 21(20) 30 35 50 60 61") ################################################################################################ # A function for automatic detection of GPUs installed (if autodetection is enabled) @@ -56,7 +56,7 @@ endfunction() # caffe_select_nvcc_arch_flags(out_variable) function(caffe_select_nvcc_arch_flags out_variable) # List of arch names - set(__archs_names "Fermi" "Kepler" "Maxwell" "All" "Manual") + set(__archs_names "Fermi" "Kepler" "Maxwell" "Pascal" "All" "Manual") set(__archs_name_default "All") if(NOT CMAKE_CROSSCOMPILING) list(APPEND __archs_names "Auto") @@ -89,6 +89,8 @@ function(caffe_select_nvcc_arch_flags out_variable) set(__cuda_arch_bin "30 35") elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") set(__cuda_arch_bin "50") + elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") + set(__cuda_arch_bin "60 61") elseif(${CUDA_ARCH_NAME} STREQUAL "All") set(__cuda_arch_bin 
${Caffe_known_gpu_archs}) elseif(${CUDA_ARCH_NAME} STREQUAL "Auto") From de3a12f46217dcac8aae467931e6d5ffb5fbc4e2 Mon Sep 17 00:00:00 2001 From: "Young H. Oh" Date: Thu, 8 Dec 2016 06:54:46 +0900 Subject: [PATCH 159/264] fix wrongly used marker hash --- tools/extra/plot_training_log.py.example | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/tools/extra/plot_training_log.py.example b/tools/extra/plot_training_log.py.example index 79924ae5a5a..8caca6b8a67 100755 --- a/tools/extra/plot_training_log.py.example +++ b/tools/extra/plot_training_log.py.example @@ -90,9 +90,9 @@ def load_data(data_file, field_idx0, field_idx1): def random_marker(): markers = mks.MarkerStyle.markers - num = len(markers.values()) + num = len(markers.keys()) idx = random.randint(0, num - 1) - return markers.values()[idx] + return markers.keys()[idx] def get_data_label(path_to_log): label = path_to_log[path_to_log.rfind('/')+1 : path_to_log.rfind( @@ -126,16 +126,9 @@ def plot_chart(chart_type, path_to_png, path_to_log_list): plt.plot(data[0], data[1], label = label, color = color, linewidth = linewidth) else: - ok = False - ## Some markers throw ValueError: Unrecognized marker style - while not ok: - try: - marker = random_marker() - plt.plot(data[0], data[1], label = label, color = color, - marker = marker, linewidth = linewidth) - ok = True - except: - pass + marker = random_marker() + plt.plot(data[0], data[1], label = label, color = color, + marker = marker, linewidth = linewidth) legend_loc = get_legend_loc(chart_type) plt.legend(loc = legend_loc, ncol = 1) # ajust ncol to fit the space plt.title(get_chart_type_description(chart_type)) From 57a5bbde4ede19c545c5932334782e3a755b2265 Mon Sep 17 00:00:00 2001 From: liyangguang Date: Fri, 16 Dec 2016 11:54:49 +0000 Subject: [PATCH 160/264] check leveldb iterator status for snappy format. 
--- include/caffe/util/db_leveldb.hpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/caffe/util/db_leveldb.hpp b/include/caffe/util/db_leveldb.hpp index e9fa0d32b66..4cdb6db9558 100644 --- a/include/caffe/util/db_leveldb.hpp +++ b/include/caffe/util/db_leveldb.hpp @@ -14,7 +14,10 @@ namespace caffe { namespace db { class LevelDBCursor : public Cursor { public: explicit LevelDBCursor(leveldb::Iterator* iter) - : iter_(iter) { SeekToFirst(); } + : iter_(iter) { + SeekToFirst(); + CHECK(iter_->status().ok()) << iter_->status().ToString(); + } ~LevelDBCursor() { delete iter_; } virtual void SeekToFirst() { iter_->SeekToFirst(); } virtual void Next() { iter_->Next(); } From b55fe84ca13cb7d9971505ea4d160aa5d7b6be50 Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Wed, 21 Dec 2016 09:00:15 +0000 Subject: [PATCH 161/264] docs: add debian installation guide --- docs/install_apt_debian.md | 105 +++++++++++++++++++++++++++++++++++++ docs/installation.md | 1 + 2 files changed, 106 insertions(+) create mode 100644 docs/install_apt_debian.md diff --git a/docs/install_apt_debian.md b/docs/install_apt_debian.md new file mode 100644 index 00000000000..745a6f4fadb --- /dev/null +++ b/docs/install_apt_debian.md @@ -0,0 +1,105 @@ +--- +title: "Installation: Debian" +--- + +# Debian Installation + +Caffe packages are available for `Debian/unstable`. Debian/stable users +should take a look at Ubuntu installation instruction. + +Only experienced linux users are recommended to try Debian/unstable (Sid). + +Last update: Dec.21 2016 + +## Debian/unstable + +Apart from the installation methods based on source, Debian/unstable +users can install pre-compiled Caffe packages via the official archive. + +### Binary installation + +Make sure that there is something like the follows in your `/etc/apt/sources.list`: +``` +deb http://ftp2.cn.debian.org/debian sid main contrib non-free +``` +Then we update APT cache and directly install Caffe. 
Note, the cpu version and +the cuda version cannot be installed at the same time. +``` +# apt update +# apt install [ caffe-cpu | caffe-cuda ] +``` +It should work out of box. + +#### Customizing caffe packages + +Some users may need to customize the Caffe package. Here is a brief +guide of producing the customized `.deb` packages. + +Make sure that there is something like this in your `/etc/apt/sources.list`: +``` +deb http://ftp2.cn.debian.org/debian sid main contrib non-free +deb-src http://ftp2.cn.debian.org/debian sid main contrib non-free +``` + +Then we build caffe deb files with the following commands: +``` +$ sudo apt update +$ sudo apt install build-essential debhelper devscripts # standard package building tools +$ sudo apt build-dep [ caffe-cpu | caffe-cuda ] # the most elegant way to pull caffe build dependencies +$ apt source [ caffe-cpu | caffe-cuda ] # download the source tarball and extract +$ cd caffe-XXXX +[ ... optional, customize caffe code/build ... ] +$ debuild -B -j4 # build caffe with 4 parallel jobs (similar to make -j4) +[ ... building ...] +$ debc # optional, if you want to check the package contents +$ sudo debi # optional, install the generated packages +``` +The resulting deb packages can be found under the parent directory of the source tree. + +### Source installation + +Source installation under Debian/unstable is similar to that of Ubuntu, but +here is a more elegant way to pull caffe build dependencies: +``` +$ sudo apt build-dep [ caffe-cpu | caffe-cuda ] +``` +Note, this requires a `deb-src` entry in your `/etc/apt/sources.list`. + +### Notes + +* Consider re-compiling OpenBLAS locally with optimization flags for sake of +performance. This is highly recommended if you are writing a paper. + +* If you are installing `caffe-cuda`, APT will automatically pull some of the +CUDA packages and the nvidia driver packages. 
Please take care if you have +manually installed or hacked nvidia driver or CUDA toolkit or any other +related stuff, because in this case it may fail. + +* If you encountered any problem when installing `caffe-*`, please report bug +to Debian via Debian's bug tracking system. See https://www.debian.org/Bugs/ . + +* Additionally, a manpage (`man caffe`) and a bash complementation script +(`caffe `, `caffe train `) are provided. +Both of the two files are still not merged into caffe master. + +* The python interface is Python 3 version: `python3-caffe-{cpu,cuda}`. +No plan to support python2. + +## FAQ + +* where is caffe-cudnn? + +CUDNN library seems not redistributable currently. If you really want the +caffe-cudnn deb packages, the workaround is to install cudnn by yourself, +and hack the packaging scripts, then build your customized package. + +* I installed the CPU version, How can I switch to the CUDA version? + +`sudo apt install caffe-cuda`, apt's dependency resolver is smart enough to deal with this. + +* Where is the examples, the models and other documentation stuff? 
+ +``` +sudo apt install caffe-doc +dpkg -L caffe-doc +``` diff --git a/docs/installation.md b/docs/installation.md index 3254be3dfcd..14ec467427b 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -12,6 +12,7 @@ The official Makefile and `Makefile.config` build are complemented by a [communi - [Docker setup](https://github.com/BVLC/caffe/tree/master/docker) *out-of-the-box brewing* - [Ubuntu installation](install_apt.html) *the standard platform* +- [Debian installation](install_apt_debian.html) *deploy caffe with a single command* - [OS X installation](install_osx.html) - [RHEL / CentOS / Fedora installation](install_yum.html) - [Windows](https://github.com/BVLC/caffe/tree/windows) *see the Windows branch led by Guillaume Dumont* From 2fac0d61afe290564f09067d3efa53d07ba0736f Mon Sep 17 00:00:00 2001 From: Tomasz Socha Date: Thu, 8 Dec 2016 14:51:30 +0100 Subject: [PATCH 162/264] Use mkl_malloc when use mkl --- include/caffe/syncedmem.hpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/caffe/syncedmem.hpp b/include/caffe/syncedmem.hpp index 38ee4664028..6474a6969e5 100644 --- a/include/caffe/syncedmem.hpp +++ b/include/caffe/syncedmem.hpp @@ -3,6 +3,10 @@ #include +#ifdef USE_MKL + #include "mkl.h" +#endif + #include "caffe/common.hpp" namespace caffe { @@ -20,7 +24,11 @@ inline void CaffeMallocHost(void** ptr, size_t size, bool* use_cuda) { return; } #endif +#ifdef USE_MKL + *ptr = mkl_malloc(size ? 
size:1, 64); +#else *ptr = malloc(size); +#endif *use_cuda = false; CHECK(*ptr) << "host allocation of size " << size << " failed"; } @@ -32,7 +40,11 @@ inline void CaffeFreeHost(void* ptr, bool use_cuda) { return; } #endif +#ifdef USE_MKL + mkl_free(ptr); +#else free(ptr); +#endif } From 775f5b05dba28867f609c0e2b097e62176b4904a Mon Sep 17 00:00:00 2001 From: Yagnesh Date: Wed, 21 Dec 2016 17:05:30 -0800 Subject: [PATCH 163/264] Fixed a typo --- examples/02-fine-tuning.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/02-fine-tuning.ipynb b/examples/02-fine-tuning.ipynb index f44eaf9a4de..90803c989fb 100644 --- a/examples/02-fine-tuning.ipynb +++ b/examples/02-fine-tuning.ipynb @@ -70,7 +70,7 @@ "\n", "- `get_ilsvrc_aux.sh` to download the ImageNet data mean, labels, etc.\n", "- `download_model_binary.py` to download the pretrained reference model\n", - "- `finetune_flickr_style/assemble_data.py` downloadsd the style training and testing data\n", + "- `finetune_flickr_style/assemble_data.py` downloads the style training and testing data\n", "\n", "We'll download just a small subset of the full dataset for this exercise: just 2000 of the 80K images, from 5 of the 20 style categories. 
(To download the full dataset, set `full_dataset = True` in the cell below.)" ] From 5693f3149688a2cb035858a9a9efde567763ebe7 Mon Sep 17 00:00:00 2001 From: Yagnesh Date: Fri, 23 Dec 2016 15:31:21 -0800 Subject: [PATCH 164/264] Join path using "os.path.join" instead of "+" (Needless to say it's much clearer, less error prone, and portable) --- examples/02-fine-tuning.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/02-fine-tuning.ipynb b/examples/02-fine-tuning.ipynb index 90803c989fb..422259de424 100644 --- a/examples/02-fine-tuning.ipynb +++ b/examples/02-fine-tuning.ipynb @@ -146,7 +146,7 @@ "outputs": [], "source": [ "import os\n", - "weights = caffe_root + 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'\n", + "weights = os.path.join(caffe_root, 'models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel')\n", "assert os.path.exists(weights)" ] }, From 1fd8bd0b4a842aa5a9d7ea1ec88d4cdd7eaf3b99 Mon Sep 17 00:00:00 2001 From: Fyodor Tokarev Date: Fri, 30 Dec 2016 17:47:20 +0300 Subject: [PATCH 165/264] Typos in test_inner_product_layer.cpp --- src/caffe/test/test_inner_product_layer.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/caffe/test/test_inner_product_layer.cpp b/src/caffe/test/test_inner_product_layer.cpp index f1ec2333fae..6d84d292b38 100644 --- a/src/caffe/test/test_inner_product_layer.cpp +++ b/src/caffe/test/test_inner_product_layer.cpp @@ -60,9 +60,9 @@ TYPED_TEST(InnerProductLayerTest, TestSetUp) { EXPECT_EQ(this->blob_top_->channels(), 10); } -/** @brief TestSetUp while toggling tranpose flag +/** @brief TestSetUp while toggling transpose flag */ -TYPED_TEST(InnerProductLayerTest, TestSetUpTranposeFalse) { +TYPED_TEST(InnerProductLayerTest, TestSetUpTransposeFalse) { typedef typename TypeParam::Dtype Dtype; this->blob_bottom_vec_.push_back(this->blob_bottom_); LayerParameter layer_param; @@ -82,9 +82,9 @@ TYPED_TEST(InnerProductLayerTest, 
TestSetUpTranposeFalse) { EXPECT_EQ(60, layer->blobs()[0]->shape(1)); } -/** @brief TestSetUp while toggling tranpose flag +/** @brief TestSetUp while toggling transpose flag */ -TYPED_TEST(InnerProductLayerTest, TestSetUpTranposeTrue) { +TYPED_TEST(InnerProductLayerTest, TestSetUpTransposeTrue) { typedef typename TypeParam::Dtype Dtype; this->blob_bottom_vec_.push_back(this->blob_bottom_); LayerParameter layer_param; @@ -339,7 +339,7 @@ TYPED_TEST(InnerProductLayerTest, TestBackwardTranspose) { // copy bottom diffs Blob* const bottom_diff = new Blob(); bottom_diff->CopyFrom(*this->blob_bottom_vec_[0], true, true); - // repeat original top with tranposed ip + // repeat original top with transposed ip this->blob_top_vec_.clear(); this->blob_top_vec_.push_back(new Blob()); inner_product_param->set_transpose(true); From 4f0eb52a7ecd1bfb2c2d5906d368823eb312693c Mon Sep 17 00:00:00 2001 From: Xiaojie Deng Date: Sat, 31 Dec 2016 20:22:17 +0800 Subject: [PATCH 166/264] Fix parse_log.py and parse_log.sh for negative time duration if datetime in log across year boundary --- tools/extra/extract_seconds.py | 8 ++++++++ tools/extra/parse_log.py | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/tools/extra/extract_seconds.py b/tools/extra/extract_seconds.py index 591a51f96bd..68af69a2788 100755 --- a/tools/extra/extract_seconds.py +++ b/tools/extra/extract_seconds.py @@ -48,11 +48,19 @@ def extract_seconds(input_file, output_file): start_datetime = get_start_time(lines, log_created_year) assert start_datetime, 'Start time not found' + last_dt = start_datetime out = open(output_file, 'w') for line in lines: line = line.strip() if line.find('Iteration') != -1: dt = extract_datetime_from_line(line, log_created_year) + + # if it's another year + if dt.month < last_dt.month: + log_created_year += 1 + dt = extract_datetime_from_line(line, log_created_year) + last_dt = dt + elapsed_seconds = (dt - start_datetime).total_seconds() out.write('%f\n' % elapsed_seconds) out.close() 
diff --git a/tools/extra/parse_log.py b/tools/extra/parse_log.py index 017306b5088..b47ffd0d842 100755 --- a/tools/extra/parse_log.py +++ b/tools/extra/parse_log.py @@ -38,6 +38,7 @@ def parse_log(path_to_log): logfile_year = extract_seconds.get_log_created_year(path_to_log) with open(path_to_log) as f: start_time = extract_seconds.get_start_time(f, logfile_year) + last_time = start_time for line in f: iteration_match = regex_iteration.search(line) @@ -55,6 +56,12 @@ def parse_log(path_to_log): # Skip lines with bad formatting, for example when resuming solver continue + # if it's another year + if time.month < last_time.month: + logfile_year += 1 + time = extract_seconds.extract_datetime_from_line(line, logfile_year) + last_time = time + seconds = (time - start_time).total_seconds() learning_rate_match = regex_learning_rate.search(line) From bae06073864dbe86970429d53e35335304626a70 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Williford" Date: Sun, 1 Jan 2017 18:22:09 +0000 Subject: [PATCH 167/264] Overhaul layer catalogue documentation. Create scripts/split_caffe_proto.py file for splitting up the caffe.proto file, so that parts of the file can be included from the layer help pages. Create separate pages for each layer and link each page from layers.md. 
--- docs/tutorial/layers.md | 562 +++--------------- docs/tutorial/layers/absval.md | 22 + docs/tutorial/layers/accuracy.md | 21 + docs/tutorial/layers/argmax.md | 19 + docs/tutorial/layers/batchnorm.md | 20 + docs/tutorial/layers/batchreindex.md | 16 + docs/tutorial/layers/bias.md | 19 + docs/tutorial/layers/bnll.md | 25 + docs/tutorial/layers/concat.md | 40 ++ docs/tutorial/layers/contrastiveloss.md | 20 + docs/tutorial/layers/convolution.md | 63 ++ docs/tutorial/layers/crop.md | 20 + docs/tutorial/layers/data.md | 29 + docs/tutorial/layers/deconvolution.md | 22 + docs/tutorial/layers/dropout.md | 20 + docs/tutorial/layers/dummydata.md | 20 + docs/tutorial/layers/eltwise.md | 20 + docs/tutorial/layers/elu.md | 25 + docs/tutorial/layers/embed.md | 20 + docs/tutorial/layers/euclideanloss.md | 16 + docs/tutorial/layers/exp.md | 24 + docs/tutorial/layers/filter.md | 15 + docs/tutorial/layers/flatten.md | 21 + docs/tutorial/layers/hdf5data.md | 20 + docs/tutorial/layers/hdf5output.md | 25 + docs/tutorial/layers/hingeloss.md | 19 + docs/tutorial/layers/im2col.md | 16 + docs/tutorial/layers/imagedata.md | 27 + docs/tutorial/layers/infogainloss.md | 24 + docs/tutorial/layers/innerproduct.md | 59 ++ docs/tutorial/layers/input.md | 19 + docs/tutorial/layers/log.md | 20 + docs/tutorial/layers/lrn.md | 28 + docs/tutorial/layers/lstm.md | 21 + docs/tutorial/layers/memorydata.md | 25 + .../layers/multinomiallogisticloss.md | 19 + docs/tutorial/layers/mvn.md | 20 + docs/tutorial/layers/parameter.md | 21 + docs/tutorial/layers/pooling.md | 47 ++ docs/tutorial/layers/power.md | 46 ++ docs/tutorial/layers/prelu.md | 20 + docs/tutorial/layers/python.md | 27 + docs/tutorial/layers/recurrent.md | 20 + docs/tutorial/layers/reduction.md | 20 + docs/tutorial/layers/relu.md | 32 + docs/tutorial/layers/reshape.md | 51 ++ docs/tutorial/layers/rnn.md | 19 + docs/tutorial/layers/scale.md | 20 + docs/tutorial/layers/sigmoid.md | 20 + .../layers/sigmoidcrossentropyloss.md | 13 + 
docs/tutorial/layers/silence.md | 23 + docs/tutorial/layers/slice.md | 42 ++ docs/tutorial/layers/softmax.md | 24 + docs/tutorial/layers/softmaxwithloss.md | 33 + docs/tutorial/layers/split.md | 17 + docs/tutorial/layers/spp.md | 20 + docs/tutorial/layers/tanh.md | 18 + docs/tutorial/layers/threshold.md | 18 + docs/tutorial/layers/tile.md | 20 + docs/tutorial/layers/windowdata.md | 19 + scripts/build_docs.sh | 3 + scripts/split_caffe_proto.py | 35 ++ 62 files changed, 1573 insertions(+), 476 deletions(-) create mode 100644 docs/tutorial/layers/absval.md create mode 100644 docs/tutorial/layers/accuracy.md create mode 100644 docs/tutorial/layers/argmax.md create mode 100644 docs/tutorial/layers/batchnorm.md create mode 100644 docs/tutorial/layers/batchreindex.md create mode 100644 docs/tutorial/layers/bias.md create mode 100644 docs/tutorial/layers/bnll.md create mode 100644 docs/tutorial/layers/concat.md create mode 100644 docs/tutorial/layers/contrastiveloss.md create mode 100644 docs/tutorial/layers/convolution.md create mode 100644 docs/tutorial/layers/crop.md create mode 100644 docs/tutorial/layers/data.md create mode 100644 docs/tutorial/layers/deconvolution.md create mode 100644 docs/tutorial/layers/dropout.md create mode 100644 docs/tutorial/layers/dummydata.md create mode 100644 docs/tutorial/layers/eltwise.md create mode 100644 docs/tutorial/layers/elu.md create mode 100644 docs/tutorial/layers/embed.md create mode 100644 docs/tutorial/layers/euclideanloss.md create mode 100644 docs/tutorial/layers/exp.md create mode 100644 docs/tutorial/layers/filter.md create mode 100644 docs/tutorial/layers/flatten.md create mode 100644 docs/tutorial/layers/hdf5data.md create mode 100644 docs/tutorial/layers/hdf5output.md create mode 100644 docs/tutorial/layers/hingeloss.md create mode 100644 docs/tutorial/layers/im2col.md create mode 100644 docs/tutorial/layers/imagedata.md create mode 100644 docs/tutorial/layers/infogainloss.md create mode 100644 
docs/tutorial/layers/innerproduct.md create mode 100644 docs/tutorial/layers/input.md create mode 100644 docs/tutorial/layers/log.md create mode 100644 docs/tutorial/layers/lrn.md create mode 100644 docs/tutorial/layers/lstm.md create mode 100644 docs/tutorial/layers/memorydata.md create mode 100644 docs/tutorial/layers/multinomiallogisticloss.md create mode 100644 docs/tutorial/layers/mvn.md create mode 100644 docs/tutorial/layers/parameter.md create mode 100644 docs/tutorial/layers/pooling.md create mode 100644 docs/tutorial/layers/power.md create mode 100644 docs/tutorial/layers/prelu.md create mode 100644 docs/tutorial/layers/python.md create mode 100644 docs/tutorial/layers/recurrent.md create mode 100644 docs/tutorial/layers/reduction.md create mode 100644 docs/tutorial/layers/relu.md create mode 100644 docs/tutorial/layers/reshape.md create mode 100644 docs/tutorial/layers/rnn.md create mode 100644 docs/tutorial/layers/scale.md create mode 100644 docs/tutorial/layers/sigmoid.md create mode 100644 docs/tutorial/layers/sigmoidcrossentropyloss.md create mode 100644 docs/tutorial/layers/silence.md create mode 100644 docs/tutorial/layers/slice.md create mode 100644 docs/tutorial/layers/softmax.md create mode 100644 docs/tutorial/layers/softmaxwithloss.md create mode 100644 docs/tutorial/layers/split.md create mode 100644 docs/tutorial/layers/spp.md create mode 100644 docs/tutorial/layers/tanh.md create mode 100644 docs/tutorial/layers/threshold.md create mode 100644 docs/tutorial/layers/tile.md create mode 100644 docs/tutorial/layers/windowdata.md create mode 100755 scripts/split_caffe_proto.py diff --git a/docs/tutorial/layers.md b/docs/tutorial/layers.md index 7362aac298a..a903d5ac985 100644 --- a/docs/tutorial/layers.md +++ b/docs/tutorial/layers.md @@ -1,186 +1,77 @@ --- title: Layer Catalogue --- + # Layers To create a Caffe model you need to define the model architecture in a protocol buffer definition file (prototxt). 
Caffe layers and their parameters are defined in the protocol buffer definitions for the project in [caffe.proto](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto). -### Vision Layers - -* Header: `./include/caffe/vision_layers.hpp` - -Vision layers usually take *images* as input and produce other *images* as output. -A typical "image" in the real-world may have one color channel ($$c = 1$$), as in a grayscale image, or three color channels ($$c = 3$$) as in an RGB (red, green, blue) image. -But in this context, the distinguishing characteristic of an image is its spatial structure: usually an image has some non-trivial height $$h > 1$$ and width $$w > 1$$. -This 2D geometry naturally lends itself to certain decisions about how to process the input. -In particular, most of the vision layers work by applying a particular operation to some region of the input to produce a corresponding region of the output. -In contrast, other layers (with few exceptions) ignore the spatial structure of the input, effectively treating it as "one big vector" with dimension $$chw$$. 
- - -#### Convolution - -* Layer type: `Convolution` -* CPU implementation: `./src/caffe/layers/convolution_layer.cpp` -* CUDA GPU implementation: `./src/caffe/layers/convolution_layer.cu` -* Parameters (`ConvolutionParameter convolution_param`) - - Required - - `num_output` (`c_o`): the number of filters - - `kernel_size` (or `kernel_h` and `kernel_w`): specifies height and width of each filter - - Strongly Recommended - - `weight_filler` [default `type: 'constant' value: 0`] - - Optional - - `bias_term` [default `true`]: specifies whether to learn and apply a set of additive biases to the filter outputs - - `pad` (or `pad_h` and `pad_w`) [default 0]: specifies the number of pixels to (implicitly) add to each side of the input - - `stride` (or `stride_h` and `stride_w`) [default 1]: specifies the intervals at which to apply the filters to the input - - `group` (g) [default 1]: If g > 1, we restrict the connectivity of each filter to a subset of the input. Specifically, the input and output channels are separated into g groups, and the $$i$$th output group channels will be only connected to the $$i$$th input group channels. -* Input - - `n * c_i * h_i * w_i` -* Output - - `n * c_o * h_o * w_o`, where `h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1` and `w_o` likewise. 
-* Sample (as seen in `./models/bvlc_reference_caffenet/train_val.prototxt`) - - layer { - name: "conv1" - type: "Convolution" - bottom: "data" - top: "conv1" - # learning rate and decay multipliers for the filters - param { lr_mult: 1 decay_mult: 1 } - # learning rate and decay multipliers for the biases - param { lr_mult: 2 decay_mult: 0 } - convolution_param { - num_output: 96 # learn 96 filters - kernel_size: 11 # each filter is 11x11 - stride: 4 # step 4 pixels between each filter application - weight_filler { - type: "gaussian" # initialize the filters from a Gaussian - std: 0.01 # distribution with stdev 0.01 (default mean: 0) - } - bias_filler { - type: "constant" # initialize the biases to zero (0) - value: 0 - } - } - } - -The `Convolution` layer convolves the input image with a set of learnable filters, each producing one feature map in the output image. - -#### Pooling - -* Layer type: `Pooling` -* CPU implementation: `./src/caffe/layers/pooling_layer.cpp` -* CUDA GPU implementation: `./src/caffe/layers/pooling_layer.cu` -* Parameters (`PoolingParameter pooling_param`) - - Required - - `kernel_size` (or `kernel_h` and `kernel_w`): specifies height and width of each filter - - Optional - - `pool` [default MAX]: the pooling method. Currently MAX, AVE, or STOCHASTIC - - `pad` (or `pad_h` and `pad_w`) [default 0]: specifies the number of pixels to (implicitly) add to each side of the input - - `stride` (or `stride_h` and `stride_w`) [default 1]: specifies the intervals at which to apply the filters to the input -* Input - - `n * c * h_i * w_i` -* Output - - `n * c * h_o * w_o`, where h_o and w_o are computed in the same way as convolution. 
-* Sample (as seen in `./models/bvlc_reference_caffenet/train_val.prototxt`) - - layer { - name: "pool1" - type: "Pooling" - bottom: "conv1" - top: "pool1" - pooling_param { - pool: MAX - kernel_size: 3 # pool over a 3x3 region - stride: 2 # step two pixels (in the bottom blob) between pooling regions - } - } - -#### Local Response Normalization (LRN) - -* Layer type: `LRN` -* CPU Implementation: `./src/caffe/layers/lrn_layer.cpp` -* CUDA GPU Implementation: `./src/caffe/layers/lrn_layer.cu` -* Parameters (`LRNParameter lrn_param`) - - Optional - - `local_size` [default 5]: the number of channels to sum over (for cross channel LRN) or the side length of the square region to sum over (for within channel LRN) - - `alpha` [default 1]: the scaling parameter (see below) - - `beta` [default 5]: the exponent (see below) - - `norm_region` [default `ACROSS_CHANNELS`]: whether to sum over adjacent channels (`ACROSS_CHANNELS`) or nearby spatial locaitons (`WITHIN_CHANNEL`) +## Data Layers -The local response normalization layer performs a kind of "lateral inhibition" by normalizing over local input regions. In `ACROSS_CHANNELS` mode, the local regions extend across nearby channels, but have no spatial extent (i.e., they have shape `local_size x 1 x 1`). In `WITHIN_CHANNEL` mode, the local regions extend spatially, but are in separate channels (i.e., they have shape `1 x local_size x local_size`). Each input value is divided by $$(1 + (\alpha/n) \sum_i x_i^2)^\beta$$, where $$n$$ is the size of each local region, and the sum is taken over the region centered at that value (zero padding is added where necessary). - -#### im2col - -`Im2col` is a helper for doing the image-to-column transformation that you most likely do not need to know about. This is used in Caffe's original convolution to do matrix multiplication by laying out all patches into a matrix. - -### Loss Layers +Data enters Caffe through data layers: they lie at the bottom of nets. 
Data can come from efficient databases (LevelDB or LMDB), directly from memory, or, when efficiency is not critical, from files on disk in HDF5 or common image formats.
 
-Loss drives learning by comparing an output to a target and assigning cost to minimize. The loss itself is computed by the forward pass and the gradient w.r.t. to the loss is computed by the backward pass.
+Common input preprocessing (mean subtraction, scaling, random cropping, and mirroring) is available by specifying `TransformationParameter`s by some of the layers.
+The [bias](layers/bias.html), [scale](layers/scale.html), and [crop](layers/crop.html) layers can be helpful with transforming the inputs, when `TransformationParameter` isn't available.
 
-#### Softmax
+Layers:
 
-* Layer type: `SoftmaxWithLoss`
+* [Image Data](layers/imagedata.html) - read raw images.
+* [Database](layers/data.html) - read data from LEVELDB or LMDB.
+* [HDF5 Input](layers/hdf5data.html) - read HDF5 data, allows data of arbitrary dimensions.
+* [HDF5 Output](layers/hdf5output.html) - write data as HDF5.
+* [Input](layers/input.html) - typically used for networks that are being deployed.
+* [Window Data](layers/windowdata.html) - read window data file.
+* [Memory Data](layers/memorydata.html) - read data directly from memory.
+* [Dummy Data](layers/dummydata.html) - for static data and debugging.
 
-The softmax loss layer computes the multinomial logistic loss of the softmax of its inputs. It's conceptually identical to a softmax layer followed by a multinomial logistic loss layer, but provides a more numerically stable gradient.
+Note that the [Python](layers/python.html) Layer can be useful for creating custom data layers.
 
-#### Sum-of-Squares / Euclidean
+## Vision Layers
 
-* Layer type: `EuclideanLoss`
+Vision layers usually take *images* as input and produce other *images* as output, although they can take data of other types and dimensions.
+A typical "image" in the real-world may have one color channel ($$c = 1$$), as in a grayscale image, or three color channels ($$c = 3$$) as in an RGB (red, green, blue) image. +But in this context, the distinguishing characteristic of an image is its spatial structure: usually an image has some non-trivial height $$h > 1$$ and width $$w > 1$$. +This 2D geometry naturally lends itself to certain decisions about how to process the input. +In particular, most of the vision layers work by applying a particular operation to some region of the input to produce a corresponding region of the output. +In contrast, other layers (with few exceptions) ignore the spatial structure of the input, effectively treating it as "one big vector" with dimension $$chw$$. -The Euclidean loss layer computes the sum of squares of differences of its two inputs, $$\frac 1 {2N} \sum_{i=1}^N \| x^1_i - x^2_i \|_2^2$$. +Layers: -#### Hinge / Margin +* [Convolution Layer](layers/convolution.html) - convolves the input image with a set of learnable filters, each producing one feature map in the output image. +* [Pooling Layer](layers/pooling.html) - max, average, or stochastic pooling. +* [Spatial Pyramid Pooling (SPP)](layers/spp.html) +* [Crop](layers/crop.html) - perform cropping transformation. +* [Deconvolution Layer](layers/deconvolution.html) - transposed convolution. -* Layer type: `HingeLoss` -* CPU implementation: `./src/caffe/layers/hinge_loss_layer.cpp` -* CUDA GPU implementation: none yet -* Parameters (`HingeLossParameter hinge_loss_param`) - - Optional - - `norm` [default L1]: the norm used. Currently L1, L2 -* Inputs - - `n * c * h * w` Predictions - - `n * 1 * 1 * 1` Labels -* Output - - `1 * 1 * 1 * 1` Computed Loss -* Samples +* [Im2Col](layers/im2col.html) - relic helper layer that is not used much anymore. 
- # L1 Norm - layer { - name: "loss" - type: "HingeLoss" - bottom: "pred" - bottom: "label" - } +## Recurrent Layers - # L2 Norm - layer { - name: "loss" - type: "HingeLoss" - bottom: "pred" - bottom: "label" - top: "loss" - hinge_loss_param { - norm: L2 - } - } +Layers: -The hinge loss layer computes a one-vs-all hinge or squared hinge loss. +* [Recurrent](layers/recurrent.html) +* [RNN](layers/rnn.html) +* [Long-Short Term Memory (LSTM)](layers/lstm.html) -#### Sigmoid Cross-Entropy +## Common Layers -`SigmoidCrossEntropyLoss` +Layers: -#### Infogain +* [Inner Product](layers/innerproduct.html) - fully connected layer. +* [Dropout](layers/dropout.html) +* [Embed](layers/embed.html) - for learning embeddings of one-hot encoded vector (takes index as input). -`InfogainLoss` +## Normalization Layers -#### Accuracy and Top-k +* [Local Response Normalization (LRN)](layers/lrn.html) - performs a kind of "lateral inhibition" by normalizing over local input regions. +* [Mean Variance Normalization (MVN)](layers/mvn.html) - performs contrast normalization / instance normalization. +* [Batch Normalization](layers/batchnorm.html) - performs normalization over mini-batches. -`Accuracy` scores the output as the accuracy of output with respect to target -- it is not actually a loss and has no backward step. +The [bias](layers/bias.html) and [scale](layers/scale.html) layers can be helpful in combination with normalization. -### Activation / Neuron Layers +## Activation / Neuron Layers In general, activation / Neuron layers are element-wise operators, taking one bottom blob and producing one top blob of the same size. 
In the layers below, we will ignore the input and out sizes as they are identical: @@ -189,337 +80,56 @@ In general, activation / Neuron layers are element-wise operators, taking one bo * Output - n * c * h * w -#### ReLU / Rectified-Linear and Leaky-ReLU - -* Layer type: `ReLU` -* CPU implementation: `./src/caffe/layers/relu_layer.cpp` -* CUDA GPU implementation: `./src/caffe/layers/relu_layer.cu` -* Parameters (`ReLUParameter relu_param`) - - Optional - - `negative_slope` [default 0]: specifies whether to leak the negative part by multiplying it with the slope value rather than setting it to 0. -* Sample (as seen in `./models/bvlc_reference_caffenet/train_val.prototxt`) - - layer { - name: "relu1" - type: "ReLU" - bottom: "conv1" - top: "conv1" - } - -Given an input value x, The `ReLU` layer computes the output as x if x > 0 and negative_slope * x if x <= 0. When the negative slope parameter is not set, it is equivalent to the standard ReLU function of taking max(x, 0). It also supports in-place computation, meaning that the bottom and the top blob could be the same to preserve memory consumption. - -#### Sigmoid - -* Layer type: `Sigmoid` -* CPU implementation: `./src/caffe/layers/sigmoid_layer.cpp` -* CUDA GPU implementation: `./src/caffe/layers/sigmoid_layer.cu` -* Sample (as seen in `./examples/mnist/mnist_autoencoder.prototxt`) - - layer { - name: "encode1neuron" - bottom: "encode1" - top: "encode1neuron" - type: "Sigmoid" - } - -The `Sigmoid` layer computes the output as sigmoid(x) for each input element x. - -#### TanH / Hyperbolic Tangent - -* Layer type: `TanH` -* CPU implementation: `./src/caffe/layers/tanh_layer.cpp` -* CUDA GPU implementation: `./src/caffe/layers/tanh_layer.cu` -* Sample - - layer { - name: "layer" - bottom: "in" - top: "out" - type: "TanH" - } - -The `TanH` layer computes the output as tanh(x) for each input element x. 
- -#### Absolute Value - -* Layer type: `AbsVal` -* CPU implementation: `./src/caffe/layers/absval_layer.cpp` -* CUDA GPU implementation: `./src/caffe/layers/absval_layer.cu` -* Sample - - layer { - name: "layer" - bottom: "in" - top: "out" - type: "AbsVal" - } - -The `AbsVal` layer computes the output as abs(x) for each input element x. - -#### Power - -* Layer type: `Power` -* CPU implementation: `./src/caffe/layers/power_layer.cpp` -* CUDA GPU implementation: `./src/caffe/layers/power_layer.cu` -* Parameters (`PowerParameter power_param`) - - Optional - - `power` [default 1] - - `scale` [default 1] - - `shift` [default 0] -* Sample - - layer { - name: "layer" - bottom: "in" - top: "out" - type: "Power" - power_param { - power: 1 - scale: 1 - shift: 0 - } - } - -The `Power` layer computes the output as (shift + scale * x) ^ power for each input element x. - -#### BNLL - -* Layer type: `BNLL` -* CPU implementation: `./src/caffe/layers/bnll_layer.cpp` -* CUDA GPU implementation: `./src/caffe/layers/bnll_layer.cu` -* Sample - - layer { - name: "layer" - bottom: "in" - top: "out" - type: BNLL - } - -The `BNLL` (binomial normal log likelihood) layer computes the output as log(1 + exp(x)) for each input element x. - - -### Data Layers - -Data enters Caffe through data layers: they lie at the bottom of nets. Data can come from efficient databases (LevelDB or LMDB), directly from memory, or, when efficiency is not critical, from files on disk in HDF5 or common image formats. - -Common input preprocessing (mean subtraction, scaling, random cropping, and mirroring) is available by specifying `TransformationParameter`s. 
- -#### Database +Layers: -* Layer type: `Data` -* Parameters - - Required - - `source`: the name of the directory containing the database - - `batch_size`: the number of inputs to process at one time - - Optional - - `rand_skip`: skip up to this number of inputs at the beginning; useful for asynchronous sgd - - `backend` [default `LEVELDB`]: choose whether to use a `LEVELDB` or `LMDB` +* [ReLU / Rectified-Linear and Leaky-ReLU](layers/relu.html) - ReLU and Leaky-ReLU rectification. +* [PReLU](layers/prelu.html) - parametric ReLU. +* [ELU](layers/elu.html) - exponential linear rectification. +* [Sigmoid](layers/sigmoid.html) +* [TanH](layers/tanh.html) +* [Absolute Value](layers/abs.html) +* [Power](layers/power.html) - f(x) = (shift + scale * x) ^ power. +* [Exp](layers/exp.html) - f(x) = base ^ (shift + scale * x). +* [Log](layers/log.html) - f(x) = log(x). +* [BNLL](layers/bnll.html) - f(x) = log(1 + exp(x)). +* [Threshold](layers/threshold.html) - performs step function at user defined threshold. +* [Bias](layers/bias.html) - adds a bias to a blob that can either be learned or fixed. +* [Scale](layers/scale.html) - scales a blob by an amount that can either be learned or fixed. +## Utility Layers +Layers: -#### In-Memory +* [Flatten](layers/flatten.html) +* [Reshape](layers/reshape.html) +* [Batch Reindex](layers/batchreindex.html) -* Layer type: `MemoryData` -* Parameters - - Required - - `batch_size`, `channels`, `height`, `width`: specify the size of input chunks to read from memory +* [Split](layers/split.html) +* [Concat](layers/concat.html) +* [Slicing](layers/slice.html) +* [Eltwise](layers/eltwise.html) - element-wise operations such as product or sum between two blobs. +* [Filter / Mask](layers/filter.html) - mask or select output using last blob. +* [Parameter](layers/parameter.html) - enable parameters to be shared between layers. +* [Reduction](layers/reduction.html) - reduce input blob to scalar blob using operations such as sum or mean. 
+* [Silence](layers/silence.html) - prevent top-level blobs from being printed during training. -The memory data layer reads data directly from memory, without copying it. In order to use it, one must call `MemoryDataLayer::Reset` (from C++) or `Net.set_input_arrays` (from Python) in order to specify a source of contiguous data (as 4D row major array), which is read one batch-sized chunk at a time. +* [ArgMax](layers/argmax.html) +* [Softmax](layers/softmax.html) -#### HDF5 Input +* [Python](layers/python.html) - allows custom Python layers. -* Layer type: `HDF5Data` -* Parameters - - Required - - `source`: the name of the file to read from - - `batch_size` +## Loss Layers -#### HDF5 Output - -* Layer type: `HDF5Output` -* Parameters - - Required - - `file_name`: name of file to write to - -The HDF5 output layer performs the opposite function of the other layers in this section: it writes its input blobs to disk. - -#### Images - -* Layer type: `ImageData` -* Parameters - - Required - - `source`: name of a text file, with each line giving an image filename and label - - `batch_size`: number of images to batch together - - Optional - - `rand_skip` - - `shuffle` [default false] - - `new_height`, `new_width`: if provided, resize all images to this size - -#### Windows - -`WindowData` - -#### Dummy - -`DummyData` is for development and debugging. See `DummyDataParameter`. 
- -### Common Layers - -#### Inner Product - -* Layer type: `InnerProduct` -* CPU implementation: `./src/caffe/layers/inner_product_layer.cpp` -* CUDA GPU implementation: `./src/caffe/layers/inner_product_layer.cu` -* Parameters (`InnerProductParameter inner_product_param`) - - Required - - `num_output` (`c_o`): the number of filters - - Strongly recommended - - `weight_filler` [default `type: 'constant' value: 0`] - - Optional - - `bias_filler` [default `type: 'constant' value: 0`] - - `bias_term` [default `true`]: specifies whether to learn and apply a set of additive biases to the filter outputs -* Input - - `n * c_i * h_i * w_i` -* Output - - `n * c_o * 1 * 1` -* Sample - - layer { - name: "fc8" - type: "InnerProduct" - # learning rate and decay multipliers for the weights - param { lr_mult: 1 decay_mult: 1 } - # learning rate and decay multipliers for the biases - param { lr_mult: 2 decay_mult: 0 } - inner_product_param { - num_output: 1000 - weight_filler { - type: "gaussian" - std: 0.01 - } - bias_filler { - type: "constant" - value: 0 - } - } - bottom: "fc7" - top: "fc8" - } - -The `InnerProduct` layer (also usually referred to as the fully connected layer) treats the input as a simple vector and produces an output in the form of a single vector (with the blob's height and width set to 1). - -#### Splitting - -The `Split` layer is a utility layer that splits an input blob to multiple output blobs. This is used when a blob is fed into multiple output layers. 
- -#### Flattening - -The `Flatten` layer is a utility layer that flattens an input of shape `n * c * h * w` to a simple vector output of shape `n * (c*h*w)` - -#### Reshape - -* Layer type: `Reshape` -* Implementation: `./src/caffe/layers/reshape_layer.cpp` -* Parameters (`ReshapeParameter reshape_param`) - - Optional: (also see detailed description below) - - `shape` - -* Input - - a single blob with arbitrary dimensions -* Output - - the same blob, with modified dimensions, as specified by `reshape_param` - -* Sample - - layer { - name: "reshape" - type: "Reshape" - bottom: "input" - top: "output" - reshape_param { - shape { - dim: 0 # copy the dimension from below - dim: 2 - dim: 3 - dim: -1 # infer it from the other dimensions - } - } - } - -The `Reshape` layer can be used to change the dimensions of its input, without changing its data. Just like the `Flatten` layer, only the dimensions are changed; no data is copied in the process. - -Output dimensions are specified by the `ReshapeParam` proto. Positive numbers are used directly, setting the corresponding dimension of the output blob. In addition, two special values are accepted for any of the target dimension values: - -* **0** means "copy the respective dimension of the bottom layer". That is, if the bottom has 2 as its 1st dimension, the top will have 2 as its 1st dimension as well, given `dim: 0` as the 1st target dimension. -* **-1** stands for "infer this from the other dimensions". This behavior is similar to that of -1 in *numpy*'s or `[]` for *MATLAB*'s reshape: this dimension is calculated to keep the overall element count the same as in the bottom layer. At most one -1 can be used in a reshape operation. - -As another example, specifying `reshape_param { shape { dim: 0 dim: -1 } }` makes the layer behave in exactly the same way as the `Flatten` layer. 
- -#### Concatenation - -* Layer type: `Concat` -* CPU implementation: `./src/caffe/layers/concat_layer.cpp` -* CUDA GPU implementation: `./src/caffe/layers/concat_layer.cu` -* Parameters (`ConcatParameter concat_param`) - - Optional - - `axis` [default 1]: 0 for concatenation along num and 1 for channels. -* Input - - `n_i * c_i * h * w` for each input blob i from 1 to K. -* Output - - if `axis = 0`: `(n_1 + n_2 + ... + n_K) * c_1 * h * w`, and all input `c_i` should be the same. - - if `axis = 1`: `n_1 * (c_1 + c_2 + ... + c_K) * h * w`, and all input `n_i` should be the same. -* Sample - - layer { - name: "concat" - bottom: "in1" - bottom: "in2" - top: "out" - type: "Concat" - concat_param { - axis: 1 - } - } - -The `Concat` layer is a utility layer that concatenates its multiple input blobs to one single output blob. - -#### Slicing - -The `Slice` layer is a utility layer that slices an input layer to multiple output layers along a given dimension (currently num or channel only) with given slice indices. - -* Sample - - layer { - name: "slicer_label" - type: "Slice" - bottom: "label" - ## Example of label with a shape N x 3 x 1 x 1 - top: "label1" - top: "label2" - top: "label3" - slice_param { - axis: 1 - slice_point: 1 - slice_point: 2 - } - } - -`axis` indicates the target axis; `slice_point` indicates indexes in the selected dimension (the number of indices must be equal to the number of top blobs minus one). - - -#### Elementwise Operations - -`Eltwise` - -#### Argmax - -`ArgMax` - -#### Softmax +Loss drives learning by comparing an output to a target and assigning cost to minimize. The loss itself is computed by the forward pass and the gradient w.r.t. to the loss is computed by the backward pass. -`Softmax` +Layers: -#### Mean-Variance Normalization +* [Multinomial Logistic Loss](layers/multinomiallogisticloss.html) +* [Infogain Loss](layers/infogainloss.html) - a generalization of MultinomialLogisticLossLayer. 
+* [Softmax with Loss](layers/softmaxwithloss.html) - computes the multinomial logistic loss of the softmax of its inputs. It's conceptually identical to a softmax layer followed by a multinomial logistic loss layer, but provides a more numerically stable gradient.
+* [Sum-of-Squares / Euclidean](layers/euclideanloss.html) - computes the sum of squares of differences of its two inputs, $$\frac 1 {2N} \sum_{i=1}^N \| x^1_i - x^2_i \|_2^2$$.
+* [Hinge / Margin](layers/hingeloss.html) - The hinge loss layer computes a one-vs-all hinge (L1) or squared hinge loss (L2).
+* [Sigmoid Cross-Entropy Loss](layers/sigmoidcrossentropyloss.html) - computes the cross-entropy (logistic) loss, often used for predicting targets interpreted as probabilities.
+* [Accuracy / Top-k layer](layers/accuracy.html) - scores the output as an accuracy with respect to target -- it is not actually a loss and has no backward step.
+* [Contrastive Loss](layers/contrastiveloss.html)
 
-`MVN`
diff --git a/docs/tutorial/layers/absval.md b/docs/tutorial/layers/absval.md
new file mode 100644
index 00000000000..220c41189be
--- /dev/null
+++ b/docs/tutorial/layers/absval.md
@@ -0,0 +1,22 @@
+---
+title: Absolute Value Layer
+---
+
+# Absolute Value Layer
+
+* Layer type: `AbsVal`
+* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1AbsValLayer.html)
+* Header: [`./include/caffe/layers/absval_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/absval_layer.hpp)
+* CPU implementation: [`./src/caffe/layers/absval_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/absval_layer.cpp)
+* CUDA GPU implementation: [`./src/caffe/layers/absval_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/absval_layer.cu)
+
+* Sample
+
+      layer {
+        name: "layer"
+        bottom: "in"
+        top: "out"
+        type: "AbsVal"
+      }
+
+The `AbsVal` layer computes the output as abs(x) for each input element x.
diff --git a/docs/tutorial/layers/accuracy.md b/docs/tutorial/layers/accuracy.md new file mode 100644 index 00000000000..ecf84090e61 --- /dev/null +++ b/docs/tutorial/layers/accuracy.md @@ -0,0 +1,21 @@ +--- +title: Accuracy and Top-k +--- + +# Accuracy and Top-k + +`Accuracy` scores the output as the accuracy of output with respect to target -- it is not actually a loss and has no backward step. + +* Layer type: `Accuracy` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1AccuracyLayer.html) +* Header: [`./include/caffe/layers/accuracy_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/accuracy_layer.hpp) +* CPU implementation: [`./src/caffe/layers/accuracy_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/accuracy_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/accuracy_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/accuracy_layer.cu) + +## Parameters +* Parameters (`AccuracyParameter accuracy_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto)): + +{% highlight Protobuf %} +{% include proto/AccuracyParameter.txt %} +{% endhighlight %} \ No newline at end of file diff --git a/docs/tutorial/layers/argmax.md b/docs/tutorial/layers/argmax.md new file mode 100644 index 00000000000..f5f173ac731 --- /dev/null +++ b/docs/tutorial/layers/argmax.md @@ -0,0 +1,19 @@ +--- +title: ArgMax Layer +--- + +# ArgMax Layer + +* Layer type: `ArgMax` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ArgMaxLayer.html) +* Header: [`./include/caffe/layers/argmax_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/argmax_layer.hpp) +* CPU implementation: [`./src/caffe/layers/argmax_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/argmax_layer.cpp) +* CUDA GPU implementation: 
[`./src/caffe/layers/argmax_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/argmax_layer.cu) + +## Parameters +* Parameters (`ArgMaxParameter argmax_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto)): + +{% highlight Protobuf %} +{% include proto/ArgMaxParameter.txt %} +{% endhighlight %} \ No newline at end of file diff --git a/docs/tutorial/layers/batchnorm.md b/docs/tutorial/layers/batchnorm.md new file mode 100644 index 00000000000..a5be5ce08bf --- /dev/null +++ b/docs/tutorial/layers/batchnorm.md @@ -0,0 +1,20 @@ +--- +title: Batch Norm Layer +--- + +# Batch Norm Layer + +* Layer type: `BatchNorm` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1BatchNormLayer.html) +* Header: [`./include/caffe/layers/batch_norm_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/batch_norm_layer.hpp) +* CPU implementation: [`./src/caffe/layers/batch_norm_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/batch_norm_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/batch_norm_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/batch_norm_layer.cu) + +## Parameters + +* Parameters (`BatchNormParameter batch_norm_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/BatchNormParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/batchreindex.md b/docs/tutorial/layers/batchreindex.md new file mode 100644 index 00000000000..21b36c39ba5 --- /dev/null +++ b/docs/tutorial/layers/batchreindex.md @@ -0,0 +1,16 @@ +--- +title: Batch Reindex Layer +--- + +# Batch Reindex Layer + +* Layer type: `BatchReindex` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1BatchReindexLayer.html) +* Header: 
[`./include/caffe/layers/batch_reindex_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/batch_reindex_layer.hpp) +* CPU implementation: [`./src/caffe/layers/batch_reindex_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/batch_reindex_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/batch_reindex_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/batch_reindex_layer.cu) + + +## Parameters + +No parameters. diff --git a/docs/tutorial/layers/bias.md b/docs/tutorial/layers/bias.md new file mode 100644 index 00000000000..d3a00c2fc78 --- /dev/null +++ b/docs/tutorial/layers/bias.md @@ -0,0 +1,19 @@ +--- +title: Bias Layer +--- + +# Bias Layer + +* Layer type: `Bias` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1BiasLayer.html) +* Header: [`./include/caffe/layers/bias_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/bias_layer.hpp) +* CPU implementation: [`./src/caffe/layers/bias_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/bias_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/bias_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/bias_layer.cu) + +## Parameters +* Parameters (`BiasParameter bias_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto)): + +{% highlight Protobuf %} +{% include proto/BiasParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/bnll.md b/docs/tutorial/layers/bnll.md new file mode 100644 index 00000000000..2b68b79ff83 --- /dev/null +++ b/docs/tutorial/layers/bnll.md @@ -0,0 +1,25 @@ +--- +title: BNLL Layer +--- + +# BNLL Layer + +* Layer type: `BNLL` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1BNLLLayer.html) +* Header: 
[`./include/caffe/layers/bnll_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/bnll_layer.hpp) +* CPU implementation: [`./src/caffe/layers/bnll_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/bnll_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/bnll_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/bnll_layer.cu) + +The `BNLL` (binomial normal log likelihood) layer computes the output as log(1 + exp(x)) for each input element x. + +## Parameters +No parameters. + +## Sample + + layer { + name: "layer" + bottom: "in" + top: "out" + type: BNLL + } diff --git a/docs/tutorial/layers/concat.md b/docs/tutorial/layers/concat.md new file mode 100644 index 00000000000..c7b253953d7 --- /dev/null +++ b/docs/tutorial/layers/concat.md @@ -0,0 +1,40 @@ +--- +title: Concat Layer +--- + +# Concat Layer + +* Layer type: `Concat` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ConcatLayer.html) +* Header: [`./include/caffe/layers/concat_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/concat_layer.hpp) +* CPU implementation: [`./src/caffe/layers/concat_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/concat_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/concat_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/concat_layer.cu) +* Input + - `n_i * c_i * h * w` for each input blob i from 1 to K. +* Output + - if `axis = 0`: `(n_1 + n_2 + ... + n_K) * c_1 * h * w`, and all input `c_i` should be the same. + - if `axis = 1`: `n_1 * (c_1 + c_2 + ... + c_K) * h * w`, and all input `n_i` should be the same. +* Sample + + layer { + name: "concat" + bottom: "in1" + bottom: "in2" + top: "out" + type: "Concat" + concat_param { + axis: 1 + } + } + +The `Concat` layer is a utility layer that concatenates its multiple input blobs to one single output blob. 
+ +## Parameters +* Parameters (`ConcatParameter concat_param`) + - Optional + - `axis` [default 1]: 0 for concatenation along num and 1 for channels. +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto)): + +{% highlight Protobuf %} +{% include proto/ConcatParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/contrastiveloss.md b/docs/tutorial/layers/contrastiveloss.md new file mode 100644 index 00000000000..bb1859d9f37 --- /dev/null +++ b/docs/tutorial/layers/contrastiveloss.md @@ -0,0 +1,20 @@ +--- +title: Contrastive Loss Layer +--- + +# Contrastive Loss Layer + +* Layer type: `ContrastiveLoss` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ContrastiveLossLayer.html) +* Header: [`./include/caffe/layers/contrastive_loss_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/contrastive_loss_layer.hpp) +* CPU implementation: [`./src/caffe/layers/contrastive_loss_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/contrastive_loss_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/contrastive_loss_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/contrastive_loss_layer.cu) + +## Parameters + +* Parameters (`ContrastiveLossParameter contrastive_loss_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto)): + +{% highlight Protobuf %} +{% include proto/ContrastiveLossParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/convolution.md b/docs/tutorial/layers/convolution.md new file mode 100644 index 00000000000..cc9f4fd0449 --- /dev/null +++ b/docs/tutorial/layers/convolution.md @@ -0,0 +1,63 @@ +--- +title: Convolution Layer +--- + +# Convolution Layer + +* Layer type: `Convolution` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ConvolutionLayer.html) +* Header: 
[`./include/caffe/layers/conv_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/conv_layer.hpp) +* CPU implementation: [`./src/caffe/layers/conv_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/conv_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu) +* Input + - `n * c_i * h_i * w_i` +* Output + - `n * c_o * h_o * w_o`, where `h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1` and `w_o` likewise. + +The `Convolution` layer convolves the input image with a set of learnable filters, each producing one feature map in the output image. + +## Sample + +Sample (as seen in [`./models/bvlc_reference_caffenet/train_val.prototxt`](https://github.com/BVLC/caffe/blob/master/models/bvlc_reference_caffenet/train_val.prototxt)): + + layer { + name: "conv1" + type: "Convolution" + bottom: "data" + top: "conv1" + # learning rate and decay multipliers for the filters + param { lr_mult: 1 decay_mult: 1 } + # learning rate and decay multipliers for the biases + param { lr_mult: 2 decay_mult: 0 } + convolution_param { + num_output: 96 # learn 96 filters + kernel_size: 11 # each filter is 11x11 + stride: 4 # step 4 pixels between each filter application + weight_filler { + type: "gaussian" # initialize the filters from a Gaussian + std: 0.01 # distribution with stdev 0.01 (default mean: 0) + } + bias_filler { + type: "constant" # initialize the biases to zero (0) + value: 0 + } + } + } + +## Parameters +* Parameters (`ConvolutionParameter convolution_param`) + - Required + - `num_output` (`c_o`): the number of filters + - `kernel_size` (or `kernel_h` and `kernel_w`): specifies height and width of each filter + - Strongly Recommended + - `weight_filler` [default `type: 'constant' value: 0`] + - Optional + - `bias_term` [default `true`]: specifies whether to learn and apply a set of additive biases to the filter outputs + - `pad` (or `pad_h` and 
`pad_w`) [default 0]: specifies the number of pixels to (implicitly) add to each side of the input + - `stride` (or `stride_h` and `stride_w`) [default 1]: specifies the intervals at which to apply the filters to the input + - `group` (g) [default 1]: If g > 1, we restrict the connectivity of each filter to a subset of the input. Specifically, the input and output channels are separated into g groups, and the $$i$$th output group channels will be only connected to the $$i$$th input group channels. +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto)): + +{% highlight Protobuf %} +{% include proto/ConvolutionParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/crop.md b/docs/tutorial/layers/crop.md new file mode 100644 index 00000000000..28f91241f74 --- /dev/null +++ b/docs/tutorial/layers/crop.md @@ -0,0 +1,20 @@ +--- +title: Crop Layer +--- + +# Crop Layer + +* Layer type: `Crop` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1CropLayer.html) +* Header: [`./include/caffe/layers/crop_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/crop_layer.hpp) +* CPU implementation: [`./src/caffe/layers/crop_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/crop_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/crop_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/crop_layer.cu) + +## Parameters + +* Parameters (`CropParameter crop_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto)): + +{% highlight Protobuf %} +{% include proto/CropParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/data.md b/docs/tutorial/layers/data.md new file mode 100644 index 00000000000..58e0dcaab22 --- /dev/null +++ b/docs/tutorial/layers/data.md @@ -0,0 +1,29 @@ +--- +title: Database Layer +--- + +# Database Layer + +* Layer 
type: `Data` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1DataLayer.html) +* Header: [`./include/caffe/layers/data_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/data_layer.hpp) +* CPU implementation: [`./src/caffe/layers/data_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/data_layer.cpp) + + +## Parameters + +* Parameters (`DataParameter data_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto)): + +{% highlight Protobuf %} +{% include proto/DataParameter.txt %} +{% endhighlight %} + +* Parameters + - Required + - `source`: the name of the directory containing the database + - `batch_size`: the number of inputs to process at one time + - Optional + - `rand_skip`: skip up to this number of inputs at the beginning; useful for asynchronous sgd + - `backend` [default `LEVELDB`]: choose whether to use a `LEVELDB` or `LMDB` + diff --git a/docs/tutorial/layers/deconvolution.md b/docs/tutorial/layers/deconvolution.md new file mode 100644 index 00000000000..2eff967d613 --- /dev/null +++ b/docs/tutorial/layers/deconvolution.md @@ -0,0 +1,22 @@ +--- +title: Deconvolution Layer +--- + +# Deconvolution Layer + +* Layer type: `Deconvolution` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1DeconvolutionLayer.html) +* Header: [`./include/caffe/layers/deconv_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/deconv_layer.hpp) +* CPU implementation: [`./src/caffe/layers/deconv_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/deconv_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/deconv_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/deconv_layer.cu) + +## Parameters + +Uses the same parameters as the Convolution layer. 
+ +* Parameters (`ConvolutionParameter convolution_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto)): + +{% highlight Protobuf %} +{% include proto/ConvolutionParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/dropout.md b/docs/tutorial/layers/dropout.md new file mode 100644 index 00000000000..d8c6f9556be --- /dev/null +++ b/docs/tutorial/layers/dropout.md @@ -0,0 +1,20 @@ +--- +title: Dropout Layer +--- + +# Dropout Layer + +* Layer type: `Dropout` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1DropoutLayer.html) +* Header: [`./include/caffe/layers/dropout_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/dropout_layer.hpp) +* CPU implementation: [`./src/caffe/layers/dropout_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/dropout_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/dropout_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/dropout_layer.cu) + +## Parameters + +* Parameters (`DropoutParameter dropout_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto)): + +{% highlight Protobuf %} +{% include proto/DropoutParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/dummydata.md b/docs/tutorial/layers/dummydata.md new file mode 100644 index 00000000000..d069f9c595e --- /dev/null +++ b/docs/tutorial/layers/dummydata.md @@ -0,0 +1,20 @@ +--- +title: Dummy Data Layer +--- + +# Dummy Data Layer + +* Layer type: `DummyData` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1DummyDataLayer.html) +* Header: [`./include/caffe/layers/dummy_data_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/dummy_data_layer.hpp) +* CPU implementation: 
[`./src/caffe/layers/dummy_data_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/dummy_data_layer.cpp) + + +## Parameters + +* Parameters (`DummyDataParameter dummy_data_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto)): + +{% highlight Protobuf %} +{% include proto/DummyDataParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/eltwise.md b/docs/tutorial/layers/eltwise.md new file mode 100644 index 00000000000..70fe7910c5a --- /dev/null +++ b/docs/tutorial/layers/eltwise.md @@ -0,0 +1,20 @@ +--- +title: Eltwise Layer +--- + +# Eltwise Layer + +* Layer type: `Eltwise` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1EltwiseLayer.html) +* Header: [`./include/caffe/layers/eltwise_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/eltwise_layer.hpp) +* CPU implementation: [`./src/caffe/layers/eltwise_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/eltwise_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/eltwise_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/eltwise_layer.cu) + +## Parameters + +* Parameters (`EltwiseParameter eltwise_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto)): + +{% highlight Protobuf %} +{% include proto/EltwiseParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/elu.md b/docs/tutorial/layers/elu.md new file mode 100644 index 00000000000..11db0f0e3d6 --- /dev/null +++ b/docs/tutorial/layers/elu.md @@ -0,0 +1,25 @@ +--- +title: ELU Layer +--- + +# ELU Layer + +* Layer type: `ELU` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ELULayer.html) +* Header: [`./include/caffe/layers/elu_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/elu_layer.hpp) +* CPU implementation: 
[`./src/caffe/layers/elu_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/elu_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/elu_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/elu_layer.cu) + +## References + +* Clevert, Djork-Arne, Thomas Unterthiner, and Sepp Hochreiter. + "Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)" [arXiv:1511.07289](https://arxiv.org/abs/1511.07289). (2015). + +## Parameters + +* Parameters (`ELUParameter elu_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/ELUParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/embed.md b/docs/tutorial/layers/embed.md new file mode 100644 index 00000000000..271636d8d97 --- /dev/null +++ b/docs/tutorial/layers/embed.md @@ -0,0 +1,20 @@ +--- +title: Embed Layer +--- + +# Embed Layer + +* Layer type: `Embed` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1EmbedLayer.html) +* Header: [`./include/caffe/layers/embed_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/embed_layer.hpp) +* CPU implementation: [`./src/caffe/layers/embed_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/embed_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/embed_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/embed_layer.cu) + +## Parameters + +* Parameters (`EmbedParameter embed_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/EmbedParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/euclideanloss.md b/docs/tutorial/layers/euclideanloss.md new file mode 100644 index 00000000000..c1b72084c14 --- /dev/null +++ b/docs/tutorial/layers/euclideanloss.md @@ -0,0 +1,16 @@ +--- 
+title: Euclidean Loss Layer +--- +# Sum-of-Squares / Euclidean Loss Layer + +* Layer type: `EuclideanLoss` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1EuclideanLossLayer.html) +* Header: [`./include/caffe/layers/euclidean_loss_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/euclidean_loss_layer.hpp) +* CPU implementation: [`./src/caffe/layers/euclidean_loss_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/euclidean_loss_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/euclidean_loss_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/euclidean_loss_layer.cu) + +The Euclidean loss layer computes the sum of squares of differences of its two inputs, $$\frac 1 {2N} \sum_{i=1}^N \| x^1_i - x^2_i \|_2^2$$. + +## Parameters + +Does not take any parameters. diff --git a/docs/tutorial/layers/exp.md b/docs/tutorial/layers/exp.md new file mode 100644 index 00000000000..ef2500ec214 --- /dev/null +++ b/docs/tutorial/layers/exp.md @@ -0,0 +1,24 @@ +--- +title: Exponential Layer +--- + +# Exponential Layer + +* Layer type: `Exp` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ExpLayer.html) +* Header: [`./include/caffe/layers/exp_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/exp_layer.hpp) +* CPU implementation: [`./src/caffe/layers/exp_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/exp_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/exp_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/exp_layer.cu) + +## Parameters + +* Parameters (`ExpParameter exp_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/ExpParameter.txt %} +{% endhighlight %} + +## See also + +* [Power layer](power.html) diff --git a/docs/tutorial/layers/filter.md 
b/docs/tutorial/layers/filter.md new file mode 100644 index 00000000000..aeda9ee66f8 --- /dev/null +++ b/docs/tutorial/layers/filter.md @@ -0,0 +1,15 @@ +--- +title: Filter Layer +--- + +# Filter Layer + +* Layer type: `Filter` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1FilterLayer.html) +* Header: [`./include/caffe/layers/filter_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/filter_layer.hpp) +* CPU implementation: [`./src/caffe/layers/filter_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/filter_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/filter_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/filter_layer.cu) + +## Parameters + +Does not take any parameters. diff --git a/docs/tutorial/layers/flatten.md b/docs/tutorial/layers/flatten.md new file mode 100644 index 00000000000..ecf08262707 --- /dev/null +++ b/docs/tutorial/layers/flatten.md @@ -0,0 +1,21 @@ +--- +title: Flatten Layer +--- + +# Flatten Layer + +* Layer type: `Flatten` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1FlattenLayer.html) +* Header: [`./include/caffe/layers/flatten_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/flatten_layer.hpp) +* CPU implementation: [`./src/caffe/layers/flatten_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/flatten_layer.cpp) + +The `Flatten` layer is a utility layer that flattens an input of shape `n * c * h * w` to a simple vector output of shape `n * (c*h*w)`. 
+ +## Parameters + +* Parameters (`FlattenParameter flatten_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/FlattenParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/hdf5data.md b/docs/tutorial/layers/hdf5data.md new file mode 100644 index 00000000000..d6b7ea24d2e --- /dev/null +++ b/docs/tutorial/layers/hdf5data.md @@ -0,0 +1,20 @@ +--- +title: HDF5 Data Layer +--- + +# HDF5 Data Layer + +* Layer type: `HDF5Data` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1HDF5DataLayer.html) +* Header: [`./include/caffe/layers/hdf5_data_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/hdf5_data_layer.hpp) +* CPU implementation: [`./src/caffe/layers/hdf5_data_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/hdf5_data_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/hdf5_data_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/hdf5_data_layer.cu) + +## Parameters + +* Parameters (`HDF5DataParameter hdf5_data_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/HDF5DataParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/hdf5output.md b/docs/tutorial/layers/hdf5output.md new file mode 100644 index 00000000000..cfbe4ddb771 --- /dev/null +++ b/docs/tutorial/layers/hdf5output.md @@ -0,0 +1,25 @@ +--- +title: HDF5 Output Layer +--- + +# HDF5 Output Layer + +* Layer type: `HDF5Output` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1HDF5OutputLayer.html) +* Header: [`./include/caffe/layers/hdf5_output_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/hdf5_output_layer.hpp) +* CPU implementation: 
[`./src/caffe/layers/hdf5_output_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/hdf5_output_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/hdf5_output_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/hdf5_output_layer.cu) + +The HDF5 output layer performs the opposite function of the other layers in this section: it writes its input blobs to disk. + +## Parameters + +* Parameters (`HDF5OutputParameter hdf5_output_param`) + - Required + - `file_name`: name of file to write to + +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/HDF5OutputParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/hingeloss.md b/docs/tutorial/layers/hingeloss.md new file mode 100644 index 00000000000..ef4fd95e29d --- /dev/null +++ b/docs/tutorial/layers/hingeloss.md @@ -0,0 +1,19 @@ +--- +title: Hinge Loss Layer +--- + +# Hinge (L1, L2) Loss Layer + +* Layer type: `HingeLoss` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1HingeLossLayer.html) +* Header: [`./include/caffe/layers/hinge_loss_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/hinge_loss_layer.hpp) +* CPU implementation: [`./src/caffe/layers/hinge_loss_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/hinge_loss_layer.cpp) + +## Parameters + +* Parameters (`HingeLossParameter hinge_loss_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/HingeLossParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/im2col.md b/docs/tutorial/layers/im2col.md new file mode 100644 index 00000000000..0badc1cdd93 --- /dev/null +++ b/docs/tutorial/layers/im2col.md @@ -0,0 +1,16 @@ +--- +title: Im2col Layer +--- + +# im2col + +* File type: `Im2col` +* Header: 
[`./include/caffe/layers/im2col_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/im2col_layer.hpp) +* CPU implementation: [`./src/caffe/layers/im2col_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/im2col_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/im2col_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/im2col_layer.cu) + +`Im2col` is a helper for doing the image-to-column transformation that you most +likely do not need to know about. This is used in Caffe's original convolution +to do matrix multiplication by laying out all patches into a matrix. + + diff --git a/docs/tutorial/layers/imagedata.md b/docs/tutorial/layers/imagedata.md new file mode 100644 index 00000000000..82c8a600be3 --- /dev/null +++ b/docs/tutorial/layers/imagedata.md @@ -0,0 +1,27 @@ +--- +title: ImageData Layer +--- + +# ImageData Layer + +* Layer type: `ImageData` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ImageDataLayer.html) +* Header: [`./include/caffe/layers/image_data_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/image_data_layer.hpp) +* CPU implementation: [`./src/caffe/layers/image_data_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/image_data_layer.cpp) + +## Parameters + +* Parameters (`ImageDataParameter image_data_parameter`) + - Required + - `source`: name of a text file, with each line giving an image filename and label + - `batch_size`: number of images to batch together + - Optional + - `rand_skip` + - `shuffle` [default false] + - `new_height`, `new_width`: if provided, resize all images to this size + +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/ImageDataParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/infogainloss.md b/docs/tutorial/layers/infogainloss.md new file 
mode 100644 index 00000000000..86140b6cca7 --- /dev/null +++ b/docs/tutorial/layers/infogainloss.md @@ -0,0 +1,24 @@ +--- +title: Infogain Loss Layer +--- + +# Infogain Loss Layer + +* Layer type: `InfogainLoss` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1InfogainLossLayer.html) +* Header: [`./include/caffe/layers/infogain_loss_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/infogain_loss_layer.hpp) +* CPU implementation: [`./src/caffe/layers/infogain_loss_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/infogain_loss_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/infogain_loss_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/infogain_loss_layer.cu) + +A generalization of [MultinomialLogisticLossLayer](multinomiallogisticloss.html) that takes an "information gain" (infogain) matrix specifying the "value" of all label pairs. + +Equivalent to the [MultinomialLogisticLossLayer](multinomiallogisticloss.html) if the infogain matrix is the identity. 
+ +## Parameters + +* Parameters (`Parameter infogain_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/InfogainLossParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/innerproduct.md b/docs/tutorial/layers/innerproduct.md new file mode 100644 index 00000000000..98b9bea81f5 --- /dev/null +++ b/docs/tutorial/layers/innerproduct.md @@ -0,0 +1,59 @@ +--- +title: Inner Product / Fully Connected Layer +--- + +# Inner Product / Fully Connected Layer + +* Layer type: `InnerProduct` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1InnerProductLayer.html) +* Header: [`./include/caffe/layers/inner_product_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/inner_product_layer.hpp) +* CPU implementation: [`./src/caffe/layers/inner_product_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/inner_product_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/inner_product_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/inner_product_layer.cu) + +* Input + - `n * c_i * h_i * w_i` +* Output + - `n * c_o * 1 * 1` +* Sample + + layer { + name: "fc8" + type: "InnerProduct" + # learning rate and decay multipliers for the weights + param { lr_mult: 1 decay_mult: 1 } + # learning rate and decay multipliers for the biases + param { lr_mult: 2 decay_mult: 0 } + inner_product_param { + num_output: 1000 + weight_filler { + type: "gaussian" + std: 0.01 + } + bias_filler { + type: "constant" + value: 0 + } + } + bottom: "fc7" + top: "fc8" + } + +The `InnerProduct` layer (also usually referred to as the fully connected layer) treats the input as a simple vector and produces an output in the form of a single vector (with the blob's height and width set to 1). 
+ + +## Parameters + +* Parameters (`InnerProductParameter inner_product_param`) + - Required + - `num_output` (`c_o`): the number of filters + - Strongly recommended + - `weight_filler` [default `type: 'constant' value: 0`] + - Optional + - `bias_filler` [default `type: 'constant' value: 0`] + - `bias_term` [default `true`]: specifies whether to learn and apply a set of additive biases to the filter outputs +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/InnerProductParameter.txt %} +{% endhighlight %} + diff --git a/docs/tutorial/layers/input.md b/docs/tutorial/layers/input.md new file mode 100644 index 00000000000..b74c35d2fb5 --- /dev/null +++ b/docs/tutorial/layers/input.md @@ -0,0 +1,19 @@ +--- +title: Input Layer +--- + +# Input Layer + +* Layer type: `Input` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1InputLayer.html) +* Header: [`./include/caffe/layers/input_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/input_layer.hpp) +* CPU implementation: [`./src/caffe/layers/input_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/input_layer.cpp) + +## Parameters + +* Parameters (`InputParameter input_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto)): + +{% highlight Protobuf %} +{% include proto/InputParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/log.md b/docs/tutorial/layers/log.md new file mode 100644 index 00000000000..df52037489c --- /dev/null +++ b/docs/tutorial/layers/log.md @@ -0,0 +1,20 @@ +--- +title: Log Layer +--- + +# Log Layer + +* Layer type: `Log` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1LogLayer.html) +* Header: 
[`./include/caffe/layers/log_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/log_layer.hpp) +* CPU implementation: [`./src/caffe/layers/log_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/log_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/log_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/log_layer.cu) + +## Parameters + +* Parameters (`LogParameter log_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/LogParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/lrn.md b/docs/tutorial/layers/lrn.md new file mode 100644 index 00000000000..387311c2251 --- /dev/null +++ b/docs/tutorial/layers/lrn.md @@ -0,0 +1,28 @@ +--- +title: Local Response Normalization (LRN) +--- + +# Local Response Normalization (LRN) + +* Layer type: `LRN` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1LRNLayer.html) +* Header: [`./include/caffe/layers/lrn_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/lrn_layer.hpp) +* CPU Implementation: [`./src/caffe/layers/lrn_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/lrn_layer.cpp) +* CUDA GPU Implementation: [`./src/caffe/layers/lrn_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/lrn_layer.cu) +* Parameters (`LRNParameter lrn_param`) + - Optional + - `local_size` [default 5]: the number of channels to sum over (for cross channel LRN) or the side length of the square region to sum over (for within channel LRN) + - `alpha` [default 1]: the scaling parameter (see below) + - `beta` [default 0.75]: the exponent (see below) + - `norm_region` [default `ACROSS_CHANNELS`]: whether to sum over adjacent channels (`ACROSS_CHANNELS`) or nearby spatial locations (`WITHIN_CHANNEL`) + +The local response normalization layer performs a kind of "lateral 
inhibition" by normalizing over local input regions. In `ACROSS_CHANNELS` mode, the local regions extend across nearby channels, but have no spatial extent (i.e., they have shape `local_size x 1 x 1`). In `WITHIN_CHANNEL` mode, the local regions extend spatially, but are in separate channels (i.e., they have shape `1 x local_size x local_size`). Each input value is divided by $$(1 + (\alpha/n) \sum_i x_i^2)^\beta$$, where $$n$$ is the size of each local region, and the sum is taken over the region centered at that value (zero padding is added where necessary). + +## Parameters + +* Parameters (`LRNParameter lrn_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/LRNParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/lstm.md b/docs/tutorial/layers/lstm.md new file mode 100644 index 00000000000..8e4095e950b --- /dev/null +++ b/docs/tutorial/layers/lstm.md @@ -0,0 +1,21 @@ +--- +title: LSTM Layer +--- + +# LSTM Layer + +* Layer type: `LSTM` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1LSTMLayer.html) +* Header: [`./include/caffe/layers/lstm_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/lstm_layer.hpp) +* CPU implementation: [`./src/caffe/layers/lstm_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/lstm_layer.cpp) +* CPU implementation (helper): [`./src/caffe/layers/lstm_unit_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/lstm_unit_layer.cpp) +* CUDA GPU implementation (helper): [`./src/caffe/layers/lstm_unit_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/lstm_unit_layer.cu) + +## Parameters + +* Parameters (`RecurrentParameter recurrent_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include 
proto/RecurrentParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/memorydata.md b/docs/tutorial/layers/memorydata.md new file mode 100644 index 00000000000..754e62aef62 --- /dev/null +++ b/docs/tutorial/layers/memorydata.md @@ -0,0 +1,25 @@ +--- +title: Memory Data Layer +--- + +# Memory Data Layer + +* Layer type: `MemoryData` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1MemoryDataLayer.html) +* Header: [`./include/caffe/layers/memory_data_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/memory_data_layer.hpp) +* CPU implementation: [`./src/caffe/layers/memory_data_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/memory_data_layer.cpp) + +The memory data layer reads data directly from memory, without copying it. In order to use it, one must call `MemoryDataLayer::Reset` (from C++) or `Net.set_input_arrays` (from Python) in order to specify a source of contiguous data (as 4D row major array), which is read one batch-sized chunk at a time. 
+ +## Parameters + +* Parameters (`MemoryDataParameter memory_data_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/MemoryDataParameter.txt %} +{% endhighlight %} + +* Parameters + - Required + - `batch_size`, `channels`, `height`, `width`: specify the size of input chunks to read from memory diff --git a/docs/tutorial/layers/multinomiallogisticloss.md b/docs/tutorial/layers/multinomiallogisticloss.md new file mode 100644 index 00000000000..a28ab914854 --- /dev/null +++ b/docs/tutorial/layers/multinomiallogisticloss.md @@ -0,0 +1,19 @@ +--- +title: Multinomial Logistic Loss Layer +--- + +# Multinomial Logistic Loss Layer + +* Layer type: `MultinomialLogisticLoss` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1MultinomialLogisticLossLayer.html) +* Header: [`./include/caffe/layers/multinomial_logistic_loss_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/multinomial_logistic_loss_layer.hpp) +* CPU implementation: [`./src/caffe/layers/multinomial_logistic_loss_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/multinomial_logistic_loss_layer.cpp) + +## Parameters + +* Parameters (`LossParameter loss_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/LossParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/mvn.md b/docs/tutorial/layers/mvn.md new file mode 100644 index 00000000000..08e44887d22 --- /dev/null +++ b/docs/tutorial/layers/mvn.md @@ -0,0 +1,20 @@ +--- +title: Mean-Variance Normalization (MVN) Layer +--- + +# Mean-Variance Normalization (MVN) Layer + +* Layer type: `MVN` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1MVNLayer.html) +* Header: 
[`./include/caffe/layers/mvn_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/mvn_layer.hpp) +* CPU implementation: [`./src/caffe/layers/mvn_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/mvn_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/mvn_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/mvn_layer.cu) + +## Parameters + +* Parameters (`MVNParameter mvn_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/MVNParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/parameter.md b/docs/tutorial/layers/parameter.md new file mode 100644 index 00000000000..b7e85ec5c9a --- /dev/null +++ b/docs/tutorial/layers/parameter.md @@ -0,0 +1,21 @@ +--- +title: Parameter Layer +--- + +# Parameter Layer + +* Layer type: `Parameter` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ParameterLayer.html) +* Header: [`./include/caffe/layers/parameter_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/parameter_layer.hpp) +* CPU implementation: [`./src/caffe/layers/parameter_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/parameter_layer.cpp) + +See [https://github.com/BVLC/caffe/pull/2079](https://github.com/BVLC/caffe/pull/2079). 
+ +## Parameters + +* Parameters (`ParameterParameter parameter_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/ParameterParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/pooling.md b/docs/tutorial/layers/pooling.md new file mode 100644 index 00000000000..12669ee8d45 --- /dev/null +++ b/docs/tutorial/layers/pooling.md @@ -0,0 +1,47 @@ +--- +title: Pooling Layer +--- +# Pooling + +* Layer type: `Pooling` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1PoolingLayer.html) +* Header: [`./include/caffe/layers/pooling_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/pooling_layer.hpp) +* CPU implementation: [`./src/caffe/layers/pooling_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/pooling_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/pooling_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/pooling_layer.cu) + +* Input + - `n * c * h_i * w_i` +* Output + - `n * c * h_o * w_o`, where h_o and w_o are computed in the same way as convolution. + +## Parameters + +* Parameters (`PoolingParameter pooling_param`) + - Required + - `kernel_size` (or `kernel_h` and `kernel_w`): specifies height and width of each filter + - Optional + - `pool` [default MAX]: the pooling method. 
Currently MAX, AVE, or STOCHASTIC + - `pad` (or `pad_h` and `pad_w`) [default 0]: specifies the number of pixels to (implicitly) add to each side of the input + - `stride` (or `stride_h` and `stride_w`) [default 1]: specifies the intervals at which to apply the filters to the input + + +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/PoolingParameter.txt %} +{% endhighlight %} + +## Sample +* Sample (as seen in [`./models/bvlc_reference_caffenet/train_val.prototxt`](https://github.com/BVLC/caffe/blob/master/models/bvlc_reference_caffenet/train_val.prototxt)) + + layer { + name: "pool1" + type: "Pooling" + bottom: "conv1" + top: "pool1" + pooling_param { + pool: MAX + kernel_size: 3 # pool over a 3x3 region + stride: 2 # step two pixels (in the bottom blob) between pooling regions + } + } diff --git a/docs/tutorial/layers/power.md b/docs/tutorial/layers/power.md new file mode 100644 index 00000000000..d6617529b7d --- /dev/null +++ b/docs/tutorial/layers/power.md @@ -0,0 +1,46 @@ +--- +title: Power Layer +--- + +# Power Layer + +* Layer type: `Power` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1PowerLayer.html) +* Header: [`./include/caffe/layers/power_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/power_layer.hpp) +* CPU implementation: [`./src/caffe/layers/power_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/power_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/power_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/power_layer.cu) + +The `Power` layer computes the output as (shift + scale * x) ^ power for each input element x. 
+ +## Parameters +* Parameters (`PowerParameter power_param`) + - Optional + - `power` [default 1] + - `scale` [default 1] + - `shift` [default 0] + +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/PowerParameter.txt %} +{% endhighlight %} + + + +## Sample + + layer { + name: "layer" + bottom: "in" + top: "out" + type: "Power" + power_param { + power: 1 + scale: 1 + shift: 0 + } + } + +## See also + +* [Exponential layer](exp.html) diff --git a/docs/tutorial/layers/prelu.md b/docs/tutorial/layers/prelu.md new file mode 100644 index 00000000000..e7b7b44acb6 --- /dev/null +++ b/docs/tutorial/layers/prelu.md @@ -0,0 +1,20 @@ +--- +title: PReLU Layer +--- + +# PReLU Layer + +* Layer type: `PReLU` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1PReLULayer.html) +* Header: [`./include/caffe/layers/prelu_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/prelu_layer.hpp) +* CPU implementation: [`./src/caffe/layers/prelu_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/prelu_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/prelu_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/prelu_layer.cu) + +## Parameters + +* Parameters (`PReLUParameter prelu_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/PReLUParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/python.md b/docs/tutorial/layers/python.md new file mode 100644 index 00000000000..2e30b3a79f6 --- /dev/null +++ b/docs/tutorial/layers/python.md @@ -0,0 +1,27 @@ +--- +title: Python Layer +--- + +# Python Layer + +* Layer type: `Python` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1PythonLayer.html) +* Header: 
[`./include/caffe/layers/python_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/python_layer.hpp) + +The Python layer allows users to add customized layers without modifying the Caffe core code. + +## Parameters + +* Parameters (`PythonParameter python_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/PythonParameter.txt %} +{% endhighlight %} + +## Examples and tutorials + +* Simple Euclidean loss example +** [Python code](https://github.com/BVLC/caffe/blob/master/examples/pycaffe/layers/pyloss.py) +** [Prototxt](https://github.com/BVLC/caffe/blob/master/examples/pycaffe/linreg.prototxt) +* [Tutorial for writing Python layers with DIGITS](https://github.com/NVIDIA/DIGITS/tree/master/examples/python-layer) diff --git a/docs/tutorial/layers/recurrent.md b/docs/tutorial/layers/recurrent.md new file mode 100644 index 00000000000..a882b722f8a --- /dev/null +++ b/docs/tutorial/layers/recurrent.md @@ -0,0 +1,20 @@ +--- +title: Recurrent Layer +--- + +# Recurrent Layer + +* Layer type: `Recurrent` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1RecurrentLayer.html) +* Header: [`./include/caffe/layers/recurrent_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/recurrent_layer.hpp) +* CPU implementation: [`./src/caffe/layers/recurrent_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/recurrent_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/recurrent_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/recurrent_layer.cu) + +## Parameters + +* Parameters (`RecurrentParameter recurrent_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/RecurrentParameter.txt %} +{% endhighlight %} diff --git 
a/docs/tutorial/layers/reduction.md b/docs/tutorial/layers/reduction.md new file mode 100644 index 00000000000..db55414b0af --- /dev/null +++ b/docs/tutorial/layers/reduction.md @@ -0,0 +1,20 @@ +--- +title: Reduction Layer +--- + +# Reduction Layer + +* Layer type: `Reduction` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ReductionLayer.html) +* Header: [`./include/caffe/layers/reduction_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/reduction_layer.hpp) +* CPU implementation: [`./src/caffe/layers/reduction_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/reduction_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/reduction_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/reduction_layer.cu) + +## Parameters + +* Parameters (`ReductionParameter reduction_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/ReductionParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/relu.md b/docs/tutorial/layers/relu.md new file mode 100644 index 00000000000..01aab0af4fa --- /dev/null +++ b/docs/tutorial/layers/relu.md @@ -0,0 +1,32 @@ +--- +title: ReLU / Rectified-Linear and Leaky-ReLU Layer +--- + +# ReLU / Rectified-Linear and Leaky-ReLU Layer + +* Layer type: `ReLU` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ReLULayer.html) +* Header: [`./include/caffe/layers/relu_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/relu_layer.hpp) +* CPU implementation: [`./src/caffe/layers/relu_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/relu_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/relu_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/relu_layer.cu) +* Sample (as seen in 
[`./models/bvlc_reference_caffenet/train_val.prototxt`](https://github.com/BVLC/caffe/blob/master/models/bvlc_reference_caffenet/train_val.prototxt)) + + layer { + name: "relu1" + type: "ReLU" + bottom: "conv1" + top: "conv1" + } + +Given an input value x, the `ReLU` layer computes the output as x if x > 0 and negative_slope * x if x <= 0. When the negative slope parameter is not set, it is equivalent to the standard ReLU function of taking max(x, 0). It also supports in-place computation, meaning that the bottom and the top blob could be the same to reduce memory consumption. + +## Parameters + +* Parameters (`ReLUParameter relu_param`) + - Optional + - `negative_slope` [default 0]: specifies whether to leak the negative part by multiplying it with the slope value rather than setting it to 0. +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/ReLUParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/reshape.md b/docs/tutorial/layers/reshape.md new file mode 100644 index 00000000000..92d23f2c73e --- /dev/null +++ b/docs/tutorial/layers/reshape.md @@ -0,0 +1,51 @@ +--- +title: Reshape Layer +--- + +# Reshape Layer +* Layer type: `Reshape` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ReshapeLayer.html) +* Header: [`./include/caffe/layers/reshape_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/reshape_layer.hpp) +* Implementation: [`./src/caffe/layers/reshape_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/reshape_layer.cpp) + +* Input + - a single blob with arbitrary dimensions +* Output + - the same blob, with modified dimensions, as specified by `reshape_param` + +* Sample + + layer { + name: "reshape" + type: "Reshape" + bottom: "input" + top: "output" + reshape_param { + shape { + dim: 0 # copy the dimension from below + dim: 2 + dim: 3 + dim: -1 # 
infer it from the other dimensions + } + } + } + +The `Reshape` layer can be used to change the dimensions of its input, without changing its data. Just like the `Flatten` layer, only the dimensions are changed; no data is copied in the process. + +Output dimensions are specified by the `ReshapeParam` proto. Positive numbers are used directly, setting the corresponding dimension of the output blob. In addition, two special values are accepted for any of the target dimension values: + +* **0** means "copy the respective dimension of the bottom layer". That is, if the bottom has 2 as its 1st dimension, the top will have 2 as its 1st dimension as well, given `dim: 0` as the 1st target dimension. +* **-1** stands for "infer this from the other dimensions". This behavior is similar to that of -1 in *numpy*'s or `[]` for *MATLAB*'s reshape: this dimension is calculated to keep the overall element count the same as in the bottom layer. At most one -1 can be used in a reshape operation. + +As another example, specifying `reshape_param { shape { dim: 0 dim: -1 } }` makes the layer behave in exactly the same way as the `Flatten` layer. 
+ +## Parameters + +* Parameters (`ReshapeParameter reshape_param`) + - Optional: (also see detailed description below) + - `shape` +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/ReshapeParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/rnn.md b/docs/tutorial/layers/rnn.md new file mode 100644 index 00000000000..b6fcf47133f --- /dev/null +++ b/docs/tutorial/layers/rnn.md @@ -0,0 +1,19 @@ +--- +title: RNN Layer +--- + +# RNN Layer + +* Layer type: `RNN` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1RNNLayer.html) +* Header: [`./include/caffe/layers/rnn_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/rnn_layer.hpp) +* CPU implementation: [`./src/caffe/layers/rnn_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/rnn_layer.cpp) + +## Parameters + +* Parameters (`RecurrentParameter recurrent_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/RecurrentParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/scale.md b/docs/tutorial/layers/scale.md new file mode 100644 index 00000000000..0e27549ad52 --- /dev/null +++ b/docs/tutorial/layers/scale.md @@ -0,0 +1,20 @@ +--- +title: Scale Layer +--- + +# Scale Layer + +* Layer type: `Scale` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ScaleLayer.html) +* Header: [`./include/caffe/layers/scale_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/scale_layer.hpp) +* CPU implementation: [`./src/caffe/layers/scale_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/scale_layer.cpp) +* CUDA GPU implementation: 
[`./src/caffe/layers/scale_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/scale_layer.cu) + +## Parameters + +* Parameters (`ScaleParameter scale_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/ScaleParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/sigmoid.md b/docs/tutorial/layers/sigmoid.md new file mode 100644 index 00000000000..505318352c9 --- /dev/null +++ b/docs/tutorial/layers/sigmoid.md @@ -0,0 +1,20 @@ +--- +title: Sigmoid Layer +--- + +# Sigmoid Layer + +* Layer type: `Sigmoid` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1SigmoidLayer.html) +* Header: [`./include/caffe/layers/sigmoid_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/sigmoid_layer.hpp) +* CPU implementation: [`./src/caffe/layers/sigmoid_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/sigmoid_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/sigmoid_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/sigmoid_layer.cu) + +## Parameters + +* Parameters (`SigmoidParameter sigmoid_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/SigmoidParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/sigmoidcrossentropyloss.md b/docs/tutorial/layers/sigmoidcrossentropyloss.md new file mode 100644 index 00000000000..a6e42cadfa9 --- /dev/null +++ b/docs/tutorial/layers/sigmoidcrossentropyloss.md @@ -0,0 +1,13 @@ +--- +title: Sigmoid Cross-Entropy Loss Layer +--- + +# Sigmoid Cross-Entropy Loss Layer + +* Layer type: `SigmoidCrossEntropyLoss` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1SigmoidCrossEntropyLossLayer.html) +* Header: 
[`./include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/sigmoid_cross_entropy_loss_layer.hpp) +* CPU implementation: [`./src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/sigmoid_cross_entropy_loss_layer.cu) + +To-do. diff --git a/docs/tutorial/layers/silence.md b/docs/tutorial/layers/silence.md new file mode 100644 index 00000000000..2c37a9cd67c --- /dev/null +++ b/docs/tutorial/layers/silence.md @@ -0,0 +1,23 @@ +--- +title: Silence Layer +--- + +# Silence Layer + +* Layer type: `Silence` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1SilenceLayer.html) +* Header: [`./include/caffe/layers/silence_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/silence_layer.hpp) +* CPU implementation: [`./src/caffe/layers/silence_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/silence_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/silence_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/silence_layer.cu) + +Silences a blob, so that it is not printed. 
+ +## Parameters + +* The `Silence` layer does not take any parameters. +* There is no silence-specific message in [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +// The Silence layer takes no layer-specific parameters. +{% endhighlight %} + diff --git a/docs/tutorial/layers/slice.md b/docs/tutorial/layers/slice.md new file mode 100644 index 00000000000..a492f1e82b9 --- /dev/null +++ b/docs/tutorial/layers/slice.md @@ -0,0 +1,42 @@ +--- +title: Slice Layer +--- + +# Slice Layer + +* Layer type: `Slice` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1SliceLayer.html) +* Header: [`./include/caffe/layers/slice_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/slice_layer.hpp) +* CPU implementation: [`./src/caffe/layers/slice_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/slice_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/slice_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/slice_layer.cu) + +The `Slice` layer is a utility layer that slices an input layer to multiple output layers along a given dimension (currently num or channel only) with given slice indices. + +* Sample + + layer { + name: "slicer_label" + type: "Slice" + bottom: "label" + ## Example of label with a shape N x 3 x 1 x 1 + top: "label1" + top: "label2" + top: "label3" + slice_param { + axis: 1 + slice_point: 1 + slice_point: 2 + } + } + +`axis` indicates the target axis; `slice_point` indicates indexes in the selected dimension (the number of indices must be equal to the number of top blobs minus one). 
+ +## Parameters + +* Parameters (`SliceParameter slice_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/SliceParameter.txt %} +{% endhighlight %} + diff --git a/docs/tutorial/layers/softmax.md b/docs/tutorial/layers/softmax.md new file mode 100644 index 00000000000..e5d53425141 --- /dev/null +++ b/docs/tutorial/layers/softmax.md @@ -0,0 +1,24 @@ +--- +title: Softmax Layer +--- + +# Softmax Layer + +* Layer type: `Softmax` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1SoftmaxLayer.html) +* Header: [`./include/caffe/layers/softmax_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/softmax_layer.hpp) +* CPU implementation: [`./src/caffe/layers/softmax_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/softmax_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/softmax_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/softmax_layer.cu) + +## Parameters + +* Parameters (`SoftmaxParameter softmax_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/SoftmaxParameter.txt %} +{% endhighlight %} + +## See also + +* [Softmax loss layer](softmaxwithloss.html) diff --git a/docs/tutorial/layers/softmaxwithloss.md b/docs/tutorial/layers/softmaxwithloss.md new file mode 100644 index 00000000000..d9a6774a0ed --- /dev/null +++ b/docs/tutorial/layers/softmaxwithloss.md @@ -0,0 +1,33 @@ +--- +title: Softmax with Loss Layer +--- + +# Softmax with Loss Layer + +* Layer type: `SoftmaxWithLoss` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1SoftmaxWithLossLayer.html) +* Header: [`./include/caffe/layers/softmax_loss_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/softmax_loss_layer.hpp) +* CPU 
implementation: [`./src/caffe/layers/softmax_loss_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/softmax_loss_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/softmax_loss_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/softmax_loss_layer.cu) + +The softmax loss layer computes the multinomial logistic loss of the softmax of its inputs. It's conceptually identical to a softmax layer followed by a multinomial logistic loss layer, but provides a more numerically stable gradient. + +## Parameters + +* Parameters (`SoftmaxParameter softmax_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/SoftmaxParameter.txt %} +{% endhighlight %} + +* Parameters (`LossParameter loss_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/LossParameter.txt %} +{% endhighlight %} + +## See also + +* [Softmax layer](softmax.html) diff --git a/docs/tutorial/layers/split.md b/docs/tutorial/layers/split.md new file mode 100644 index 00000000000..4fb71d1f26b --- /dev/null +++ b/docs/tutorial/layers/split.md @@ -0,0 +1,17 @@ +--- +title: Split Layer +--- + +# Split Layer + +* Layer type: `Split` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1SplitLayer.html) +* Header: [`./include/caffe/layers/split_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/split_layer.hpp) +* CPU implementation: [`./src/caffe/layers/split_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/split_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/split_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/split_layer.cu) + +The `Split` layer is a utility layer that splits an input blob to multiple output blobs. 
This is used when a blob is fed into multiple output layers. + +## Parameters + +Does not take any parameters. diff --git a/docs/tutorial/layers/spp.md b/docs/tutorial/layers/spp.md new file mode 100644 index 00000000000..26e5862023e --- /dev/null +++ b/docs/tutorial/layers/spp.md @@ -0,0 +1,20 @@ +--- +title: Spatial Pyramid Pooling Layer +--- + +# Spatial Pyramid Pooling Layer + +* Layer type: `SPP` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1SPPLayer.html) +* Header: [`./include/caffe/layers/spp_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/spp_layer.hpp) +* CPU implementation: [`./src/caffe/layers/spp_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/spp_layer.cpp) + + +## Parameters + +* Parameters (`SPPParameter spp_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/SPPParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/tanh.md b/docs/tutorial/layers/tanh.md new file mode 100644 index 00000000000..360634596f9 --- /dev/null +++ b/docs/tutorial/layers/tanh.md @@ -0,0 +1,18 @@ +--- +title: TanH Layer +--- + +# TanH Layer + +* Header: [`./include/caffe/layers/tanh_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/tanh_layer.hpp) +* CPU implementation: [`./src/caffe/layers/tanh_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/tanh_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/tanh_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/tanh_layer.cu) + +## Parameters + +* Parameters (`TanHParameter tanh_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/TanHParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/threshold.md 
b/docs/tutorial/layers/threshold.md new file mode 100644 index 00000000000..819e9e6f96d --- /dev/null +++ b/docs/tutorial/layers/threshold.md @@ -0,0 +1,18 @@ +--- +title: Threshold Layer +--- + +# Threshold Layer + +* Header: [`./include/caffe/layers/threshold_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/threshold_layer.hpp) +* CPU implementation: [`./src/caffe/layers/threshold_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/threshold_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/threshold_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/threshold_layer.cu) + +## Parameters + +* Parameters (`ThresholdParameter threshold_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/ThresholdParameter.txt %} +{% endhighlight %} diff --git a/docs/tutorial/layers/tile.md b/docs/tutorial/layers/tile.md new file mode 100644 index 00000000000..ea03aaa43af --- /dev/null +++ b/docs/tutorial/layers/tile.md @@ -0,0 +1,20 @@ +--- +title: Tile Layer +--- + +# Tile Layer + +* Layer type: `Tile` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1TileLayer.html) +* Header: [`./include/caffe/layers/tile_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/tile_layer.hpp) +* CPU implementation: [`./src/caffe/layers/tile_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/tile_layer.cpp) +* CUDA GPU implementation: [`./src/caffe/layers/tile_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/tile_layer.cu) + +## Parameters + +* Parameters (`TileParameter tile_param`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/TileParameter.txt %} +{% endhighlight %} diff --git 
a/docs/tutorial/layers/windowdata.md b/docs/tutorial/layers/windowdata.md new file mode 100644 index 00000000000..0cb4a8dfeb7 --- /dev/null +++ b/docs/tutorial/layers/windowdata.md @@ -0,0 +1,19 @@ +--- +title: WindowData Layer +--- + +# WindowData Layer + +* Layer type: `WindowData` +* [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1WindowDataLayer.html) +* Header: [`./include/caffe/layers/window_data_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/window_data_layer.hpp) +* CPU implementation: [`./src/caffe/layers/window_data_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/window_data_layer.cpp) + +## Parameters + +* Parameters (`WindowDataParameter`) +* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): + +{% highlight Protobuf %} +{% include proto/WindowDataParameter.txt %} +{% endhighlight %} diff --git a/scripts/build_docs.sh b/scripts/build_docs.sh index 0e28bd71631..4837587ad30 100755 --- a/scripts/build_docs.sh +++ b/scripts/build_docs.sh @@ -12,6 +12,9 @@ cd $ROOT_DIR # Gather docs. scripts/gather_examples.sh +# Split caffe.proto for inclusion by layer catalogue. +scripts/split_caffe_proto.py + # Generate developer docs. 
make docs diff --git a/scripts/split_caffe_proto.py b/scripts/split_caffe_proto.py new file mode 100755 index 00000000000..7e9dc3e7b22 --- /dev/null +++ b/scripts/split_caffe_proto.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +import mmap +import re +import os +import errno + +script_path = os.path.dirname(os.path.realpath(__file__)) + +# a regex to match the parameter definitions in caffe.proto +r = re.compile(r'(?://.*\n)*message ([^ ]*) \{\n(?: .*\n|\n)*\}') + +# create directory to put caffe.proto fragments +try: + os.mkdir( + os.path.join(script_path, + '../docs/_includes/')) + os.mkdir( + os.path.join(script_path, + '../docs/_includes/proto/')) +except OSError as exception: + if exception.errno != errno.EEXIST: + raise + +caffe_proto_fn = os.path.join( + script_path, + '../src/caffe/proto/caffe.proto') + +with open(caffe_proto_fn, 'r') as fin: + + for m in r.finditer(fin.read(), re.MULTILINE): + fn = os.path.join( + script_path, + '../docs/_includes/proto/%s.txt' % m.group(1)) + with open(fn, 'w') as fout: + fout.write(m.group(0)) From fb52c7ccd2b21b26621f5abe35e776736aa9db91 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Williford" Date: Wed, 21 Dec 2016 11:33:42 +0100 Subject: [PATCH 168/264] Add Debian codenames and make link. Add the Debian codenames / versions, so it is easier to tell which Debian version is which in the future when the releases are promoted. Revise commit according to CDLuminate's comments. Removed rolling release numbers. Mention that Debian/testing can install Caffe using the packages. --- docs/install_apt_debian.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/install_apt_debian.md b/docs/install_apt_debian.md index 745a6f4fadb..1580dc43bfc 100644 --- a/docs/install_apt_debian.md +++ b/docs/install_apt_debian.md @@ -4,10 +4,13 @@ title: "Installation: Debian" # Debian Installation -Caffe packages are available for `Debian/unstable`. 
Debian/stable users -should take a look at Ubuntu installation instruction. +Caffe packages are available for `Debian/unstable`. Debian/stable +(jessie) users should take a look at [Ubuntu installation instruction]( +install_apt.html). Debian/testing (stretch) users may be able to get Caffe +to work using the packages in Debian/unstable, but it is beyond the scope of +this guide. -Only experienced linux users are recommended to try Debian/unstable (Sid). +Only experienced linux users are recommended to try Debian/unstable (Sid). Last update: Dec.21 2016 From 5c437b13d2afde8f8e961e1e8a50fda060cb4519 Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Wed, 4 Jan 2017 02:49:11 +0000 Subject: [PATCH 169/264] docs: update debian installation guide. Thanks to @lukeyeager for comments. --- docs/install_apt_debian.md | 29 +++++++++++++++++++---------- docs/installation.md | 2 +- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/docs/install_apt_debian.md b/docs/install_apt_debian.md index 1580dc43bfc..3175f6931ac 100644 --- a/docs/install_apt_debian.md +++ b/docs/install_apt_debian.md @@ -10,9 +10,9 @@ install_apt.html). Debian/testing (stretch) users may be able to get Caffe to work using the packages in Debian/unstable, but it is beyond the scope of this guide. -Only experienced linux users are recommended to try Debian/unstable (Sid). +Only experienced linux users are recommended to try Debian/unstable (Sid). -Last update: Dec.21 2016 +Last update: 2017-01-04 ## Debian/unstable @@ -52,6 +52,7 @@ $ sudo apt build-dep [ caffe-cpu | caffe-cuda ] # the most elegant wa $ apt source [ caffe-cpu | caffe-cuda ] # download the source tarball and extract $ cd caffe-XXXX [ ... optional, customize caffe code/build ... ] +$ dch -llocal "Modified XXX in order to XXX" # write your one-line changelog $ debuild -B -j4 # build caffe with 4 parallel jobs (similar to make -j4) [ ... building ...] 
$ debc # optional, if you want to check the package contents @@ -59,6 +60,12 @@ $ sudo debi # optional, install the ge ``` The resulting deb packages can be found under the parent directory of the source tree. +Note, the `dch ...` command line above is for bumping the package version number +and adding an entry to the package changelog. If you would like to write +more than one changelog entry, use subsequent `dch` command (see `man 1 dch`) +instead of manually modifing `debian/changelog` unless you know how to keep its format correct. +The changelog will be installed at e.g. `/usr/share/doc/caffe-cpu/changelog.Debian.gz`. + ### Source installation Source installation under Debian/unstable is similar to that of Ubuntu, but @@ -71,15 +78,13 @@ Note, this requires a `deb-src` entry in your `/etc/apt/sources.list`. ### Notes * Consider re-compiling OpenBLAS locally with optimization flags for sake of -performance. This is highly recommended if you are writing a paper. +performance. This is highly recommended for any kind of production use, including +academic research. * If you are installing `caffe-cuda`, APT will automatically pull some of the -CUDA packages and the nvidia driver packages. Please take care if you have +CUDA packages and the nvidia driver packages. Please be careful if you have manually installed or hacked nvidia driver or CUDA toolkit or any other -related stuff, because in this case it may fail. - -* If you encountered any problem when installing `caffe-*`, please report bug -to Debian via Debian's bug tracking system. See https://www.debian.org/Bugs/ . +related stuff, because in this case APT may fail. * Additionally, a manpage (`man caffe`) and a bash complementation script (`caffe `, `caffe train `) are provided. @@ -88,6 +93,10 @@ Both of the two files are still not merged into caffe master. * The python interface is Python 3 version: `python3-caffe-{cpu,cuda}`. No plan to support python2. 
+* If you encountered any problem related to the packaging system (e.g. failed to install `caffe-*`), +please report bug to Debian via Debian's bug tracking system. See https://www.debian.org/Bugs/ . +Patches and suggestions are also welcome. + ## FAQ * where is caffe-cudnn? @@ -96,11 +105,11 @@ CUDNN library seems not redistributable currently. If you really want the caffe-cudnn deb packages, the workaround is to install cudnn by yourself, and hack the packaging scripts, then build your customized package. -* I installed the CPU version, How can I switch to the CUDA version? +* I installed the CPU version. How can I switch to the CUDA version? `sudo apt install caffe-cuda`, apt's dependency resolver is smart enough to deal with this. -* Where is the examples, the models and other documentation stuff? +* Where are the examples, the models and other documentation stuff? ``` sudo apt install caffe-doc diff --git a/docs/installation.md b/docs/installation.md index 14ec467427b..6b2cd3bdfe1 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -12,7 +12,7 @@ The official Makefile and `Makefile.config` build are complemented by a [communi - [Docker setup](https://github.com/BVLC/caffe/tree/master/docker) *out-of-the-box brewing* - [Ubuntu installation](install_apt.html) *the standard platform* -- [Debian installation](install_apt_debian.html) *deploy caffe with a single command* +- [Debian installation](install_apt_debian.html) *install caffe with a single command* - [OS X installation](install_osx.html) - [RHEL / CentOS / Fedora installation](install_yum.html) - [Windows](https://github.com/BVLC/caffe/tree/windows) *see the Windows branch led by Guillaume Dumont* From 369a1f49fa7e40f39827c1dcaede224b78f6c10c Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Thu, 5 Jan 2017 05:00:37 +0000 Subject: [PATCH 170/264] docs: add some tables to debian install guide and misc update docs: change UTF-8 characters --- docs/install_apt_debian.md | 70 
+++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 16 deletions(-) diff --git a/docs/install_apt_debian.md b/docs/install_apt_debian.md index 3175f6931ac..0d39e3ae22a 100644 --- a/docs/install_apt_debian.md +++ b/docs/install_apt_debian.md @@ -4,39 +4,51 @@ title: "Installation: Debian" # Debian Installation -Caffe packages are available for `Debian/unstable`. Debian/stable -(jessie) users should take a look at [Ubuntu installation instruction]( -install_apt.html). Debian/testing (stretch) users may be able to get Caffe -to work using the packages in Debian/unstable, but it is beyond the scope of -this guide. +Caffe packages are available for several Debian versions, as shown in the +following chart -Only experienced linux users are recommended to try Debian/unstable (Sid). +``` +Your Distro | CPU_ONLY | CUDA | Alias +----------------+------------+--------+------------------- +Debian/stable | ✘ | ✘ | Debian Jessie +Debian/testing | ✔ | ☐ | Debian Stretch/Sid +Debian/unstable | ✔ | ✔ | Debian Sid +``` -Last update: 2017-01-04 +* `✘ ` You should take a look at [Ubuntu installation instruction](install_apt.html). -## Debian/unstable +* `✔ ` You can install caffe with a single command line following this guide. -Apart from the installation methods based on source, Debian/unstable -users can install pre-compiled Caffe packages via the official archive. +* `☐ ` The same with `✔ `. However it will not work any more when Debian/Stretch becomes the stable branch. -### Binary installation +Last update: 2017-01-05 + +## Binary installation with APT + +Apart from the installation methods based on source, Debian/unstable +and Debian/testing users can install pre-compiled Caffe packages via the official archive. 
Make sure that there is something like the follows in your `/etc/apt/sources.list`: ``` -deb http://ftp2.cn.debian.org/debian sid main contrib non-free +deb http://MIRROR/debian CODENAME main contrib non-free ``` +where `MIRROR` is your favorate Debian mirror, and `CODENAME ∈ {testing,stretch,sid}`. + Then we update APT cache and directly install Caffe. Note, the cpu version and the cuda version cannot be installed at the same time. ``` # apt update # apt install [ caffe-cpu | caffe-cuda ] +# caffe # command line interface working +# python3 -c 'import caffe; print(caffe.__path__)' # python3 interface working ``` It should work out of box. #### Customizing caffe packages -Some users may need to customize the Caffe package. Here is a brief -guide of producing the customized `.deb` packages. +Some users may need to customize the Caffe package. The way to customize +the package is beyond this guide. Here is only a brief guide of producing +the customized `.deb` packages. Make sure that there is something like this in your `/etc/apt/sources.list`: ``` @@ -66,7 +78,7 @@ more than one changelog entry, use subsequent `dch` command (see `man 1 dch`) instead of manually modifing `debian/changelog` unless you know how to keep its format correct. The changelog will be installed at e.g. `/usr/share/doc/caffe-cpu/changelog.Debian.gz`. -### Source installation +## Source installation Source installation under Debian/unstable is similar to that of Ubuntu, but here is a more elegant way to pull caffe build dependencies: @@ -75,7 +87,27 @@ $ sudo apt build-dep [ caffe-cpu | caffe-cuda ] ``` Note, this requires a `deb-src` entry in your `/etc/apt/sources.list`. -### Notes +#### Compiler Combinations + +Some users may find their favorate compiler doesn't work well with CUDA. +``` +CXX compiler | CUDA 7.5 | CUDA 8.0 | +-------------+------------+------------+- +GCC-7 | ? | ? | +GCC-6 | ✘ | ✘ | +GCC-5 | ✔ [1] | ✔ | +CLANG-4.0 | ? | ? | +CLANG-3.9 | ✘ | ✘ | +CLANG-3.8 | ? 
| ✔ | +``` + +`[1]` CUDA 7.5 's `host_config.h` must be patched before working with GCC-5. + +BTW, please forget the GCC-4.X series, since its `libstdc++` ABI is not compatible with GCC-5's. +You may encounter failure linking GCC-4.X object files against GCC-5 libraries. +(See https://wiki.debian.org/GCC5 ) + +## Notes * Consider re-compiling OpenBLAS locally with optimization flags for sake of performance. This is highly recommended for any kind of production use, including @@ -115,3 +147,9 @@ and hack the packaging scripts, then build your customized package. sudo apt install caffe-doc dpkg -L caffe-doc ``` + +* Where can I find the Debian package status? + +https://tracker.debian.org/pkg/caffe (for the CPU_ONLY version) + +https://tracker.debian.org/pkg/caffe-contrib (for the CUDA version) From 2317fa19d3f5a65cb22adcbd3792ea248996744e Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Tue, 22 Nov 2016 13:14:45 -0800 Subject: [PATCH 171/264] Logging from python, e.g. for lower log level on multi-GPU workers --- python/caffe/__init__.py | 2 +- python/caffe/_caffe.cpp | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index 35868a403a3..5fc6ec9b920 100644 --- a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -1,5 +1,5 @@ from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver -from ._caffe import set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed +from ._caffe import init_log, log, set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed from ._caffe import __version__ from .proto.caffe_pb2 import TRAIN, TEST from .classifier import Classifier diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index bdee75acd6c..0a86045bd46 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -51,6 +51,19 @@ const int NPY_DTYPE = 
NPY_FLOAT32; void set_mode_cpu() { Caffe::set_mode(Caffe::CPU); } void set_mode_gpu() { Caffe::set_mode(Caffe::GPU); } +void InitLog(int level) { + FLAGS_logtostderr = 1; + FLAGS_minloglevel = level; + ::google::InitGoogleLogging(""); + ::google::InstallFailureSignalHandler(); +} +void InitLogInfo() { + InitLog(google::INFO); +} +void Log(const string& s) { + LOG(INFO) << s; +} + void set_random_seed(unsigned int seed) { Caffe::set_random_seed(seed); } // For convenience, check that input files can be opened, and raise an @@ -283,6 +296,9 @@ BOOST_PYTHON_MODULE(_caffe) { bp::scope().attr("__version__") = AS_STRING(CAFFE_VERSION); // Caffe utility functions + bp::def("init_log", &InitLog); + bp::def("init_log", &InitLogInfo); + bp::def("log", &Log); bp::def("set_mode_cpu", &set_mode_cpu); bp::def("set_mode_gpu", &set_mode_gpu); bp::def("set_random_seed", &set_random_seed); From 3ba20549b7f49a76cd023d19f781a6891b2c2122 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Fri, 6 Jan 2017 14:55:12 -0800 Subject: [PATCH 172/264] Switched multi-GPU to NCCL --- CMakeLists.txt | 1 + Makefile | 6 + Makefile.config.example | 4 + cmake/Dependencies.cmake | 15 +- cmake/Modules/FindNCCL.cmake | 26 + cmake/Summary.cmake | 1 + include/caffe/blob.hpp | 1 + include/caffe/common.hpp | 14 +- include/caffe/data_reader.hpp | 82 --- include/caffe/internal_thread.hpp | 4 +- include/caffe/layer.hpp | 43 +- include/caffe/layers/base_data_layer.hpp | 5 +- include/caffe/layers/data_layer.hpp | 7 +- include/caffe/layers/hdf5_data_layer.hpp | 6 +- include/caffe/layers/python_layer.hpp | 4 +- include/caffe/net.hpp | 40 +- include/caffe/parallel.hpp | 96 ++-- include/caffe/solver.hpp | 40 +- include/caffe/syncedmem.hpp | 14 +- include/caffe/util/math_functions.hpp | 5 + include/caffe/util/nccl.hpp | 37 ++ src/caffe/blob.cpp | 18 + src/caffe/common.cpp | 5 +- src/caffe/data_reader.cpp | 119 ---- src/caffe/internal_thread.cpp | 10 +- src/caffe/layer.cpp | 20 - src/caffe/layers/base_data_layer.cpp | 
45 +- src/caffe/layers/base_data_layer.cu | 21 +- src/caffe/layers/data_layer.cpp | 82 ++- src/caffe/layers/hdf5_data_layer.cpp | 55 +- src/caffe/layers/hdf5_data_layer.cu | 22 +- src/caffe/layers/image_data_layer.cpp | 13 +- src/caffe/layers/window_data_layer.cpp | 8 +- src/caffe/net.cpp | 47 +- src/caffe/parallel.cpp | 514 ++++++++---------- src/caffe/proto/caffe.proto | 9 +- src/caffe/solver.cpp | 44 +- src/caffe/solvers/adagrad_solver.cpp | 1 - src/caffe/solvers/nesterov_solver.cpp | 1 - src/caffe/solvers/sgd_solver.cpp | 4 +- src/caffe/syncedmem.cpp | 59 +- src/caffe/test/test_data_layer.cpp | 36 ++ src/caffe/test/test_gradient_based_solver.cpp | 34 +- src/caffe/test/test_hdf5data_layer.cpp | 30 + src/caffe/util/blocking_queue.cpp | 5 - src/caffe/util/db_lmdb.cpp | 2 +- src/caffe/util/math_functions.cu | 20 + tools/caffe.cpp | 11 +- 48 files changed, 813 insertions(+), 873 deletions(-) create mode 100644 cmake/Modules/FindNCCL.cmake delete mode 100644 include/caffe/data_reader.hpp create mode 100644 include/caffe/util/nccl.hpp delete mode 100644 src/caffe/data_reader.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index da7142c9b3c..3af394f7aa2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,6 +28,7 @@ include(cmake/ConfigGen.cmake) # ---[ Options caffe_option(CPU_ONLY "Build Caffe without CUDA support" OFF) # TODO: rename to USE_CUDA caffe_option(USE_CUDNN "Build Caffe with cuDNN library support" ON IF NOT CPU_ONLY) +caffe_option(USE_NCCL "Build Caffe with NCCL library support" OFF) caffe_option(BUILD_SHARED_LIBS "Build shared libraries" ON) caffe_option(BUILD_python "Build Python wrapper" ON) set(python_version "2" CACHE STRING "Specify which Python version to use") diff --git a/Makefile b/Makefile index ccc4d8b9e6a..65d08f7d31e 100644 --- a/Makefile +++ b/Makefile @@ -328,6 +328,12 @@ ifeq ($(USE_CUDNN), 1) COMMON_FLAGS += -DUSE_CUDNN endif +# NCCL acceleration configuration +ifeq ($(USE_NCCL), 1) + LIBRARIES += nccl + COMMON_FLAGS += -DUSE_NCCL 
+endif + # configure IO libraries ifeq ($(USE_OPENCV), 1) COMMON_FLAGS += -DUSE_OPENCV diff --git a/Makefile.config.example b/Makefile.config.example index 07bed63ae40..541cf8077d5 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -94,6 +94,10 @@ LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib # INCLUDE_DIRS += $(shell brew --prefix)/include # LIBRARY_DIRS += $(shell brew --prefix)/lib +# NCCL acceleration switch (uncomment to build with NCCL) +# https://github.com/NVIDIA/nccl (last tested version: v1.2.3-1+cuda8.0) +# USE_NCCL := 1 + # Uncomment to use `pkg-config` to specify OpenCV library paths. # (Usually not necessary -- OpenCV libraries are normally installed in one of the above $LIBRARY_DIRS.) # USE_PKG_CONFIG := 1 diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index ae9ce8e436d..ba28a128e6c 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -67,6 +67,13 @@ if(NOT HAVE_CUDA) add_definitions(-DCPU_ONLY) endif() +if(USE_NCCL) + find_package(NCCL REQUIRED) + include_directories(SYSTEM ${NCCL_INCLUDE_DIR}) + list(APPEND Caffe_LINKER_LIBS ${NCCL_LIBRARIES}) + add_definitions(-DUSE_NCCL) +endif() + # ---[ OpenCV if(USE_OPENCV) find_package(OpenCV QUIET COMPONENTS core highgui imgproc imgcodecs) @@ -119,18 +126,18 @@ if(BUILD_python) find_package(NumPy 1.7.1) # Find the matching boost python implementation set(version ${PYTHONLIBS_VERSION_STRING}) - + STRING( REGEX REPLACE "[^0-9]" "" boost_py_version ${version} ) find_package(Boost 1.46 COMPONENTS "python-py${boost_py_version}") set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${boost_py_version}_FOUND}) - + while(NOT "${version}" STREQUAL "" AND NOT Boost_PYTHON_FOUND) STRING( REGEX REPLACE "([0-9.]+).[0-9]+" "\\1" version ${version} ) - + STRING( REGEX REPLACE "[^0-9]" "" boost_py_version ${version} ) find_package(Boost 1.46 COMPONENTS "python-py${boost_py_version}") set(Boost_PYTHON_FOUND ${Boost_PYTHON-PY${boost_py_version}_FOUND}) - + STRING( REGEX 
MATCHALL "([0-9.]+).[0-9]+" has_more_version ${version} ) if("${has_more_version}" STREQUAL "") break() diff --git a/cmake/Modules/FindNCCL.cmake b/cmake/Modules/FindNCCL.cmake new file mode 100644 index 00000000000..c8845934102 --- /dev/null +++ b/cmake/Modules/FindNCCL.cmake @@ -0,0 +1,26 @@ +set(NCCL_INC_PATHS + /usr/include + /usr/local/include + $ENV{NCCL_DIR}/include + ) + +set(NCCL_LIB_PATHS + /lib + /lib64 + /usr/lib + /usr/lib64 + /usr/local/lib + /usr/local/lib64 + $ENV{NCCL_DIR}/lib + ) + +find_path(NCCL_INCLUDE_DIR NAMES nccl.h PATHS ${NCCL_INC_PATHS}) +find_library(NCCL_LIBRARIES NAMES nccl PATHS ${NCCL_LIB_PATHS}) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(NCCL DEFAULT_MSG NCCL_INCLUDE_DIR NCCL_LIBRARIES) + +if (NCCL_FOUND) + message(STATUS "Found NCCL (include: ${NCCL_INCLUDE_DIR}, library: ${NCCL_LIBRARIES})") + mark_as_advanced(NCCL_INCLUDE_DIR NCCL_LIBRARIES) +endif () diff --git a/cmake/Summary.cmake b/cmake/Summary.cmake index ba025cf81e0..ed8c25268db 100644 --- a/cmake/Summary.cmake +++ b/cmake/Summary.cmake @@ -117,6 +117,7 @@ function(caffe_print_configuration_summary) caffe_status(" USE_OPENCV : ${USE_OPENCV}") caffe_status(" USE_LEVELDB : ${USE_LEVELDB}") caffe_status(" USE_LMDB : ${USE_LMDB}") + caffe_status(" USE_NCCL : ${USE_NCCL}") caffe_status(" ALLOW_LMDB_NOLOCK : ${ALLOW_LMDB_NOLOCK}") caffe_status("") caffe_status("Dependencies:") diff --git a/include/caffe/blob.hpp b/include/caffe/blob.hpp index af360ac24bd..2f59471c29e 100644 --- a/include/caffe/blob.hpp +++ b/include/caffe/blob.hpp @@ -220,6 +220,7 @@ class Blob { void set_cpu_data(Dtype* data); const int* gpu_shape() const; const Dtype* gpu_data() const; + void set_gpu_data(Dtype* data); const Dtype* cpu_diff() const; const Dtype* gpu_diff() const; Dtype* mutable_cpu_data(); diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp index 3c6a076ec2f..4904d1d8661 100644 --- a/include/caffe/common.hpp +++ b/include/caffe/common.hpp @@ 
-158,11 +158,14 @@ class Caffe { // Search from start_id to the highest possible device ordinal, // return the ordinal of the first available device. static int FindDevice(const int start_id = 0); - // Parallel training info + // Parallel training inline static int solver_count() { return Get().solver_count_; } inline static void set_solver_count(int val) { Get().solver_count_ = val; } - inline static bool root_solver() { return Get().root_solver_; } - inline static void set_root_solver(bool val) { Get().root_solver_ = val; } + inline static int solver_rank() { return Get().solver_rank_; } + inline static void set_solver_rank(int val) { Get().solver_rank_ = val; } + inline static bool multiprocess() { return Get().multiprocess_; } + inline static void set_multiprocess(bool val) { Get().multiprocess_ = val; } + inline static bool root_solver() { return Get().solver_rank_ == 0; } protected: #ifndef CPU_ONLY @@ -172,8 +175,11 @@ class Caffe { shared_ptr random_generator_; Brew mode_; + + // Parallel training int solver_count_; - bool root_solver_; + int solver_rank_; + bool multiprocess_; private: // The private constructor to avoid duplicate instantiation. diff --git a/include/caffe/data_reader.hpp b/include/caffe/data_reader.hpp deleted file mode 100644 index 8ed5542cb8d..00000000000 --- a/include/caffe/data_reader.hpp +++ /dev/null @@ -1,82 +0,0 @@ -#ifndef CAFFE_DATA_READER_HPP_ -#define CAFFE_DATA_READER_HPP_ - -#include -#include -#include - -#include "caffe/common.hpp" -#include "caffe/internal_thread.hpp" -#include "caffe/util/blocking_queue.hpp" -#include "caffe/util/db.hpp" - -namespace caffe { - -/** - * @brief Reads data from a source to queues available to data layers. - * A single reading thread is created per source, even if multiple solvers - * are running in parallel, e.g. for multi-GPU training. This makes sure - * databases are read sequentially, and that each solver accesses a different - * subset of the database. 
Data is distributed to solvers in a round-robin - * way to keep parallel training deterministic. - */ -class DataReader { - public: - explicit DataReader(const LayerParameter& param); - ~DataReader(); - - inline BlockingQueue& free() const { - return queue_pair_->free_; - } - inline BlockingQueue& full() const { - return queue_pair_->full_; - } - - protected: - // Queue pairs are shared between a body and its readers - class QueuePair { - public: - explicit QueuePair(int size); - ~QueuePair(); - - BlockingQueue free_; - BlockingQueue full_; - - DISABLE_COPY_AND_ASSIGN(QueuePair); - }; - - // A single body is created per source - class Body : public InternalThread { - public: - explicit Body(const LayerParameter& param); - virtual ~Body(); - - protected: - void InternalThreadEntry(); - void read_one(db::Cursor* cursor, QueuePair* qp); - - const LayerParameter param_; - BlockingQueue > new_queue_pairs_; - - friend class DataReader; - - DISABLE_COPY_AND_ASSIGN(Body); - }; - - // A source is uniquely identified by its layer name + path, in case - // the same database is read from two different locations in the net. 
- static inline string source_key(const LayerParameter& param) { - return param.name() + ":" + param.data_param().source(); - } - - const shared_ptr queue_pair_; - shared_ptr body_; - - static map > bodies_; - -DISABLE_COPY_AND_ASSIGN(DataReader); -}; - -} // namespace caffe - -#endif // CAFFE_DATA_READER_HPP_ diff --git a/include/caffe/internal_thread.hpp b/include/caffe/internal_thread.hpp index 6a8c5a02892..0ba67665035 100644 --- a/include/caffe/internal_thread.hpp +++ b/include/caffe/internal_thread.hpp @@ -42,8 +42,8 @@ class InternalThread { bool must_stop(); private: - void entry(int device, Caffe::Brew mode, int rand_seed, int solver_count, - bool root_solver); + void entry(int device, Caffe::Brew mode, int rand_seed, + int solver_count, int solver_rank, bool multiprocess); shared_ptr thread_; }; diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp index 10f353f94f9..30dbfd53758 100644 --- a/include/caffe/layer.hpp +++ b/include/caffe/layer.hpp @@ -38,7 +38,7 @@ class Layer { * layer. */ explicit Layer(const LayerParameter& param) - : layer_param_(param), is_shared_(false) { + : layer_param_(param) { // Set phase and copy blobs (if there are any). phase_ = param.phase(); if (layer_param_.blobs_size() > 0) { @@ -66,7 +66,6 @@ class Layer { */ void SetUp(const vector*>& bottom, const vector*>& top) { - InitMutex(); CheckBlobCounts(bottom, top); LayerSetUp(bottom, top); Reshape(bottom, top); @@ -92,30 +91,6 @@ class Layer { virtual void LayerSetUp(const vector*>& bottom, const vector*>& top) {} - /** - * @brief Whether a layer should be shared by multiple nets during data - * parallelism. By default, all layers except for data layers should - * not be shared. data layers should be shared to ensure each worker - * solver access data sequentially during data parallelism. - */ - virtual inline bool ShareInParallel() const { return false; } - - /** @brief Return whether this layer is actually shared by other nets. 
- * If ShareInParallel() is true and using more than one GPU and the - * net has TRAIN phase, then this function is expected return true. - */ - inline bool IsShared() const { return is_shared_; } - - /** @brief Set whether this layer is actually shared by other nets - * If ShareInParallel() is true and using more than one GPU and the - * net has TRAIN phase, then is_shared should be set true. - */ - inline void SetShared(bool is_shared) { - CHECK(ShareInParallel() || !is_shared) - << type() << "Layer does not support sharing."; - is_shared_ = is_shared; - } - /** * @brief Adjust the shapes of top blobs and internal buffers to accommodate * the shapes of the bottom blobs. @@ -428,19 +403,6 @@ class Layer { } private: - /** Whether this layer is actually shared by other nets*/ - bool is_shared_; - - /** The mutex for sequential forward if this layer is shared */ - shared_ptr forward_mutex_; - - /** Initialize forward_mutex_ */ - void InitMutex(); - /** Lock forward_mutex_ if this layer is shared */ - void Lock(); - /** Unlock forward_mutex_ if this layer is shared */ - void Unlock(); - DISABLE_COPY_AND_ASSIGN(Layer); }; // class Layer @@ -450,8 +412,6 @@ class Layer { template inline Dtype Layer::Forward(const vector*>& bottom, const vector*>& top) { - // Lock during forward to ensure sequential forward - Lock(); Dtype loss = 0; Reshape(bottom, top); switch (Caffe::mode()) { @@ -482,7 +442,6 @@ inline Dtype Layer::Forward(const vector*>& bottom, default: LOG(FATAL) << "Unknown caffe mode."; } - Unlock(); return loss; } diff --git a/include/caffe/layers/base_data_layer.hpp b/include/caffe/layers/base_data_layer.hpp index 2c49b73184b..925b019d460 100644 --- a/include/caffe/layers/base_data_layer.hpp +++ b/include/caffe/layers/base_data_layer.hpp @@ -68,15 +68,16 @@ class BasePrefetchingDataLayer : const vector*>& top); // Prefetches batches (asynchronously if to GPU memory) - static const int PREFETCH_COUNT = 3; + static const int PREFETCH_COUNT = 4; // same as proto 
protected: virtual void InternalThreadEntry(); virtual void load_batch(Batch* batch) = 0; - Batch prefetch_[PREFETCH_COUNT]; + vector > > prefetch_; BlockingQueue*> prefetch_free_; BlockingQueue*> prefetch_full_; + Batch* prefetch_current_; Blob transformed_data_; }; diff --git a/include/caffe/layers/data_layer.hpp b/include/caffe/layers/data_layer.hpp index 6c361791a0c..dec58180976 100644 --- a/include/caffe/layers/data_layer.hpp +++ b/include/caffe/layers/data_layer.hpp @@ -4,7 +4,6 @@ #include #include "caffe/blob.hpp" -#include "caffe/data_reader.hpp" #include "caffe/data_transformer.hpp" #include "caffe/internal_thread.hpp" #include "caffe/layer.hpp" @@ -29,9 +28,13 @@ class DataLayer : public BasePrefetchingDataLayer { virtual inline int MaxTopBlobs() const { return 2; } protected: + void Next(); + bool Skip(); virtual void load_batch(Batch* batch); - DataReader reader_; + shared_ptr db_; + shared_ptr cursor_; + uint64_t offset_; }; } // namespace caffe diff --git a/include/caffe/layers/hdf5_data_layer.hpp b/include/caffe/layers/hdf5_data_layer.hpp index b04cf8e1940..650a3fb0c87 100644 --- a/include/caffe/layers/hdf5_data_layer.hpp +++ b/include/caffe/layers/hdf5_data_layer.hpp @@ -23,7 +23,7 @@ template class HDF5DataLayer : public Layer { public: explicit HDF5DataLayer(const LayerParameter& param) - : Layer(param) {} + : Layer(param), offset_() {} virtual ~HDF5DataLayer(); virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); @@ -38,6 +38,9 @@ class HDF5DataLayer : public Layer { virtual inline int MinTopBlobs() const { return 1; } protected: + void Next(); + bool Skip(); + virtual void Forward_cpu(const vector*>& bottom, const vector*>& top); virtual void Forward_gpu(const vector*>& bottom, @@ -55,6 +58,7 @@ class HDF5DataLayer : public Layer { std::vector > > hdf_blobs_; std::vector data_permutation_; std::vector file_permutation_; + uint64_t offset_; }; } // namespace caffe diff --git a/include/caffe/layers/python_layer.hpp 
b/include/caffe/layers/python_layer.hpp index 66dbbdf13b8..529b09cb88b 100644 --- a/include/caffe/layers/python_layer.hpp +++ b/include/caffe/layers/python_layer.hpp @@ -21,8 +21,8 @@ class PythonLayer : public Layer { // Disallow PythonLayer in MultiGPU training stage, due to GIL issues // Details: https://github.com/BVLC/caffe/issues/2936 if (this->phase_ == TRAIN && Caffe::solver_count() > 1 - && !ShareInParallel()) { - LOG(FATAL) << "PythonLayer is not implemented in Multi-GPU training"; + && !Caffe::root_solver() && !Caffe::multiprocess()) { + LOG(FATAL) << "PythonLayer does not support CLI Multi-GPU, use train.py"; } self_.attr("param_str") = bp::str( this->layer_param_.python_param().param_str()); diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index 493bdf294e2..d3c9306e9cf 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -23,10 +23,9 @@ namespace caffe { template class Net { public: - explicit Net(const NetParameter& param, const Net* root_net = NULL); + explicit Net(const NetParameter& param); explicit Net(const string& param_file, Phase phase, - const int level = 0, const vector* stages = NULL, - const Net* root_net = NULL); + const int level = 0, const vector* stages = NULL); virtual ~Net() {} /// @brief Initialize a network with a NetParameter. 
@@ -228,6 +227,31 @@ class Net { static bool StateMeetsRule(const NetState& state, const NetStateRule& rule, const string& layer_name); + // Invoked at specific points during an iteration + class Callback { + protected: + virtual void run(int layer) = 0; + + template + friend class Net; + }; + const vector& before_forward() const { return before_forward_; } + void add_before_forward(Callback* value) { + before_forward_.push_back(value); + } + const vector& after_forward() const { return after_forward_; } + void add_after_forward(Callback* value) { + after_forward_.push_back(value); + } + const vector& before_backward() const { return before_backward_; } + void add_before_backward(Callback* value) { + before_backward_.push_back(value); + } + const vector& after_backward() const { return after_backward_; } + void add_after_backward(Callback* value) { + after_backward_.push_back(value); + } + protected: // Helpers for Init. /// @brief Append a new top blob to the net. @@ -306,9 +330,13 @@ class Net { size_t memory_used_; /// Whether to compute and display debug info for the net. 
bool debug_info_; - /// The root net that actually holds the shared layers in data parallelism - const Net* const root_net_; - DISABLE_COPY_AND_ASSIGN(Net); + // Callbacks + vector before_forward_; + vector after_forward_; + vector before_backward_; + vector after_backward_; + +DISABLE_COPY_AND_ASSIGN(Net); }; diff --git a/include/caffe/parallel.hpp b/include/caffe/parallel.hpp index 6c496c884e3..64bb48e6b02 100644 --- a/include/caffe/parallel.hpp +++ b/include/caffe/parallel.hpp @@ -1,8 +1,11 @@ #ifndef CAFFE_PARALLEL_HPP_ #define CAFFE_PARALLEL_HPP_ -#include +#ifdef USE_NCCL +#include + +#include #include #include "caffe/blob.hpp" @@ -13,6 +16,7 @@ #include "caffe/solver.hpp" #include "caffe/syncedmem.hpp" #include "caffe/util/blocking_queue.hpp" +#include "caffe/util/nccl.hpp" namespace caffe { @@ -51,7 +55,7 @@ class GPUParams : public Params { GPUParams(shared_ptr > root_solver, int device); virtual ~GPUParams(); - void configure(Solver* solver) const; + void Configure(Solver* solver) const; protected: using Params::size_; @@ -59,58 +63,55 @@ class GPUParams : public Params { using Params::diff_; }; -class DevicePair { - public: - DevicePair(int parent, int device) - : parent_(parent), - device_(device) { - } - inline int parent() { - return parent_; - } - inline int device() { - return device_; - } - - // Group GPUs in pairs, by proximity depending on machine's topology - static void compute(const vector devices, vector* pairs); - - protected: - int parent_; - int device_; -}; - -// Synchronous data parallelism using map-reduce between local GPUs. 
template -class P2PSync : public GPUParams, public Solver::Callback, - public InternalThread { +class NCCL : public GPUParams, + public Solver::Callback, + public Net::Callback { public: - explicit P2PSync(shared_ptr > root_solver, - P2PSync* parent, const SolverParameter& param); - virtual ~P2PSync(); - - inline const shared_ptr >& solver() const { - return solver_; - } - - void Run(const vector& gpus); - void Prepare(const vector& gpus, - vector > >* syncs); - inline const int initial_iter() const { return initial_iter_; } + /** + * Single process version. + */ + explicit NCCL(shared_ptr > solver); + /** + * In multi-process settings, first create a NCCL id (new_uid), then + * pass it to each process to create connected instances. + */ + NCCL(shared_ptr > solver, const string& uid); + ~NCCL(); + + boost::barrier* barrier(); + void set_barrier(boost::barrier* value); + + /** + * In single process settings, create instances without uids and + * call this to connect them. + */ + static void InitSingleProcess(vector*>* nccls); + + static string new_uid(); + + /** + * Broadcast weights from rank 0 other solvers. + */ + void Broadcast(); + + /** + * Single process multi-GPU. 
+ */ + void Run(const vector& gpus, const char* restore); protected: - void on_start(); + void Init(); + void on_start() {} + void run(int layer); // Net callback void on_gradients_ready(); - void InternalThreadEntry(); + ncclComm_t comm_; + cudaStream_t stream_; - P2PSync* parent_; - vector*> children_; - BlockingQueue*> queue_; - const int initial_iter_; - Dtype* parent_grads_; shared_ptr > solver_; - + // Should not be necessary, https://github.com/NVIDIA/nccl/issues/37 + boost::barrier* barrier_; using Params::size_; using Params::data_; using Params::diff_; @@ -118,4 +119,5 @@ class P2PSync : public GPUParams, public Solver::Callback, } // namespace caffe -#endif +#endif // USE_NCCL +#endif // header diff --git a/include/caffe/solver.hpp b/include/caffe/solver.hpp index eafcee32904..a28d8cb897e 100644 --- a/include/caffe/solver.hpp +++ b/include/caffe/solver.hpp @@ -6,6 +6,7 @@ #include "caffe/net.hpp" #include "caffe/solver_factory.hpp" +#include "caffe/util/benchmark.hpp" namespace caffe { @@ -40,9 +41,8 @@ typedef boost::function ActionCallback; template class Solver { public: - explicit Solver(const SolverParameter& param, - const Solver* root_solver = NULL); - explicit Solver(const string& param_file, const Solver* root_solver = NULL); + explicit Solver(const SolverParameter& param); + explicit Solver(const string& param_file); void Init(const SolverParameter& param); void InitTrainNet(); void InitTestNets(); @@ -72,7 +72,7 @@ class Solver { inline const vector > >& test_nets() { return test_nets_; } - int iter() { return iter_; } + int iter() const { return iter_; } // Invoked at specific points during an iteration class Callback { @@ -118,10 +118,6 @@ class Solver { vector losses_; Dtype smoothed_loss_; - // The root solver that holds root nets (actually containing shared layers) - // in data parallelism - const Solver* const root_solver_; - // A function that can be set by a client of the Solver to provide indication // that it wants a snapshot saved 
and/or to exit early. ActionCallback action_request_function_; @@ -129,31 +125,11 @@ class Solver { // True iff a request to stop early was received. bool requested_early_exit_; - DISABLE_COPY_AND_ASSIGN(Solver); -}; + // Timing information, handy to tune e.g. nbr of GPUs + Timer iteration_timer_; + float iterations_last_; -/** - * @brief Solver that only computes gradients, used as worker - * for multi-GPU training. - */ -template -class WorkerSolver : public Solver { - public: - explicit WorkerSolver(const SolverParameter& param, - const Solver* root_solver = NULL) - : Solver(param, root_solver) {} - - protected: - void ApplyUpdate() {} - void SnapshotSolverState(const string& model_filename) { - LOG(FATAL) << "Should not be called on worker solver."; - } - void RestoreSolverStateFromBinaryProto(const string& state_file) { - LOG(FATAL) << "Should not be called on worker solver."; - } - void RestoreSolverStateFromHDF5(const string& state_file) { - LOG(FATAL) << "Should not be called on worker solver."; - } + DISABLE_COPY_AND_ASSIGN(Solver); }; } // namespace caffe diff --git a/include/caffe/syncedmem.hpp b/include/caffe/syncedmem.hpp index 38ee4664028..a41066bacd9 100644 --- a/include/caffe/syncedmem.hpp +++ b/include/caffe/syncedmem.hpp @@ -44,14 +44,8 @@ inline void CaffeFreeHost(void* ptr, bool use_cuda) { */ class SyncedMemory { public: - SyncedMemory() - : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED), - own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false), - gpu_device_(-1) {} - explicit SyncedMemory(size_t size) - : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED), - own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false), - gpu_device_(-1) {} + SyncedMemory(); + explicit SyncedMemory(size_t size); ~SyncedMemory(); const void* cpu_data(); void set_cpu_data(void* data); @@ -68,6 +62,8 @@ class SyncedMemory { #endif private: + void check_device(); + void to_cpu(); void to_gpu(); void* 
cpu_ptr_; @@ -77,7 +73,7 @@ class SyncedMemory { bool own_cpu_data_; bool cpu_malloc_use_cuda_; bool own_gpu_data_; - int gpu_device_; + int device_; DISABLE_COPY_AND_ASSIGN(SyncedMemory); }; // class SyncedMemory diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index 6f6d3feeae2..51068fe2b80 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -185,6 +185,11 @@ void caffe_gpu_add_scalar(const int N, const Dtype alpha, Dtype *X); template void caffe_gpu_scal(const int N, const Dtype alpha, Dtype *X); +#ifndef CPU_ONLY +template +void caffe_gpu_scal(const int N, const Dtype alpha, Dtype* X, cudaStream_t str); +#endif + template void caffe_gpu_add(const int N, const Dtype* a, const Dtype* b, Dtype* y); diff --git a/include/caffe/util/nccl.hpp b/include/caffe/util/nccl.hpp new file mode 100644 index 00000000000..e01fb7451e8 --- /dev/null +++ b/include/caffe/util/nccl.hpp @@ -0,0 +1,37 @@ +#ifndef CAFFE_UTIL_NCCL_H_ +#define CAFFE_UTIL_NCCL_H_ +#ifdef USE_NCCL + +#include + +#include "caffe/common.hpp" + +#define NCCL_CHECK(condition) \ +{ \ + ncclResult_t result = condition; \ + CHECK_EQ(result, ncclSuccess) << " " \ + << ncclGetErrorString(result); \ +} + +namespace caffe { + +namespace nccl { + +template class dataType; + +template<> class dataType { + public: + static const ncclDataType_t type = ncclFloat; +}; +template<> class dataType { + public: + static const ncclDataType_t type = ncclDouble; +}; + +} // namespace nccl + +} // namespace caffe + +#endif // end USE_NCCL + +#endif // CAFFE_UTIL_NCCL_H_ diff --git a/src/caffe/blob.cpp b/src/caffe/blob.cpp index 4a34e4c5856..603e52f7025 100644 --- a/src/caffe/blob.cpp +++ b/src/caffe/blob.cpp @@ -89,6 +89,12 @@ const Dtype* Blob::cpu_data() const { template void Blob::set_cpu_data(Dtype* data) { CHECK(data); + // Make sure CPU and GPU sizes remain equal + size_t size = count_ * sizeof(Dtype); + if (data_->size() != size) { + 
data_.reset(new SyncedMemory(size)); + diff_.reset(new SyncedMemory(size)); + } data_->set_cpu_data(data); } @@ -98,6 +104,18 @@ const Dtype* Blob::gpu_data() const { return (const Dtype*)data_->gpu_data(); } +template +void Blob::set_gpu_data(Dtype* data) { + CHECK(data); + // Make sure CPU and GPU sizes remain equal + size_t size = count_ * sizeof(Dtype); + if (data_->size() != size) { + data_.reset(new SyncedMemory(size)); + diff_.reset(new SyncedMemory(size)); + } + data_->set_gpu_data(data); +} + template const Dtype* Blob::cpu_diff() const { CHECK(diff_); diff --git a/src/caffe/common.cpp b/src/caffe/common.cpp index dee681654aa..4f6f9bccc38 100644 --- a/src/caffe/common.cpp +++ b/src/caffe/common.cpp @@ -53,7 +53,7 @@ void GlobalInit(int* pargc, char*** pargv) { Caffe::Caffe() : random_generator_(), mode_(Caffe::CPU), - solver_count_(1), root_solver_(true) { } + solver_count_(1), solver_rank_(0), multiprocess_(false) { } Caffe::~Caffe() { } @@ -106,7 +106,8 @@ void* Caffe::RNG::generator() { Caffe::Caffe() : cublas_handle_(NULL), curand_generator_(NULL), random_generator_(), - mode_(Caffe::CPU), solver_count_(1), root_solver_(true) { + mode_(Caffe::CPU), + solver_count_(1), solver_rank_(0), multiprocess_(false) { // Try to create a cublas handler, and report an error if failed (but we will // keep the program running as one might just want to run CPU code). 
if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) { diff --git a/src/caffe/data_reader.cpp b/src/caffe/data_reader.cpp deleted file mode 100644 index 9f019bbfcb7..00000000000 --- a/src/caffe/data_reader.cpp +++ /dev/null @@ -1,119 +0,0 @@ -#include -#include -#include -#include - -#include "caffe/common.hpp" -#include "caffe/data_reader.hpp" -#include "caffe/layers/data_layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - -using boost::weak_ptr; - -map > DataReader::bodies_; -static boost::mutex bodies_mutex_; - -DataReader::DataReader(const LayerParameter& param) - : queue_pair_(new QueuePair( // - param.data_param().prefetch() * param.data_param().batch_size())) { - // Get or create a body - boost::mutex::scoped_lock lock(bodies_mutex_); - string key = source_key(param); - weak_ptr& weak = bodies_[key]; - body_ = weak.lock(); - if (!body_) { - body_.reset(new Body(param)); - bodies_[key] = weak_ptr(body_); - } - body_->new_queue_pairs_.push(queue_pair_); -} - -DataReader::~DataReader() { - string key = source_key(body_->param_); - body_.reset(); - boost::mutex::scoped_lock lock(bodies_mutex_); - if (bodies_[key].expired()) { - bodies_.erase(key); - } -} - -// - -DataReader::QueuePair::QueuePair(int size) { - // Initialize the free queue with requested number of datums - for (int i = 0; i < size; ++i) { - free_.push(new Datum()); - } -} - -DataReader::QueuePair::~QueuePair() { - Datum* datum; - while (free_.try_pop(&datum)) { - delete datum; - } - while (full_.try_pop(&datum)) { - delete datum; - } -} - -// - -DataReader::Body::Body(const LayerParameter& param) - : param_(param), - new_queue_pairs_() { - StartInternalThread(); -} - -DataReader::Body::~Body() { - StopInternalThread(); -} - -void DataReader::Body::InternalThreadEntry() { - shared_ptr db(db::GetDB(param_.data_param().backend())); - db->Open(param_.data_param().source(), db::READ); - shared_ptr cursor(db->NewCursor()); - vector > qps; - try { - int solver_count = 
param_.phase() == TRAIN ? Caffe::solver_count() : 1; - - // To ensure deterministic runs, only start running once all solvers - // are ready. But solvers need to peek on one item during initialization, - // so read one item, then wait for the next solver. - for (int i = 0; i < solver_count; ++i) { - shared_ptr qp(new_queue_pairs_.pop()); - read_one(cursor.get(), qp.get()); - qps.push_back(qp); - } - // Main loop - while (!must_stop()) { - for (int i = 0; i < solver_count; ++i) { - read_one(cursor.get(), qps[i].get()); - } - // Check no additional readers have been created. This can happen if - // more than one net is trained at a time per process, whether single - // or multi solver. It might also happen if two data layers have same - // name and same source. - CHECK_EQ(new_queue_pairs_.size(), 0); - } - } catch (boost::thread_interrupted&) { - // Interrupted exception is expected on shutdown - } -} - -void DataReader::Body::read_one(db::Cursor* cursor, QueuePair* qp) { - Datum* datum = qp->free_.pop(); - // TODO deserialize in-place instead of copy? 
- datum->ParseFromString(cursor->value()); - qp->full_.push(datum); - - // go to the next iter - cursor->Next(); - if (!cursor->valid()) { - DLOG(INFO) << "Restarting data prefetching from start."; - cursor->SeekToFirst(); - } -} - -} // namespace caffe diff --git a/src/caffe/internal_thread.cpp b/src/caffe/internal_thread.cpp index 104884e0295..11de4979935 100644 --- a/src/caffe/internal_thread.cpp +++ b/src/caffe/internal_thread.cpp @@ -28,25 +28,27 @@ void InternalThread::StartInternalThread() { Caffe::Brew mode = Caffe::mode(); int rand_seed = caffe_rng_rand(); int solver_count = Caffe::solver_count(); - bool root_solver = Caffe::root_solver(); + int solver_rank = Caffe::solver_rank(); + bool multiprocess = Caffe::multiprocess(); try { thread_.reset(new boost::thread(&InternalThread::entry, this, device, mode, - rand_seed, solver_count, root_solver)); + rand_seed, solver_count, solver_rank, multiprocess)); } catch (std::exception& e) { LOG(FATAL) << "Thread exception: " << e.what(); } } void InternalThread::entry(int device, Caffe::Brew mode, int rand_seed, - int solver_count, bool root_solver) { + int solver_count, int solver_rank, bool multiprocess) { #ifndef CPU_ONLY CUDA_CHECK(cudaSetDevice(device)); #endif Caffe::set_mode(mode); Caffe::set_random_seed(rand_seed); Caffe::set_solver_count(solver_count); - Caffe::set_root_solver(root_solver); + Caffe::set_solver_rank(solver_rank); + Caffe::set_multiprocess(multiprocess); InternalThreadEntry(); } diff --git a/src/caffe/layer.cpp b/src/caffe/layer.cpp index 3b9128986ae..684ae88bb49 100644 --- a/src/caffe/layer.cpp +++ b/src/caffe/layer.cpp @@ -1,27 +1,7 @@ -#include #include "caffe/layer.hpp" namespace caffe { -template -void Layer::InitMutex() { - forward_mutex_.reset(new boost::mutex()); -} - -template -void Layer::Lock() { - if (IsShared()) { - forward_mutex_->lock(); - } -} - -template -void Layer::Unlock() { - if (IsShared()) { - forward_mutex_->unlock(); - } -} - INSTANTIATE_CLASS(Layer); } // namespace 
caffe diff --git a/src/caffe/layers/base_data_layer.cpp b/src/caffe/layers/base_data_layer.cpp index 989319f1a07..9414f6f98b2 100644 --- a/src/caffe/layers/base_data_layer.cpp +++ b/src/caffe/layers/base_data_layer.cpp @@ -36,9 +36,12 @@ template BasePrefetchingDataLayer::BasePrefetchingDataLayer( const LayerParameter& param) : BaseDataLayer(param), - prefetch_free_(), prefetch_full_() { - for (int i = 0; i < PREFETCH_COUNT; ++i) { - prefetch_free_.push(&prefetch_[i]); + prefetch_(param.has_data_param() ? + param.data_param().prefetch() : PREFETCH_COUNT), + prefetch_free_(), prefetch_full_(), prefetch_current_() { + for (int i = 0; i < prefetch_.size(); ++i) { + prefetch_[i].reset(new Batch()); + prefetch_free_.push(prefetch_[i].get()); } } @@ -46,22 +49,23 @@ template void BasePrefetchingDataLayer::LayerSetUp( const vector*>& bottom, const vector*>& top) { BaseDataLayer::LayerSetUp(bottom, top); + // Before starting the prefetch thread, we make cpu_data and gpu_data // calls so that the prefetch thread does not accidentally make simultaneous // cudaMalloc calls when the main thread is running. In some GPUs this // seems to cause failures if we do not so. 
- for (int i = 0; i < PREFETCH_COUNT; ++i) { - prefetch_[i].data_.mutable_cpu_data(); + for (int i = 0; i < prefetch_.size(); ++i) { + prefetch_[i]->data_.mutable_cpu_data(); if (this->output_labels_) { - prefetch_[i].label_.mutable_cpu_data(); + prefetch_[i]->label_.mutable_cpu_data(); } } #ifndef CPU_ONLY if (Caffe::mode() == Caffe::GPU) { - for (int i = 0; i < PREFETCH_COUNT; ++i) { - prefetch_[i].data_.mutable_gpu_data(); + for (int i = 0; i < prefetch_.size(); ++i) { + prefetch_[i]->data_.mutable_gpu_data(); if (this->output_labels_) { - prefetch_[i].label_.mutable_gpu_data(); + prefetch_[i]->label_.mutable_gpu_data(); } } } @@ -88,6 +92,9 @@ void BasePrefetchingDataLayer::InternalThreadEntry() { #ifndef CPU_ONLY if (Caffe::mode() == Caffe::GPU) { batch->data_.data().get()->async_gpu_push(stream); + if (this->output_labels_) { + batch->label_.data().get()->async_gpu_push(stream); + } CUDA_CHECK(cudaStreamSynchronize(stream)); } #endif @@ -106,22 +113,18 @@ void BasePrefetchingDataLayer::InternalThreadEntry() { template void BasePrefetchingDataLayer::Forward_cpu( const vector*>& bottom, const vector*>& top) { - Batch* batch = prefetch_full_.pop("Data layer prefetch queue empty"); + if (prefetch_current_) { + prefetch_free_.push(prefetch_current_); + } + prefetch_current_ = prefetch_full_.pop("Waiting for data"); // Reshape to loaded data. - top[0]->ReshapeLike(batch->data_); - // Copy the data - caffe_copy(batch->data_.count(), batch->data_.cpu_data(), - top[0]->mutable_cpu_data()); - DLOG(INFO) << "Prefetch copied"; + top[0]->ReshapeLike(prefetch_current_->data_); + top[0]->set_cpu_data(prefetch_current_->data_.mutable_cpu_data()); if (this->output_labels_) { // Reshape to loaded labels. - top[1]->ReshapeLike(batch->label_); - // Copy the labels. 
- caffe_copy(batch->label_.count(), batch->label_.cpu_data(), - top[1]->mutable_cpu_data()); + top[1]->ReshapeLike(prefetch_current_->label_); + top[1]->set_cpu_data(prefetch_current_->label_.mutable_cpu_data()); } - - prefetch_free_.push(batch); } #ifdef CPU_ONLY diff --git a/src/caffe/layers/base_data_layer.cu b/src/caffe/layers/base_data_layer.cu index 4056d36a7b4..64c621a74f1 100644 --- a/src/caffe/layers/base_data_layer.cu +++ b/src/caffe/layers/base_data_layer.cu @@ -7,23 +7,18 @@ namespace caffe { template void BasePrefetchingDataLayer::Forward_gpu( const vector*>& bottom, const vector*>& top) { - Batch* batch = prefetch_full_.pop("Data layer prefetch queue empty"); + if (prefetch_current_) { + prefetch_free_.push(prefetch_current_); + } + prefetch_current_ = prefetch_full_.pop("Waiting for data"); // Reshape to loaded data. - top[0]->ReshapeLike(batch->data_); - // Copy the data - caffe_copy(batch->data_.count(), batch->data_.gpu_data(), - top[0]->mutable_gpu_data()); + top[0]->ReshapeLike(prefetch_current_->data_); + top[0]->set_gpu_data(prefetch_current_->data_.mutable_gpu_data()); if (this->output_labels_) { // Reshape to loaded labels. - top[1]->ReshapeLike(batch->label_); - // Copy the labels. - caffe_copy(batch->label_.count(), batch->label_.gpu_data(), - top[1]->mutable_gpu_data()); + top[1]->ReshapeLike(prefetch_current_->label_); + top[1]->set_gpu_data(prefetch_current_->label_.mutable_gpu_data()); } - // Ensure the copy is synchronous wrt the host, so that the next batch isn't - // copied in meanwhile. 
- CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault)); - prefetch_free_.push(batch); } INSTANTIATE_LAYER_GPU_FORWARD(BasePrefetchingDataLayer); diff --git a/src/caffe/layers/data_layer.cpp b/src/caffe/layers/data_layer.cpp index 66e6301fd45..0f1296bbc77 100644 --- a/src/caffe/layers/data_layer.cpp +++ b/src/caffe/layers/data_layer.cpp @@ -14,7 +14,10 @@ namespace caffe { template DataLayer::DataLayer(const LayerParameter& param) : BasePrefetchingDataLayer(param), - reader_(param) { + offset_() { + db_.reset(db::GetDB(param.data_param().backend())); + db_->Open(param.data_param().source(), db::READ); + cursor_.reset(db_->NewCursor()); } template @@ -27,7 +30,8 @@ void DataLayer::DataLayerSetUp(const vector*>& bottom, const vector*>& top) { const int batch_size = this->layer_param_.data_param().batch_size(); // Read a data point, and use it to initialize the top blob. - Datum& datum = *(reader_.full().peek()); + Datum datum; + datum.ParseFromString(cursor_->value()); // Use data_transformer to infer the expected blob shape from datum. vector top_shape = this->data_transformer_->InferBlobShape(datum); @@ -35,22 +39,44 @@ void DataLayer::DataLayerSetUp(const vector*>& bottom, // Reshape top[0] and prefetch_data according to the batch_size. 
top_shape[0] = batch_size; top[0]->Reshape(top_shape); - for (int i = 0; i < this->PREFETCH_COUNT; ++i) { - this->prefetch_[i].data_.Reshape(top_shape); + for (int i = 0; i < this->prefetch_.size(); ++i) { + this->prefetch_[i]->data_.Reshape(top_shape); } - LOG(INFO) << "output data size: " << top[0]->num() << "," + LOG_IF(INFO, Caffe::root_solver()) + << "output data size: " << top[0]->num() << "," << top[0]->channels() << "," << top[0]->height() << "," << top[0]->width(); // label if (this->output_labels_) { vector label_shape(1, batch_size); top[1]->Reshape(label_shape); - for (int i = 0; i < this->PREFETCH_COUNT; ++i) { - this->prefetch_[i].label_.Reshape(label_shape); + for (int i = 0; i < this->prefetch_.size(); ++i) { + this->prefetch_[i]->label_.Reshape(label_shape); } } } +template +bool DataLayer::Skip() { + int size = Caffe::solver_count(); + int rank = Caffe::solver_rank(); + bool keep = (offset_ % size) == rank || + // In test mode, only rank 0 runs, so avoid skipping + this->layer_param_.phase() == TEST; + return !keep; +} + +template +void DataLayer::Next() { + cursor_->Next(); + if (!cursor_->valid()) { + LOG_IF(INFO, Caffe::root_solver()) + << "Restarting data prefetching from start."; + cursor_->SeekToFirst(); + } + offset_++; +} + // This function is called on prefetch thread template void DataLayer::load_batch(Batch* batch) { @@ -61,41 +87,41 @@ void DataLayer::load_batch(Batch* batch) { CPUTimer timer; CHECK(batch->data_.count()); CHECK(this->transformed_data_.count()); - - // Reshape according to the first datum of each batch - // on single input batches allows for inputs of varying dimension. const int batch_size = this->layer_param_.data_param().batch_size(); - Datum& datum = *(reader_.full().peek()); - // Use data_transformer to infer the expected blob shape from datum. - vector top_shape = this->data_transformer_->InferBlobShape(datum); - this->transformed_data_.Reshape(top_shape); - // Reshape batch according to the batch_size. 
- top_shape[0] = batch_size; - batch->data_.Reshape(top_shape); - - Dtype* top_data = batch->data_.mutable_cpu_data(); - Dtype* top_label = NULL; // suppress warnings about uninitialized variables - if (this->output_labels_) { - top_label = batch->label_.mutable_cpu_data(); - } + Datum datum; for (int item_id = 0; item_id < batch_size; ++item_id) { timer.Start(); - // get a datum - Datum& datum = *(reader_.full().pop("Waiting for data")); + while (Skip()) { + Next(); + } + datum.ParseFromString(cursor_->value()); read_time += timer.MicroSeconds(); - timer.Start(); + + if (item_id == 0) { + // Reshape according to the first datum of each batch + // on single input batches allows for inputs of varying dimension. + // Use data_transformer to infer the expected blob shape from datum. + vector top_shape = this->data_transformer_->InferBlobShape(datum); + this->transformed_data_.Reshape(top_shape); + // Reshape batch according to the batch_size. + top_shape[0] = batch_size; + batch->data_.Reshape(top_shape); + } + // Apply data transformations (mirror, scale, crop...) + timer.Start(); int offset = batch->data_.offset(item_id); + Dtype* top_data = batch->data_.mutable_cpu_data(); this->transformed_data_.set_cpu_data(top_data + offset); this->data_transformer_->Transform(datum, &(this->transformed_data_)); // Copy label. 
if (this->output_labels_) { + Dtype* top_label = batch->label_.mutable_cpu_data(); top_label[item_id] = datum.label(); } trans_time += timer.MicroSeconds(); - - reader_.free().push(const_cast(&datum)); + Next(); } timer.Stop(); batch_timer.Stop(); diff --git a/src/caffe/layers/hdf5_data_layer.cpp b/src/caffe/layers/hdf5_data_layer.cpp index c957451ae1e..b9a071ceab3 100644 --- a/src/caffe/layers/hdf5_data_layer.cpp +++ b/src/caffe/layers/hdf5_data_layer.cpp @@ -124,28 +124,46 @@ void HDF5DataLayer::LayerSetUp(const vector*>& bottom, } } +template +bool HDF5DataLayer::Skip() { + int size = Caffe::solver_count(); + int rank = Caffe::solver_rank(); + bool keep = (offset_ % size) == rank || + // In test mode, only rank 0 runs, so avoid skipping + this->layer_param_.phase() == TEST; + return !keep; +} + +template +void HDF5DataLayer::Next() { + if (++current_row_ == hdf_blobs_[0]->shape(0)) { + if (num_files_ > 1) { + ++current_file_; + if (current_file_ == num_files_) { + current_file_ = 0; + if (this->layer_param_.hdf5_data_param().shuffle()) { + std::random_shuffle(file_permutation_.begin(), + file_permutation_.end()); + } + DLOG(INFO) << "Looping around to first file."; + } + LoadHDF5FileData( + hdf_filenames_[file_permutation_[current_file_]].c_str()); + } + current_row_ = 0; + if (this->layer_param_.hdf5_data_param().shuffle()) + std::random_shuffle(data_permutation_.begin(), data_permutation_.end()); + } + offset_++; +} + template void HDF5DataLayer::Forward_cpu(const vector*>& bottom, const vector*>& top) { const int batch_size = this->layer_param_.hdf5_data_param().batch_size(); - for (int i = 0; i < batch_size; ++i, ++current_row_) { - if (current_row_ == hdf_blobs_[0]->shape(0)) { - if (num_files_ > 1) { - ++current_file_; - if (current_file_ == num_files_) { - current_file_ = 0; - if (this->layer_param_.hdf5_data_param().shuffle()) { - std::random_shuffle(file_permutation_.begin(), - file_permutation_.end()); - } - DLOG(INFO) << "Looping around to first 
file."; - } - LoadHDF5FileData( - hdf_filenames_[file_permutation_[current_file_]].c_str()); - } - current_row_ = 0; - if (this->layer_param_.hdf5_data_param().shuffle()) - std::random_shuffle(data_permutation_.begin(), data_permutation_.end()); + for (int i = 0; i < batch_size; ++i) { + while (Skip()) { + Next(); } for (int j = 0; j < this->layer_param_.top_size(); ++j) { int data_dim = top[j]->count() / top[j]->shape(0); @@ -153,6 +171,7 @@ void HDF5DataLayer::Forward_cpu(const vector*>& bottom, &hdf_blobs_[j]->cpu_data()[data_permutation_[current_row_] * data_dim], &top[j]->mutable_cpu_data()[i * data_dim]); } + Next(); } } diff --git a/src/caffe/layers/hdf5_data_layer.cu b/src/caffe/layers/hdf5_data_layer.cu index 595d2230220..33eebd41dfc 100644 --- a/src/caffe/layers/hdf5_data_layer.cu +++ b/src/caffe/layers/hdf5_data_layer.cu @@ -17,24 +17,9 @@ template void HDF5DataLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { const int batch_size = this->layer_param_.hdf5_data_param().batch_size(); - for (int i = 0; i < batch_size; ++i, ++current_row_) { - if (current_row_ == hdf_blobs_[0]->shape(0)) { - if (num_files_ > 1) { - current_file_ += 1; - if (current_file_ == num_files_) { - current_file_ = 0; - if (this->layer_param_.hdf5_data_param().shuffle()) { - std::random_shuffle(file_permutation_.begin(), - file_permutation_.end()); - } - DLOG(INFO) << "Looping around to first file."; - } - LoadHDF5FileData( - hdf_filenames_[file_permutation_[current_file_]].c_str()); - } - current_row_ = 0; - if (this->layer_param_.hdf5_data_param().shuffle()) - std::random_shuffle(data_permutation_.begin(), data_permutation_.end()); + for (int i = 0; i < batch_size; ++i) { + while (Skip()) { + Next(); } for (int j = 0; j < this->layer_param_.top_size(); ++j) { int data_dim = top[j]->count() / top[j]->shape(0); @@ -42,6 +27,7 @@ void HDF5DataLayer::Forward_gpu(const vector*>& bottom, &hdf_blobs_[j]->cpu_data()[data_permutation_[current_row_] * data_dim], 
&top[j]->mutable_gpu_data()[i * data_dim]); } + Next(); } } diff --git a/src/caffe/layers/image_data_layer.cpp b/src/caffe/layers/image_data_layer.cpp index 7ee7dc40714..ec0fc5b0383 100644 --- a/src/caffe/layers/image_data_layer.cpp +++ b/src/caffe/layers/image_data_layer.cpp @@ -54,6 +54,11 @@ void ImageDataLayer::DataLayerSetUp(const vector*>& bottom, const unsigned int prefetch_rng_seed = caffe_rng_rand(); prefetch_rng_.reset(new Caffe::RNG(prefetch_rng_seed)); ShuffleImages(); + } else { + if (this->phase_ == TRAIN && Caffe::solver_rank() > 0 && + this->layer_param_.image_data_param().rand_skip() == 0) { + LOG(WARNING) << "Shuffling or skipping recommended for multi-GPU"; + } } LOG(INFO) << "A total of " << lines_.size() << " images."; @@ -77,8 +82,8 @@ void ImageDataLayer::DataLayerSetUp(const vector*>& bottom, const int batch_size = this->layer_param_.image_data_param().batch_size(); CHECK_GT(batch_size, 0) << "Positive batch size required"; top_shape[0] = batch_size; - for (int i = 0; i < this->PREFETCH_COUNT; ++i) { - this->prefetch_[i].data_.Reshape(top_shape); + for (int i = 0; i < this->prefetch_.size(); ++i) { + this->prefetch_[i]->data_.Reshape(top_shape); } top[0]->Reshape(top_shape); @@ -88,8 +93,8 @@ void ImageDataLayer::DataLayerSetUp(const vector*>& bottom, // label vector label_shape(1, batch_size); top[1]->Reshape(label_shape); - for (int i = 0; i < this->PREFETCH_COUNT; ++i) { - this->prefetch_[i].label_.Reshape(label_shape); + for (int i = 0; i < this->prefetch_.size(); ++i) { + this->prefetch_[i]->label_.Reshape(label_shape); } } diff --git a/src/caffe/layers/window_data_layer.cpp b/src/caffe/layers/window_data_layer.cpp index 103dd4b6af8..1bf3760e9fd 100644 --- a/src/caffe/layers/window_data_layer.cpp +++ b/src/caffe/layers/window_data_layer.cpp @@ -173,8 +173,8 @@ void WindowDataLayer::DataLayerSetUp(const vector*>& bottom, CHECK_GT(crop_size, 0); const int batch_size = this->layer_param_.window_data_param().batch_size(); 
top[0]->Reshape(batch_size, channels, crop_size, crop_size); - for (int i = 0; i < this->PREFETCH_COUNT; ++i) - this->prefetch_[i].data_.Reshape( + for (int i = 0; i < this->prefetch_.size(); ++i) + this->prefetch_[i]->data_.Reshape( batch_size, channels, crop_size, crop_size); LOG(INFO) << "output data size: " << top[0]->num() << "," @@ -183,8 +183,8 @@ void WindowDataLayer::DataLayerSetUp(const vector*>& bottom, // label vector label_shape(1, batch_size); top[1]->Reshape(label_shape); - for (int i = 0; i < this->PREFETCH_COUNT; ++i) { - this->prefetch_[i].label_.Reshape(label_shape); + for (int i = 0; i < this->prefetch_.size(); ++i) { + this->prefetch_[i]->label_.Reshape(label_shape); } // data mean diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 644cb7e97ee..aa9e8f2f386 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -22,16 +22,13 @@ namespace caffe { template -Net::Net(const NetParameter& param, const Net* root_net) - : root_net_(root_net) { +Net::Net(const NetParameter& param) { Init(param); } template Net::Net(const string& param_file, Phase phase, - const int level, const vector* stages, - const Net* root_net) - : root_net_(root_net) { + const int level, const vector* stages) { NetParameter param; ReadNetParamsFromTextFileOrDie(param_file, ¶m); // Set phase, stages and level @@ -47,8 +44,6 @@ Net::Net(const string& param_file, Phase phase, template void Net::Init(const NetParameter& in_param) { - CHECK(Caffe::root_solver() || root_net_) - << "root_net_ needs to be set for all non-root solvers"; // Set phase from the state. phase_ = in_param.state().phase(); // Filter layers based on their include/exclude rules and @@ -74,9 +69,6 @@ void Net::Init(const NetParameter& in_param) { top_id_vecs_.resize(param.layer_size()); bottom_need_backward_.resize(param.layer_size()); for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) { - // For non-root solvers, whether this layer is shared from root_net_. 
- bool share_from_root = !Caffe::root_solver() - && root_net_->layers_[layer_id]->ShareInParallel(); // Inherit phase from net if unset. if (!param.layer(layer_id).has_phase()) { param.mutable_layer(layer_id)->set_phase(phase_); @@ -89,13 +81,7 @@ void Net::Init(const NetParameter& in_param) { << "propagate_down param must be specified " << "either 0 or bottom_size times "; } - if (share_from_root) { - LOG(INFO) << "Sharing layer " << layer_param.name() << " from root net"; - layers_.push_back(root_net_->layers_[layer_id]); - layers_[layer_id]->SetShared(true); - } else { - layers_.push_back(LayerRegistry::CreateLayer(layer_param)); - } + layers_.push_back(LayerRegistry::CreateLayer(layer_param)); layer_names_.push_back(layer_param.name()); LOG_IF(INFO, Caffe::root_solver()) << "Creating Layer " << layer_param.name(); @@ -134,19 +120,7 @@ void Net::Init(const NetParameter& in_param) { } } // After this layer is connected, set it up. - if (share_from_root) { - // Set up size of top blobs using root_net_ - const vector*>& base_top = root_net_->top_vecs_[layer_id]; - const vector*>& this_top = this->top_vecs_[layer_id]; - for (int top_id = 0; top_id < base_top.size(); ++top_id) { - this_top[top_id]->ReshapeLike(*base_top[top_id]); - LOG(INFO) << "Created top blob " << top_id << " (shape: " - << this_top[top_id]->shape_string() << ") for shared layer " - << layer_param.name(); - } - } else { - layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]); - } + layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]); LOG_IF(INFO, Caffe::root_solver()) << "Setting up " << layer_names_[layer_id]; for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) { @@ -546,10 +520,15 @@ Dtype Net::ForwardFromTo(int start, int end) { CHECK_LT(end, layers_.size()); Dtype loss = 0; for (int i = start; i <= end; ++i) { - // LOG(ERROR) << "Forwarding " << layer_names_[i]; + for (int c = 0; c < before_forward_.size(); ++c) { + before_forward_[c]->run(i); 
+ } Dtype layer_loss = layers_[i]->Forward(bottom_vecs_[i], top_vecs_[i]); loss += layer_loss; if (debug_info_) { ForwardDebugInfo(i); } + for (int c = 0; c < after_forward_.size(); ++c) { + after_forward_[c]->run(i); + } } return loss; } @@ -591,11 +570,17 @@ void Net::BackwardFromTo(int start, int end) { CHECK_GE(end, 0); CHECK_LT(start, layers_.size()); for (int i = start; i >= end; --i) { + for (int c = 0; c < before_backward_.size(); ++c) { + before_backward_[c]->run(i); + } if (layer_need_backward_[i]) { layers_[i]->Backward( top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i]); if (debug_info_) { BackwardDebugInfo(i); } } + for (int c = 0; c < after_backward_.size(); ++c) { + after_backward_[c]->run(i); + } } } diff --git a/src/caffe/parallel.cpp b/src/caffe/parallel.cpp index 5bc41c6a6e5..d9433917d25 100644 --- a/src/caffe/parallel.cpp +++ b/src/caffe/parallel.cpp @@ -1,16 +1,15 @@ -#ifndef CPU_ONLY +#ifdef USE_NCCL + #include -#endif #include #include - #include #include #include -#include "boost/thread.hpp" #include "caffe/caffe.hpp" #include "caffe/parallel.hpp" +#include "caffe/sgd_solvers.hpp" namespace caffe { @@ -68,15 +67,14 @@ static size_t total_size(const vector*>& params) { template Params::Params(shared_ptr > root_solver) - : size_(total_size(root_solver->net()->learnable_params())), - data_(), - diff_() { + : size_(total_size(root_solver->net()->learnable_params())), + data_(), + diff_() { } template GPUParams::GPUParams(shared_ptr > root_solver, int device) - : Params(root_solver) { -#ifndef CPU_ONLY + : Params(root_solver) { int initial_device; CUDA_CHECK(cudaGetDevice(&initial_device)); @@ -86,358 +84,288 @@ GPUParams::GPUParams(shared_ptr > root_solver, int device) // Copy blob values const vector*>& net = - root_solver->net()->learnable_params(); + root_solver->net()->learnable_params(); apply_buffers(net, data_, size_, copy); CUDA_CHECK(cudaMalloc(&diff_, size_ * sizeof(Dtype))); caffe_gpu_set(size_, Dtype(0), diff_); 
CUDA_CHECK(cudaSetDevice(initial_device)); -#else - NO_GPU; -#endif } template GPUParams::~GPUParams() { -#ifndef CPU_ONLY CUDA_CHECK(cudaFree(data_)); CUDA_CHECK(cudaFree(diff_)); -#endif } template -void GPUParams::configure(Solver* solver) const { +void GPUParams::Configure(Solver* solver) const { const vector*>& net = - solver->net()->learnable_params(); + solver->net()->learnable_params(); apply_buffers(net, data_, size_, replace_gpu); apply_buffers(net, diff_, size_, replace_gpu_diff); } -void DevicePair::compute(const vector devices, vector* pairs) { -#ifndef CPU_ONLY - vector remaining(devices); - - // Depth for reduction tree - int remaining_depth = static_cast(ceil(log2(remaining.size()))); - - // Group GPUs by board - for (int d = 0; d < remaining_depth; ++d) { - for (int i = 0; i < remaining.size(); ++i) { - for (int j = i + 1; j < remaining.size(); ++j) { - cudaDeviceProp a, b; - CUDA_CHECK(cudaGetDeviceProperties(&a, remaining[i])); - CUDA_CHECK(cudaGetDeviceProperties(&b, remaining[j])); - if (a.isMultiGpuBoard && b.isMultiGpuBoard) { - if (a.multiGpuBoardGroupID == b.multiGpuBoardGroupID) { - pairs->push_back(DevicePair(remaining[i], remaining[j])); - DLOG(INFO) << "GPU board: " << remaining[i] << ":" << remaining[j]; - remaining.erase(remaining.begin() + j); - break; - } - } - } - } - } - ostringstream s; - for (int i = 0; i < remaining.size(); ++i) { - s << (i ? 
", " : "") << remaining[i]; - } - DLOG(INFO) << "GPUs paired by boards, remaining: " << s.str(); - - // Group by P2P accessibility - remaining_depth = ceil(log2(remaining.size())); - for (int d = 0; d < remaining_depth; ++d) { - for (int i = 0; i < remaining.size(); ++i) { - for (int j = i + 1; j < remaining.size(); ++j) { - int access; - CUDA_CHECK( - cudaDeviceCanAccessPeer(&access, remaining[i], remaining[j])); - if (access) { - pairs->push_back(DevicePair(remaining[i], remaining[j])); - DLOG(INFO) << "P2P pair: " << remaining[i] << ":" << remaining[j]; - remaining.erase(remaining.begin() + j); - break; - } - } - } - } - s.str(""); - for (int i = 0; i < remaining.size(); ++i) { - s << (i ? ", " : "") << remaining[i]; - } - DLOG(INFO) << "GPUs paired by P2P access, remaining: " << s.str(); - - // Group remaining - remaining_depth = ceil(log2(remaining.size())); - for (int d = 0; d < remaining_depth; ++d) { - for (int i = 0; i < remaining.size(); ++i) { - pairs->push_back(DevicePair(remaining[i], remaining[i + 1])); - DLOG(INFO) << "Remaining pair: " << remaining[i] << ":" - << remaining[i + 1]; - remaining.erase(remaining.begin() + i + 1); - } - } +static int getDevice() { + int device = 0; + CUDA_CHECK(cudaGetDevice(&device)); + return device; +} - // Should only be the parent node remaining - CHECK_EQ(remaining.size(), 1); +template +NCCL::NCCL(shared_ptr > solver) + : GPUParams(solver, getDevice()), + comm_(), solver_(solver), barrier_() { + this->Configure(solver.get()); + Init(); +} - pairs->insert(pairs->begin(), DevicePair(-1, remaining[0])); +template +NCCL::NCCL(shared_ptr > solver, const string& uid) + : GPUParams(solver, getDevice()), + solver_(solver), barrier_() { + this->Configure(solver.get()); + Caffe::set_multiprocess(true); + ncclUniqueId nccl_uid; + memcpy(&nccl_uid, &uid[0], NCCL_UNIQUE_ID_BYTES); // NOLINT(caffe/alt_fn) + NCCL_CHECK(ncclCommInitRank(&comm_, + Caffe::solver_count(), + nccl_uid, + Caffe::solver_rank())); + Init(); +} - 
CHECK(pairs->size() == devices.size()); - for (int i = 0; i < pairs->size(); ++i) { - CHECK((*pairs)[i].parent() != (*pairs)[i].device()); - for (int j = i + 1; j < pairs->size(); ++j) { - CHECK((*pairs)[i].device() != (*pairs)[j].device()); - } +template +void NCCL::Init() { + if (solver_->param().layer_wise_reduce()) { + CUDA_CHECK(cudaStreamCreateWithFlags(&stream_, cudaStreamNonBlocking)); } -#else - NO_GPU; -#endif } -// - template -P2PSync::P2PSync(shared_ptr > root_solver, - P2PSync* parent, const SolverParameter& param) - : GPUParams(root_solver, param.device_id()), - parent_(parent), - children_(), - queue_(), - initial_iter_(root_solver->iter()), - solver_() { -#ifndef CPU_ONLY - int initial_device; - CUDA_CHECK(cudaGetDevice(&initial_device)); - const int self = param.device_id(); - CUDA_CHECK(cudaSetDevice(self)); - - if (parent == NULL) { - solver_ = root_solver; - } else { - Caffe::set_root_solver(false); - solver_.reset(new WorkerSolver(param, root_solver.get())); - Caffe::set_root_solver(true); +NCCL::~NCCL() { + if (solver_->param().layer_wise_reduce()) { + CUDA_CHECK(cudaStreamDestroy(stream_)); } - this->configure(solver_.get()); - solver_->add_callback(this); - - if (parent) { - // Enable p2p access between devices - const int peer = parent->solver_->param().device_id(); - int access; - CUDA_CHECK(cudaDeviceCanAccessPeer(&access, self, peer)); - if (access) { - CUDA_CHECK(cudaDeviceEnablePeerAccess(peer, 0)); - } else { - LOG(INFO)<< "GPU " << self << " does not have p2p access to GPU " << peer; - } - // Allocate receiving buffer on parent - CUDA_CHECK(cudaSetDevice(peer)); - CUDA_CHECK(cudaMalloc(&parent_grads_, size_ * sizeof(Dtype))); - CUDA_CHECK(cudaSetDevice(self)); + if (comm_) { + ncclCommDestroy(comm_); } - - CUDA_CHECK(cudaSetDevice(initial_device)); -#else - NO_GPU; -#endif } template -P2PSync::~P2PSync() { -#ifndef CPU_ONLY - int initial_device; - CUDA_CHECK(cudaGetDevice(&initial_device)); - const int self = 
solver_->param().device_id(); - CUDA_CHECK(cudaSetDevice(self)); - - if (parent_) { - CUDA_CHECK(cudaFree(parent_grads_)); - const int peer = parent_->solver_->param().device_id(); - int access; - CUDA_CHECK(cudaDeviceCanAccessPeer(&access, self, peer)); - if (access) { - CUDA_CHECK(cudaDeviceDisablePeerAccess(peer)); - } - } - - CUDA_CHECK(cudaSetDevice(initial_device)); -#endif +boost::barrier* NCCL::barrier() { + return barrier_; +} +template +void NCCL::set_barrier(boost::barrier* value) { + barrier_ = value; } template -void P2PSync::InternalThreadEntry() { - Caffe::SetDevice(solver_->param().device_id()); - CHECK(Caffe::root_solver()); - Caffe::set_root_solver(false); - // See if there is a defined seed and reset random state if so - if (solver_->param().random_seed() >= 0) { - // Fetch random seed and modulate by device ID to make sure - // everyone doesn't have the same seed. We seem to have some - // solver instability if we have everyone with the same seed - Caffe::set_random_seed( - solver_->param().random_seed() + solver_->param().device_id()); +void NCCL::InitSingleProcess(vector*>* nccls) { + ncclComm_t* comms = new ncclComm_t[nccls->size()]; + int* gpu_list = new int[nccls->size()]; + for (int i = 0; i < nccls->size(); ++i) { + gpu_list[i] = (*nccls)[i]->solver_->param().device_id(); + } + NCCL_CHECK(ncclCommInitAll(comms, static_cast(nccls->size()), gpu_list)); + for (int i = 0; i < nccls->size(); ++i) { + (*nccls)[i]->comm_ = comms[i]; } - solver_->Step(solver_->param().max_iter() - initial_iter_); } template -void P2PSync::on_start() { -#ifndef CPU_ONLY -#ifdef DEBUG - int device; - CUDA_CHECK(cudaGetDevice(&device)); - CHECK(device == solver_->param().device_id()); -#else -// CHECK(false); -#endif +string NCCL::new_uid() { + string uid; + uid.resize(NCCL_UNIQUE_ID_BYTES); + ncclUniqueId nccl_uid; + NCCL_CHECK(ncclGetUniqueId(&nccl_uid)); + memcpy(&uid[0], &nccl_uid, NCCL_UNIQUE_ID_BYTES); // NOLINT(caffe/alt_fn) + return uid; +} - // Wait for 
update from parent - if (parent_) { - P2PSync *parent = queue_.pop(); - CHECK(parent == parent_); +template +void NCCL::Broadcast() { + if (barrier_) { // NULL in multi process case + barrier_->wait(); } - - // Update children - for (int i = children_.size() - 1; i >= 0; i--) { - Dtype* src = data_; - Dtype* dst = children_[i]->data_; - -#ifdef DEBUG - cudaPointerAttributes attributes; - CUDA_CHECK(cudaPointerGetAttributes(&attributes, src)); - CHECK(attributes.device == device); - CUDA_CHECK(cudaPointerGetAttributes(&attributes, dst)); - CHECK(attributes.device == children_[i]->solver_->param().device_id()); -#endif - - CUDA_CHECK(cudaMemcpyAsync(dst, src, size_ * sizeof(Dtype), - cudaMemcpyDeviceToDevice, cudaStreamDefault)); - CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault)); - children_[i]->queue_.push(this); + NCCL_CHECK(ncclBcast(data_, static_cast(size_), + nccl::dataType::type, 0, + comm_, cudaStreamDefault)); + if (barrier_) { + barrier_->wait(); } -#endif } template -void P2PSync::on_gradients_ready() { -#ifndef CPU_ONLY +void NCCL::run(int layer) { + CHECK(solver_->param().layer_wise_reduce()); + vector > >& blobs = + solver_->net()->layers()[layer]->blobs(); #ifdef DEBUG - int device; - CUDA_CHECK(cudaGetDevice(&device)); - CHECK(device == solver_->param().device_id()); + // Assert blobs are contiguous to reduce in one step (e.g. bias often small) + for (int i = 1; i < blobs.size(); ++i) { + CHECK_EQ(blobs[i - 1]->gpu_diff() + blobs[i - 1]->count(), + blobs[i + 0]->gpu_diff()); + } #endif + if (blobs.size() > 0) { + // Make sure default stream is done computing gradients. Could be + // replaced by cudaEventRecord+cudaStreamWaitEvent to avoid + // blocking the default stream, but it's actually slower. 
+ CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault)); - // Sum children gradients as they appear in the queue - for (int i = 0; i < children_.size(); ++i) { - P2PSync *child = queue_.pop(); - Dtype* src = child->parent_grads_; - Dtype* dst = diff_; - -#ifdef DEBUG - bool ok = false; - for (int j = 0; j < children_.size(); ++j) { - if (child == children_[j]) { - ok = true; - } + // Reduce asynchronously + int size = 0; + for (int i = 0; i < blobs.size(); ++i) { + size += blobs[i]->count(); } - CHECK(ok); - cudaPointerAttributes attributes; - CUDA_CHECK(cudaPointerGetAttributes(&attributes, src)); - CHECK(attributes.device == device); - CUDA_CHECK(cudaPointerGetAttributes(&attributes, dst)); - CHECK(attributes.device == device); -#endif - - caffe_gpu_add(size_, src, dst, dst); + if (barrier_) { // NULL in multi process case + barrier_->wait(); + } + NCCL_CHECK(ncclAllReduce(blobs[0]->mutable_gpu_diff(), + blobs[0]->mutable_gpu_diff(), + size, + nccl::dataType::type, + ncclSum, comm_, stream_)); + caffe_gpu_scal(size, (Dtype) 1.0 / Caffe::solver_count(), + blobs[0]->mutable_gpu_diff(), stream_); } +} - // Send gradients to parent - if (parent_) { - Dtype* src = diff_; - Dtype* dst = parent_grads_; - -#ifdef DEBUG - cudaPointerAttributes attributes; - CUDA_CHECK(cudaPointerGetAttributes(&attributes, src)); - CHECK(attributes.device == device); - CUDA_CHECK(cudaPointerGetAttributes(&attributes, dst)); - CHECK(attributes.device == parent_->solver_->param().device_id()); -#endif - - CUDA_CHECK(cudaMemcpyAsync(dst, src, size_ * sizeof(Dtype), // - cudaMemcpyDeviceToDevice, cudaStreamDefault)); - CUDA_CHECK(cudaStreamSynchronize(cudaStreamDefault)); - parent_->queue_.push(this); +template +void NCCL::on_gradients_ready() { + if (solver_->param().layer_wise_reduce()) { + CHECK_EQ(solver_->net()->params().size(), + solver_->net()->learnable_params().size()) + << "Layer-wise reduce is not supported for nets with shared weights."; + + // Make sure reduction is done before 
applying gradients + CUDA_CHECK(cudaStreamSynchronize(stream_)); } else { - // Loss functions divide gradients by the batch size, so to compensate - // for split batch, the root solver divides by number of solvers. - caffe_gpu_scal(size_, Dtype(1.0 / Caffe::solver_count()), diff_); + if (barrier_) { // NULL in multi process case + barrier_->wait(); + } + NCCL_CHECK(ncclAllReduce(diff_, diff_, static_cast(size_), + nccl::dataType::type, ncclSum, comm_, + cudaStreamDefault)); + caffe_gpu_scal(static_cast(size_), + (Dtype) 1.0 / Caffe::solver_count(), diff_); } -#endif } template -void P2PSync::Prepare(const vector& gpus, - vector > >* syncs) { - // Pair devices for map-reduce synchronization - vector pairs; - DevicePair::compute(gpus, &pairs); - ostringstream s; - for (int i = 1; i < pairs.size(); ++i) { - s << (i == 1 ? "" : ", ") << pairs[i].parent() << ":" << pairs[i].device(); +class Worker : public InternalThread { + public: + explicit Worker(shared_ptr > rank0, int device, + boost::barrier* barrier, vector*>* nccls, + const char* restore) + : rank0_(rank0), device_(device), barrier_(barrier), + nccls_(nccls), restore_(restore) { } - LOG(INFO)<< "GPUs pairs " << s.str(); - - SolverParameter param(solver_->param()); - - // Build the GPU tree by finding the parent for each solver - for (int attempts = 0; attempts < pairs.size(); ++attempts) { - for (int i = 1; i < pairs.size(); ++i) { - if (!syncs->at(i).get()) { - P2PSync* parent = NULL; - for (int j = 0; j < syncs->size(); ++j) { - P2PSync* sync = j == 0 ? 
this : syncs->at(j).get(); - if (sync) { - const SolverParameter& p = sync->solver()->param(); - if (p.device_id() == pairs[i].parent()) { - parent = sync; - } - } - } - if (parent) { - param.set_device_id(pairs[i].device()); - syncs->at(i).reset(new P2PSync(solver_, parent, param)); - parent->children_.push_back((P2PSync*) syncs->at(i).get()); - } + virtual ~Worker() {} + + protected: + void InternalThreadEntry() { + // Create solver and install callbacks + SolverParameter param(rank0_->param()); + param.set_device_id(device_); +#ifdef DEBUG + int device; + CUDA_CHECK(cudaGetDevice(&device)); + CHECK_EQ(device, device_); +#endif + param.set_type(rank0_->type()); + shared_ptr > s(SolverRegistry::CreateSolver(param)); + CHECK_EQ(s->type(), rank0_->type()); + if (restore_) { + // Could not make NCCL broadcast solver state, it seems to crash + // if called in a tight loop, regardless of barriers etc. so + // restore all solvers from file. + s->Restore(restore_); + } + NCCL nccl(s); + nccl.set_barrier(barrier_); + s->add_callback(&nccl); + if (s->param().layer_wise_reduce()) { + s->net()->add_after_backward(&nccl); + } + (*nccls_)[Caffe::solver_rank()] = &nccl; + // Wait for other threads + barrier_->wait(); + // Wait for NCCL init + barrier_->wait(); + // Broadcast rank 0 state + nccl.Broadcast(); + // Solve + s->Step(param.max_iter() - s->iter()); + barrier_->wait(); +#ifdef DEBUG + // Check all solvers have same state + SGDSolver* sa = static_cast*>(rank0_.get()); + SGDSolver* sb = static_cast*>(s.get()); + for (int h = 0; h < sa->history().size(); ++h) { + CUDA_CHECK(cudaSetDevice(sa->param().device_id())); + const Dtype* a = sa->history()[h]->cpu_data(); + CUDA_CHECK(cudaSetDevice(sb->param().device_id())); + const Dtype* b = sb->history()[h]->cpu_data(); + for (int v = 0; v < sa->history()[h]->count(); ++v) { + CHECK_DOUBLE_EQ(a[v], b[v]); } } +#endif } -} - -template -void P2PSync::Run(const vector& gpus) { - vector > > syncs(gpus.size()); - Prepare(gpus, 
&syncs); - LOG(INFO)<< "Starting Optimization"; + shared_ptr > rank0_; + int device_; + boost::barrier* barrier_; + vector*>* nccls_; + const char* restore_; +}; - for (int i = 1; i < syncs.size(); ++i) { - syncs[i]->StartInternalThread(); +template +void NCCL::Run(const vector& gpus, const char* restore) { + boost::barrier barrier(static_cast(gpus.size())); + vector*> nccls(gpus.size()); + // Create workers + vector > > workers(gpus.size()); + for (int i = 1; i < gpus.size(); ++i) { + CUDA_CHECK(cudaSetDevice(gpus[i])); + Caffe::set_solver_rank(i); + Worker* w = new Worker(solver_, gpus[i], &barrier, + &nccls, restore); + w->StartInternalThread(); + workers[i].reset(w); } - - // Run root solver on current thread + CUDA_CHECK(cudaSetDevice(gpus[0])); + Caffe::set_solver_rank(0); + barrier_ = &barrier; + solver_->add_callback(this); + if (solver_->param().layer_wise_reduce()) { + solver_->net()->add_after_backward(this); + } + nccls[0] = this; + // Wait for workers + barrier.wait(); + // Init NCCL + InitSingleProcess(&nccls); + barrier.wait(); + // Run first solver on current thread + Broadcast(); solver_->Solve(); - - for (int i = 1; i < syncs.size(); ++i) { - syncs[i]->StopInternalThread(); + barrier.wait(); // Hangs without it when running tests + // Wait for shutdown + for (int i = 1; i < gpus.size(); ++i) { + workers[i]->StopInternalThread(); } } INSTANTIATE_CLASS(Params); INSTANTIATE_CLASS(GPUParams); -INSTANTIATE_CLASS(P2PSync); +INSTANTIATE_CLASS(Worker); +INSTANTIATE_CLASS(NCCL); } // namespace caffe + +#endif // USE_NCCL diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 430a0dea109..1c85f69698f 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -98,7 +98,7 @@ message NetParameter { // NOTE // Update the next available ID when you add a new SolverParameter field. 
// -// SolverParameter next available ID: 41 (last added: type) +// SolverParameter next available ID: 42 (last added: layer_wise_reduce) message SolverParameter { ////////////////////////////////////////////////////////////////////////////// // Specifying the train and test networks @@ -239,6 +239,9 @@ message SolverParameter { } // DEPRECATED: use type instead of solver_type optional SolverType solver_type = 30 [default = SGD]; + + // Overlap compute and communication for data parallel training + optional bool layer_wise_reduce = 41 [default = true]; } // A message that stores the solver snapshots @@ -655,8 +658,8 @@ message DataParameter { optional bool mirror = 6 [default = false]; // Force the encoded image to have 3 color channels optional bool force_encoded_color = 9 [default = false]; - // Prefetch queue (Number of batches to prefetch to host memory, increase if - // data access bandwidth varies). + // Prefetch queue (Increase if data feeding bandwidth varies, within the + // limit of device memory for GPU training) optional uint32 prefetch = 10 [default = 4]; } diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index ece3913e88a..1c1a9e59565 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -26,16 +26,14 @@ SolverAction::Enum Solver::GetRequestedAction() { } template -Solver::Solver(const SolverParameter& param, const Solver* root_solver) - : net_(), callbacks_(), root_solver_(root_solver), - requested_early_exit_(false) { +Solver::Solver(const SolverParameter& param) + : net_(), callbacks_(), requested_early_exit_(false) { Init(param); } template -Solver::Solver(const string& param_file, const Solver* root_solver) - : net_(), callbacks_(), root_solver_(root_solver), - requested_early_exit_(false) { +Solver::Solver(const string& param_file) + : net_(), callbacks_(), requested_early_exit_(false) { SolverParameter param; ReadSolverParamsFromTextFileOrDie(param_file, ¶m); Init(param); @@ -43,15 +41,13 @@ Solver::Solver(const string& 
param_file, const Solver* root_solver) template void Solver::Init(const SolverParameter& param) { - CHECK(Caffe::root_solver() || root_solver_) - << "root_solver_ needs to be set for all non-root solvers"; LOG_IF(INFO, Caffe::root_solver()) << "Initializing solver from parameters: " << std::endl << param.DebugString(); param_ = param; CHECK_GE(param_.average_loss(), 1) << "average_loss should be non-negative."; CheckSnapshotWritePermissions(); - if (Caffe::root_solver() && param_.random_seed() >= 0) { - Caffe::set_random_seed(param_.random_seed()); + if (param_.random_seed() >= 0) { + Caffe::set_random_seed(param_.random_seed() + Caffe::solver_rank()); } // Scaffolding code InitTrainNet(); @@ -101,11 +97,7 @@ void Solver::InitTrainNet() { net_state.MergeFrom(net_param.state()); net_state.MergeFrom(param_.train_state()); net_param.mutable_state()->CopyFrom(net_state); - if (Caffe::root_solver()) { - net_.reset(new Net(net_param)); - } else { - net_.reset(new Net(net_param, root_solver_->net_.get())); - } + net_.reset(new Net(net_param)); } template @@ -180,12 +172,7 @@ void Solver::InitTestNets() { net_params[i].mutable_state()->CopyFrom(net_state); LOG(INFO) << "Creating test net (#" << i << ") specified by " << sources[i]; - if (Caffe::root_solver()) { - test_nets_[i].reset(new Net(net_params[i])); - } else { - test_nets_[i].reset(new Net(net_params[i], - root_solver_->test_nets_[i].get())); - } + test_nets_[i].reset(new Net(net_params[i])); test_nets_[i]->set_debug_info(param_.debug_info()); } } @@ -197,14 +184,16 @@ void Solver::Step(int iters) { int average_loss = this->param_.average_loss(); losses_.clear(); smoothed_loss_ = 0; + iteration_timer_.Start(); while (iter_ < stop_iter) { // zero-init the params net_->ClearParamDiffs(); if (param_.test_interval() && iter_ % param_.test_interval() == 0 - && (iter_ > 0 || param_.test_initialization()) - && Caffe::root_solver()) { - TestAll(); + && (iter_ > 0 || param_.test_initialization())) { + if 
(Caffe::root_solver()) { + TestAll(); + } if (requested_early_exit_) { // Break out of the while loop because stop was requested while testing. break; @@ -225,8 +214,13 @@ void Solver::Step(int iters) { // average the loss across iterations for smoothed reporting UpdateSmoothedLoss(loss, start_iter, average_loss); if (display) { + float lapse = iteration_timer_.Seconds(); + float per_s = (iter_ - iterations_last_) / (lapse ? lapse : 1); LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << iter_ - << ", loss = " << smoothed_loss_; + << " (" << per_s << " iter/s, " << lapse << "s/" + << param_.display() << " iters), loss = " << smoothed_loss_; + iteration_timer_.Start(); + iterations_last_ = iter_; const vector*>& result = net_->output_blobs(); int score_index = 0; for (int j = 0; j < result.size(); ++j) { diff --git a/src/caffe/solvers/adagrad_solver.cpp b/src/caffe/solvers/adagrad_solver.cpp index e78eadca141..d8107e1e623 100644 --- a/src/caffe/solvers/adagrad_solver.cpp +++ b/src/caffe/solvers/adagrad_solver.cpp @@ -12,7 +12,6 @@ void adagrad_update_gpu(int N, Dtype* g, Dtype* h, Dtype delta, template void AdaGradSolver::ComputeUpdateValue(int param_id, Dtype rate) { - CHECK(Caffe::root_solver()); const vector*>& net_params = this->net_->learnable_params(); const vector& net_params_lr = this->net_->params_lr(); Dtype delta = this->param_.delta(); diff --git a/src/caffe/solvers/nesterov_solver.cpp b/src/caffe/solvers/nesterov_solver.cpp index 23ab2d4369a..7c1fac1f884 100644 --- a/src/caffe/solvers/nesterov_solver.cpp +++ b/src/caffe/solvers/nesterov_solver.cpp @@ -12,7 +12,6 @@ void nesterov_update_gpu(int N, Dtype* g, Dtype* h, Dtype momentum, template void NesterovSolver::ComputeUpdateValue(int param_id, Dtype rate) { - CHECK(Caffe::root_solver()); const vector*>& net_params = this->net_->learnable_params(); const vector& net_params_lr = this->net_->params_lr(); Dtype momentum = this->param_.momentum(); diff --git a/src/caffe/solvers/sgd_solver.cpp 
b/src/caffe/solvers/sgd_solver.cpp index f30f316d1a0..ad6abe54a0a 100644 --- a/src/caffe/solvers/sgd_solver.cpp +++ b/src/caffe/solvers/sgd_solver.cpp @@ -100,10 +100,10 @@ void SGDSolver::ClipGradients() { template void SGDSolver::ApplyUpdate() { - CHECK(Caffe::root_solver()); Dtype rate = GetLearningRate(); if (this->param_.display() && this->iter_ % this->param_.display() == 0) { - LOG(INFO) << "Iteration " << this->iter_ << ", lr = " << rate; + LOG_IF(INFO, Caffe::root_solver()) << "Iteration " << this->iter_ + << ", lr = " << rate; } ClipGradients(); for (int param_id = 0; param_id < this->net_->learnable_params().size(); diff --git a/src/caffe/syncedmem.cpp b/src/caffe/syncedmem.cpp index 4d3564172ab..88d9b78510a 100644 --- a/src/caffe/syncedmem.cpp +++ b/src/caffe/syncedmem.cpp @@ -3,26 +3,41 @@ #include "caffe/util/math_functions.hpp" namespace caffe { +SyncedMemory::SyncedMemory() + : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(0), head_(UNINITIALIZED), + own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false) { +#ifndef CPU_ONLY +#ifdef DEBUG + CUDA_CHECK(cudaGetDevice(&device_)); +#endif +#endif +} + +SyncedMemory::SyncedMemory(size_t size) + : cpu_ptr_(NULL), gpu_ptr_(NULL), size_(size), head_(UNINITIALIZED), + own_cpu_data_(false), cpu_malloc_use_cuda_(false), own_gpu_data_(false) { +#ifndef CPU_ONLY +#ifdef DEBUG + CUDA_CHECK(cudaGetDevice(&device_)); +#endif +#endif +} SyncedMemory::~SyncedMemory() { + check_device(); if (cpu_ptr_ && own_cpu_data_) { CaffeFreeHost(cpu_ptr_, cpu_malloc_use_cuda_); } #ifndef CPU_ONLY if (gpu_ptr_ && own_gpu_data_) { - int initial_device; - cudaGetDevice(&initial_device); - if (gpu_device_ != -1) { - CUDA_CHECK(cudaSetDevice(gpu_device_)); - } CUDA_CHECK(cudaFree(gpu_ptr_)); - cudaSetDevice(initial_device); } #endif // CPU_ONLY } inline void SyncedMemory::to_cpu() { + check_device(); switch (head_) { case UNINITIALIZED: CaffeMallocHost(&cpu_ptr_, size_, &cpu_malloc_use_cuda_); @@ -49,10 +64,10 @@ inline void 
SyncedMemory::to_cpu() { } inline void SyncedMemory::to_gpu() { + check_device(); #ifndef CPU_ONLY switch (head_) { case UNINITIALIZED: - CUDA_CHECK(cudaGetDevice(&gpu_device_)); CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); caffe_gpu_memset(size_, 0, gpu_ptr_); head_ = HEAD_AT_GPU; @@ -60,7 +75,6 @@ inline void SyncedMemory::to_gpu() { break; case HEAD_AT_CPU: if (gpu_ptr_ == NULL) { - CUDA_CHECK(cudaGetDevice(&gpu_device_)); CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); own_gpu_data_ = true; } @@ -77,11 +91,13 @@ inline void SyncedMemory::to_gpu() { } const void* SyncedMemory::cpu_data() { + check_device(); to_cpu(); return (const void*)cpu_ptr_; } void SyncedMemory::set_cpu_data(void* data) { + check_device(); CHECK(data); if (own_cpu_data_) { CaffeFreeHost(cpu_ptr_, cpu_malloc_use_cuda_); @@ -92,6 +108,7 @@ void SyncedMemory::set_cpu_data(void* data) { } const void* SyncedMemory::gpu_data() { + check_device(); #ifndef CPU_ONLY to_gpu(); return (const void*)gpu_ptr_; @@ -102,16 +119,11 @@ const void* SyncedMemory::gpu_data() { } void SyncedMemory::set_gpu_data(void* data) { + check_device(); #ifndef CPU_ONLY CHECK(data); if (own_gpu_data_) { - int initial_device; - cudaGetDevice(&initial_device); - if (gpu_device_ != -1) { - CUDA_CHECK(cudaSetDevice(gpu_device_)); - } CUDA_CHECK(cudaFree(gpu_ptr_)); - cudaSetDevice(initial_device); } gpu_ptr_ = data; head_ = HEAD_AT_GPU; @@ -122,12 +134,14 @@ void SyncedMemory::set_gpu_data(void* data) { } void* SyncedMemory::mutable_cpu_data() { + check_device(); to_cpu(); head_ = HEAD_AT_CPU; return cpu_ptr_; } void* SyncedMemory::mutable_gpu_data() { + check_device(); #ifndef CPU_ONLY to_gpu(); head_ = HEAD_AT_GPU; @@ -140,9 +154,9 @@ void* SyncedMemory::mutable_gpu_data() { #ifndef CPU_ONLY void SyncedMemory::async_gpu_push(const cudaStream_t& stream) { + check_device(); CHECK(head_ == HEAD_AT_CPU); if (gpu_ptr_ == NULL) { - CUDA_CHECK(cudaGetDevice(&gpu_device_)); CUDA_CHECK(cudaMalloc(&gpu_ptr_, size_)); own_gpu_data_ = true; } @@ 
-153,5 +167,20 @@ void SyncedMemory::async_gpu_push(const cudaStream_t& stream) { } #endif +void SyncedMemory::check_device() { +#ifndef CPU_ONLY +#ifdef DEBUG + int device; + cudaGetDevice(&device); + CHECK(device == device_); + if (gpu_ptr_ && own_gpu_data_) { + cudaPointerAttributes attributes; + CUDA_CHECK(cudaPointerGetAttributes(&attributes, gpu_ptr_)); + CHECK(attributes.device == device_); + } +#endif +#endif +} + } // namespace caffe diff --git a/src/caffe/test/test_data_layer.cpp b/src/caffe/test/test_data_layer.cpp index 3e8d113d918..3835af1f173 100644 --- a/src/caffe/test/test_data_layer.cpp +++ b/src/caffe/test/test_data_layer.cpp @@ -105,6 +105,32 @@ class DataLayerTest : public MultiDeviceTest { } } + void TestSkip() { + LayerParameter param; + param.set_phase(TRAIN); + DataParameter* data_param = param.mutable_data_param(); + int batch_size = 5; + data_param->set_batch_size(batch_size); + data_param->set_source(filename_->c_str()); + data_param->set_backend(backend_); + Caffe::set_solver_count(8); + for (int dev = 0; dev < Caffe::solver_count(); ++dev) { + Caffe::set_solver_rank(dev); + DataLayer layer(param); + layer.SetUp(blob_bottom_vec_, blob_top_vec_); + int label = dev; + for (int iter = 0; iter < 10; ++iter) { + layer.Forward(blob_bottom_vec_, blob_top_vec_); + for (int i = 0; i < batch_size; ++i) { + EXPECT_EQ(label % batch_size, blob_top_label_->cpu_data()[i]); + label += Caffe::solver_count(); + } + } + } + Caffe::set_solver_count(1); + Caffe::set_solver_rank(0); + } + void TestReshape(DataParameter_DB backend) { const int num_inputs = 5; // Save data of varying shapes. 
@@ -356,6 +382,11 @@ TYPED_TEST(DataLayerTest, TestReadLevelDB) { this->TestRead(); } +TYPED_TEST(DataLayerTest, TestSkipLevelDB) { + this->Fill(false, DataParameter_DB_LEVELDB); + this->TestSkip(); +} + TYPED_TEST(DataLayerTest, TestReshapeLevelDB) { this->TestReshape(DataParameter_DB_LEVELDB); } @@ -396,6 +427,11 @@ TYPED_TEST(DataLayerTest, TestReadLMDB) { this->TestRead(); } +TYPED_TEST(DataLayerTest, TestSkipLMDB) { + this->Fill(false, DataParameter_DB_LMDB); + this->TestSkip(); +} + TYPED_TEST(DataLayerTest, TestReshapeLMDB) { this->TestReshape(DataParameter_DB_LMDB); } diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index 975a8f0f88a..6ad0d8f6544 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -36,7 +36,9 @@ class GradientBasedSolverTest : public MultiDeviceTest { string snapshot_prefix_; shared_ptr > solver_; - shared_ptr > sync_; +#ifdef USE_NCCL + shared_ptr > nccl_; +#endif int seed_; // Dimensions are determined by generate_sample_data.py // TODO this is brittle and the hdf5 file should be checked instead. 
@@ -85,6 +87,7 @@ class GradientBasedSolverTest : public MultiDeviceTest { "lr_policy: 'fixed' " "iter_size: " << iter_size << " " "device_id: " << device_id << " " + "layer_wise_reduce: " << (!share_) << " " "net_param { " " name: 'TestNetwork' " " layer { " @@ -183,7 +186,7 @@ class GradientBasedSolverTest : public MultiDeviceTest { } Caffe::set_random_seed(this->seed_); this->InitSolverFromProtoString(proto.str()); - if (from_snapshot != NULL) { + if (from_snapshot) { this->solver_->Restore(from_snapshot); for (int i = 0; i < this->solver_->iter(); ++i) { this->solver_->net()->Forward(); @@ -202,9 +205,10 @@ class GradientBasedSolverTest : public MultiDeviceTest { gpus.push_back(i); } Caffe::set_solver_count(gpus.size()); - this->sync_.reset(new P2PSync( - this->solver_, NULL, this->solver_->param())); - this->sync_->Run(gpus); +#ifdef USE_NCCL + this->nccl_.reset(new NCCL(this->solver_)); + this->nccl_->Run(gpus, from_snapshot); +#endif Caffe::set_solver_count(1); } if (snapshot) { @@ -457,12 +461,28 @@ class GradientBasedSolverTest : public MultiDeviceTest { const int kIterSize = 1; // Test over all numbers of devices. int available_devices = 1; -#ifndef CPU_ONLY +#ifdef USE_NCCL if (Caffe::mode() == Caffe::GPU) { CUDA_CHECK(cudaGetDeviceCount(&available_devices)); } #endif - for (int devices = 1; devices <= available_devices; ++devices) { + // Takes a while to test all sizes for each test so sparse + vector sizes; + sizes.push_back(1); + if (available_devices >= 2) { + sizes.push_back(2); + } + if (available_devices >= 3) { + sizes.push_back(3); + } + if (available_devices >= 8) { + sizes.push_back(8); + } + if (available_devices >= 16) { + sizes.push_back(16); + } + for (int i = 0; i < sizes.size(); ++i) { + int devices = sizes[i]; // Configure batch size for single / multi device equivalence. // Constant data is needed for multi device as for accumulation. 
num_ = kNum * devices; diff --git a/src/caffe/test/test_hdf5data_layer.cpp b/src/caffe/test/test_hdf5data_layer.cpp index 8884ce95a23..68e10286d0b 100644 --- a/src/caffe/test/test_hdf5data_layer.cpp +++ b/src/caffe/test/test_hdf5data_layer.cpp @@ -133,4 +133,34 @@ TYPED_TEST(HDF5DataLayerTest, TestRead) { } } +TYPED_TEST(HDF5DataLayerTest, TestSkip) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter param; + param.add_top("data"); + param.add_top("label"); + + HDF5DataParameter* hdf5_data_param = param.mutable_hdf5_data_param(); + int batch_size = 5; + hdf5_data_param->set_batch_size(batch_size); + hdf5_data_param->set_source(*(this->filename)); + + Caffe::set_solver_count(8); + for (int dev = 0; dev < Caffe::solver_count(); ++dev) { + Caffe::set_solver_rank(dev); + + HDF5DataLayer layer(param); + layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + int label = dev; + for (int iter = 0; iter < 1; ++iter) { + layer.Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int i = 0; i < batch_size; ++i) { + EXPECT_EQ(1 + label, this->blob_top_label_->cpu_data()[i]); + label = (label + Caffe::solver_count()) % (batch_size * 2); + } + } + } + Caffe::set_solver_count(1); + Caffe::set_solver_rank(0); +} + } // namespace caffe diff --git a/src/caffe/util/blocking_queue.cpp b/src/caffe/util/blocking_queue.cpp index 058668fe28c..f69d210459c 100644 --- a/src/caffe/util/blocking_queue.cpp +++ b/src/caffe/util/blocking_queue.cpp @@ -1,7 +1,6 @@ #include #include -#include "caffe/data_reader.hpp" #include "caffe/layers/base_data_layer.hpp" #include "caffe/parallel.hpp" #include "caffe/util/blocking_queue.hpp" @@ -88,9 +87,5 @@ size_t BlockingQueue::size() const { template class BlockingQueue*>; template class BlockingQueue*>; -template class BlockingQueue; -template class BlockingQueue >; -template class BlockingQueue*>; -template class BlockingQueue*>; } // namespace caffe diff --git a/src/caffe/util/db_lmdb.cpp b/src/caffe/util/db_lmdb.cpp index 
fb1d4956aa1..491a9bd03a6 100644 --- a/src/caffe/util/db_lmdb.cpp +++ b/src/caffe/util/db_lmdb.cpp @@ -32,7 +32,7 @@ void LMDB::Open(const string& source, Mode mode) { MDB_CHECK(rc); } #endif - LOG(INFO) << "Opened lmdb " << source; + LOG_IF(INFO, Caffe::root_solver()) << "Opened lmdb " << source; } LMDBCursor* LMDB::NewCursor() { diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu index 4c587537435..6d001026082 100644 --- a/src/caffe/util/math_functions.cu +++ b/src/caffe/util/math_functions.cu @@ -90,6 +90,26 @@ void caffe_gpu_scal(const int N, const double alpha, double *X) { CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), N, &alpha, X, 1)); } +template <> +void caffe_gpu_scal(const int N, const float alpha, float* X, + cudaStream_t str) { + cudaStream_t initial_stream; + CUBLAS_CHECK(cublasGetStream(Caffe::cublas_handle(), &initial_stream)); + CUBLAS_CHECK(cublasSetStream(Caffe::cublas_handle(), str)); + CUBLAS_CHECK(cublasSscal(Caffe::cublas_handle(), N, &alpha, X, 1)); + CUBLAS_CHECK(cublasSetStream(Caffe::cublas_handle(), initial_stream)); +} + +template <> +void caffe_gpu_scal(const int N, const double alpha, double* X, + cudaStream_t str) { + cudaStream_t initial_stream; + CUBLAS_CHECK(cublasGetStream(Caffe::cublas_handle(), &initial_stream)); + CUBLAS_CHECK(cublasSetStream(Caffe::cublas_handle(), str)); + CUBLAS_CHECK(cublasDscal(Caffe::cublas_handle(), N, &alpha, X, 1)); + CUBLAS_CHECK(cublasSetStream(Caffe::cublas_handle(), initial_stream)); +} + template <> void caffe_gpu_axpby(const int N, const float alpha, const float* X, const float beta, float* Y) { diff --git a/tools/caffe.cpp b/tools/caffe.cpp index 9bf4214ad93..3587d8aa1be 100644 --- a/tools/caffe.cpp +++ b/tools/caffe.cpp @@ -195,6 +195,7 @@ int train() { // If the gpus flag is not provided, allow the mode and device to be set // in the solver prototxt. 
if (FLAGS_gpu.size() == 0 + && solver_param.has_solver_mode() && solver_param.solver_mode() == caffe::SolverParameter_SolverMode_GPU) { if (solver_param.has_device_id()) { FLAGS_gpu = "" + @@ -244,11 +245,15 @@ int train() { CopyLayers(solver.get(), FLAGS_weights); } + LOG(INFO) << "Starting Optimization"; if (gpus.size() > 1) { - caffe::P2PSync sync(solver, NULL, solver->param()); - sync.Run(gpus); +#ifdef USE_NCCL + caffe::NCCL nccl(solver); + nccl.Run(gpus, FLAGS_snapshot.size() > 0 ? FLAGS_snapshot.c_str() : NULL); +#else + LOG(FATAL) << "Multi-GPU execution not available - rebuild with USE_NCCL"; +#endif } else { - LOG(INFO) << "Starting Optimization"; solver->Solve(); } LOG(INFO) << "Optimization Done."; From e21b42004001879b232daed8f142fbc5a7e0b75d Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Tue, 22 Nov 2016 16:46:55 -0800 Subject: [PATCH 173/264] Python Multi-GPU --- python/caffe/__init__.py | 4 +- python/caffe/_caffe.cpp | 96 ++++++++++++++++++++++++++++++++++++-- python/caffe/pycaffe.py | 2 +- python/train.py | 99 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 193 insertions(+), 8 deletions(-) create mode 100644 python/train.py diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index 5fc6ec9b920..dde2e9863e4 100644 --- a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -1,5 +1,5 @@ -from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver -from ._caffe import init_log, log, set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed +from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver, NCCL, Timer +from ._caffe import init_log, log, set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed, solver_count, set_solver_count, solver_rank, set_solver_rank, Layer, get_solver from ._caffe import __version__ from .proto.caffe_pb2 import TRAIN, TEST 
from .classifier import Classifier diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 0a86045bd46..04dac2344a0 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -267,12 +267,12 @@ bp::object BlobVec_add_blob(bp::tuple args, bp::dict kwargs) { } template -class PythonCallback: public Solver::Callback { +class SolverCallback: public Solver::Callback { protected: bp::object on_start_, on_gradients_ready_; public: - PythonCallback(bp::object on_start, bp::object on_gradients_ready) + SolverCallback(bp::object on_start, bp::object on_gradients_ready) : on_start_(on_start), on_gradients_ready_(on_gradients_ready) { } virtual void on_gradients_ready() { on_gradients_ready_(); @@ -284,9 +284,61 @@ class PythonCallback: public Solver::Callback { template void Solver_add_callback(Solver * solver, bp::object on_start, bp::object on_gradients_ready) { - solver->add_callback(new PythonCallback(on_start, on_gradients_ready)); + solver->add_callback(new SolverCallback(on_start, on_gradients_ready)); } +// Seems boost cannot call the base method directly +void Solver_add_nccl(SGDSolver* solver +#ifdef USE_NCCL + , NCCL* nccl +#endif +) { +#ifdef USE_NCCL + solver->add_callback(nccl); +#endif +} + +template +class NetCallback: public Net::Callback { + public: + explicit NetCallback(bp::object run) : run_(run) {} + + protected: + virtual void run(int layer) { + run_(layer); + } + bp::object run_; +}; +void Net_before_forward(Net* net, bp::object run) { + net->add_before_forward(new NetCallback(run)); +} +void Net_after_forward(Net* net, bp::object run) { + net->add_after_forward(new NetCallback(run)); +} +void Net_before_backward(Net* net, bp::object run) { + net->add_before_backward(new NetCallback(run)); +} +void Net_after_backward(Net* net, bp::object run) { + net->add_after_backward(new NetCallback(run)); +} + +void Net_add_nccl(Net* net +#ifdef USE_NCCL + , NCCL* nccl +#endif +) { +#ifdef USE_NCCL + net->add_after_backward(nccl); +#endif +} 
+#ifndef USE_NCCL +template +class NCCL { + public: + NCCL(shared_ptr > solver, const string& uid) {} +}; +#endif + BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1); BOOST_PYTHON_MODULE(_caffe) { @@ -303,6 +355,10 @@ BOOST_PYTHON_MODULE(_caffe) { bp::def("set_mode_gpu", &set_mode_gpu); bp::def("set_random_seed", &set_random_seed); bp::def("set_device", &Caffe::SetDevice); + bp::def("solver_count", &Caffe::solver_count); + bp::def("set_solver_count", &Caffe::set_solver_count); + bp::def("solver_rank", &Caffe::solver_rank); + bp::def("set_solver_rank", &Caffe::set_solver_rank); bp::def("layer_type_list", &LayerRegistry::LayerTypeList); @@ -346,7 +402,12 @@ BOOST_PYTHON_MODULE(_caffe) { bp::with_custodian_and_ward<1, 2, bp::with_custodian_and_ward<1, 3> >()) .def("save", &Net_Save) .def("save_hdf5", &Net_SaveHDF5) - .def("load_hdf5", &Net_LoadHDF5); + .def("load_hdf5", &Net_LoadHDF5) + .def("before_forward", &Net_before_forward) + .def("after_forward", &Net_after_forward) + .def("before_backward", &Net_before_backward) + .def("after_backward", &Net_after_backward) + .def("after_backward", &Net_add_nccl); BP_REGISTER_SHARED_PTR_TO_PYTHON(Net); bp::class_, shared_ptr >, boost::noncopyable>( @@ -378,6 +439,10 @@ BOOST_PYTHON_MODULE(_caffe) { .add_property("type", bp::make_function(&Layer::type)); BP_REGISTER_SHARED_PTR_TO_PYTHON(Layer); + bp::class_("SolverParameter", bp::no_init) + .add_property("max_iter", &SolverParameter::max_iter) + .add_property("display", &SolverParameter::display) + .add_property("layer_wise_reduce", &SolverParameter::layer_wise_reduce); bp::class_("LayerParameter", bp::no_init); bp::class_, shared_ptr >, boost::noncopyable>( @@ -387,11 +452,14 @@ BOOST_PYTHON_MODULE(_caffe) { bp::return_internal_reference<>())) .add_property("iter", &Solver::iter) .def("add_callback", &Solver_add_callback) + .def("add_callback", &Solver_add_nccl) .def("solve", static_cast::*)(const char*)>( &Solver::Solve), SolveOverloads()) .def("step", 
&Solver::Step) .def("restore", &Solver::Restore) - .def("snapshot", &Solver::Snapshot); + .def("snapshot", &Solver::Snapshot) + .add_property("param", bp::make_function(&Solver::param, + bp::return_value_policy())); BP_REGISTER_SHARED_PTR_TO_PYTHON(Solver); bp::class_, bp::bases >, @@ -435,6 +503,24 @@ BOOST_PYTHON_MODULE(_caffe) { bp::class_ >("BoolVec") .def(bp::vector_indexing_suite >()); + bp::class_, shared_ptr >, + boost::noncopyable>("NCCL", + bp::init >, const string&>()) +#ifdef USE_NCCL + .def("new_uid", &NCCL::new_uid).staticmethod("new_uid") + .def("bcast", &NCCL::Broadcast) +#endif + /* NOLINT_NEXT_LINE(whitespace/semicolon) */ + ; + BP_REGISTER_SHARED_PTR_TO_PYTHON(NCCL); + + bp::class_, boost::noncopyable>( + "Timer", bp::init<>()) + .def("start", &Timer::Start) + .def("stop", &Timer::Stop) + .add_property("ms", &Timer::MilliSeconds); + BP_REGISTER_SHARED_PTR_TO_PYTHON(Timer); + // boost python expects a void (missing) return value, while import_array // returns NULL for python3. import_array1() forces a void return value. import_array1(); diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index 5bae18d9a4d..18803818fef 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -11,7 +11,7 @@ import numpy as np from ._caffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, \ - RMSPropSolver, AdaDeltaSolver, AdamSolver + RMSPropSolver, AdaDeltaSolver, AdamSolver, NCCL, Timer import caffe.io import six diff --git a/python/train.py b/python/train.py new file mode 100644 index 00000000000..730dbe70186 --- /dev/null +++ b/python/train.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +""" +Trains a model using one or more GPUs. 
+""" +from multiprocessing import Process + +import caffe + + +def train( + solver, # solver proto definition + snapshot, # solver snapshot to restore + gpus, # list of device ids + timing=False, # show timing info for compute and communications +): + # NCCL uses a uid to identify a session + uid = caffe.NCCL.new_uid() + + caffe.init_log() + caffe.log('Using devices %s' % str(gpus)) + + procs = [] + for rank in range(len(gpus)): + p = Process(target=solve, + args=(solver, snapshot, gpus, timing, uid, rank)) + p.daemon = True + p.start() + procs.append(p) + for p in procs: + p.join() + + +def time(solver, nccl): + fprop = [] + bprop = [] + total = caffe.Timer() + allrd = caffe.Timer() + for _ in range(len(solver.net.layers)): + fprop.append(caffe.Timer()) + bprop.append(caffe.Timer()) + display = solver.param.display + + def show_time(): + if solver.iter % display == 0: + s = '\n' + for i in range(len(solver.net.layers)): + s += 'forw %3d %8s ' % (i, solver.net.layers[i].layer_param.name) + s += ': %.2f\n' % fprop[i].ms + for i in range(len(solver.net.layers) - 1, -1, -1): + s += 'back %3d %8s ' % (i, solver.net.layers[i].layer_param.name) + s += ': %.2f\n' % bprop[i].ms + s += 'solver total: %.2f\n' % total.ms + s += 'allreduce: %.2f\n' % allrd.ms + caffe.log(s) + + solver.net.before_forward(lambda layer: fprop[layer].start()) + solver.net.after_forward(lambda layer: fprop[layer].stop()) + solver.net.before_backward(lambda layer: bprop[layer].start()) + solver.net.after_backward(lambda layer: bprop[layer].stop()) + solver.add_callback(lambda: total.start(), lambda: (total.stop(), allrd.start())) + solver.add_callback(nccl) + solver.add_callback(lambda: '', lambda: (allrd.stop(), show_time())) + + +def solve(proto, snapshot, gpus, timing, uid, rank): + caffe.set_mode_gpu() + caffe.set_device(gpus[rank]) + caffe.set_solver_count(len(gpus)) + caffe.set_solver_rank(rank) + + solver = caffe.SGDSolver(proto) + if snapshot and len(snapshot) != 0: + 
solver.restore(snapshot) + + nccl = caffe.NCCL(solver, uid) + nccl.bcast() + + if timing and rank == 0: + time(solver, nccl) + else: + solver.add_callback(nccl) + + if solver.param.layer_wise_reduce: + solver.net.after_backward(nccl) + solver.step(solver.param.max_iter) + + +if __name__ == '__main__': + import argparse + parser = argparse.ArgumentParser() + + parser.add_argument("--solver", required=True, help="Solver proto definition.") + parser.add_argument("--snapshot", help="Solver snapshot to restore.") + parser.add_argument("--gpus", type=int, nargs='+', default=[0], + help="List of device ids.") + parser.add_argument("--timing", action='store_true', help="Show timing info.") + args = parser.parse_args() + + train(args.solver, args.snapshot, args.gpus, args.timing) From 0d27efc7e3d3d2edbf45cccb73bad03ad655c164 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marian=20Gla=CC=88ser?= Date: Thu, 22 Dec 2016 12:25:46 -0800 Subject: [PATCH 174/264] Python layers should build on multiprocess & solver_cnt; enable with bindings --- include/caffe/layers/python_layer.hpp | 2 +- python/caffe/__init__.py | 2 +- python/caffe/_caffe.cpp | 1 + python/train.py | 5 +++-- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/caffe/layers/python_layer.hpp b/include/caffe/layers/python_layer.hpp index 529b09cb88b..10c4bfd0250 100644 --- a/include/caffe/layers/python_layer.hpp +++ b/include/caffe/layers/python_layer.hpp @@ -21,7 +21,7 @@ class PythonLayer : public Layer { // Disallow PythonLayer in MultiGPU training stage, due to GIL issues // Details: https://github.com/BVLC/caffe/issues/2936 if (this->phase_ == TRAIN && Caffe::solver_count() > 1 - && !Caffe::root_solver() && !Caffe::multiprocess()) { + && !Caffe::multiprocess()) { LOG(FATAL) << "PythonLayer does not support CLI Multi-GPU, use train.py"; } self_.attr("param_str") = bp::str( diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index dde2e9863e4..43a0c49be63 100644 --- 
a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -1,5 +1,5 @@ from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver, NCCL, Timer -from ._caffe import init_log, log, set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed, solver_count, set_solver_count, solver_rank, set_solver_rank, Layer, get_solver +from ._caffe import init_log, log, set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed, solver_count, set_solver_count, solver_rank, set_solver_rank, set_multiprocess, Layer, get_solver from ._caffe import __version__ from .proto.caffe_pb2 import TRAIN, TEST from .classifier import Classifier diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 04dac2344a0..3589e476f5c 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -359,6 +359,7 @@ BOOST_PYTHON_MODULE(_caffe) { bp::def("set_solver_count", &Caffe::set_solver_count); bp::def("solver_rank", &Caffe::solver_rank); bp::def("set_solver_rank", &Caffe::set_solver_rank); + bp::def("set_multiprocess", &Caffe::set_multiprocess); bp::def("layer_type_list", &LayerRegistry::LayerTypeList); diff --git a/python/train.py b/python/train.py index 730dbe70186..5897f5dcb90 100644 --- a/python/train.py +++ b/python/train.py @@ -44,10 +44,10 @@ def show_time(): if solver.iter % display == 0: s = '\n' for i in range(len(solver.net.layers)): - s += 'forw %3d %8s ' % (i, solver.net.layers[i].layer_param.name) + s += 'forw %3d %8s ' % (i, solver.net._layer_names[i]) s += ': %.2f\n' % fprop[i].ms for i in range(len(solver.net.layers) - 1, -1, -1): - s += 'back %3d %8s ' % (i, solver.net.layers[i].layer_param.name) + s += 'back %3d %8s ' % (i, solver.net._layer_names[i]) s += ': %.2f\n' % bprop[i].ms s += 'solver total: %.2f\n' % total.ms s += 'allreduce: %.2f\n' % allrd.ms @@ -67,6 +67,7 @@ def solve(proto, snapshot, gpus, timing, uid, rank): 
caffe.set_device(gpus[rank]) caffe.set_solver_count(len(gpus)) caffe.set_solver_rank(rank) + caffe.set_multiprocess(True) solver = caffe.SGDSolver(proto) if snapshot and len(snapshot) != 0: From 5f28eb1147c1abb6e5e5c7cd282218679b0d531d Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Wed, 4 Jan 2017 00:25:00 -0800 Subject: [PATCH 175/264] Using default from proto for prefetch --- include/caffe/layers/base_data_layer.hpp | 3 --- src/caffe/layers/base_data_layer.cpp | 3 +-- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/include/caffe/layers/base_data_layer.hpp b/include/caffe/layers/base_data_layer.hpp index 925b019d460..21d3ada50d0 100644 --- a/include/caffe/layers/base_data_layer.hpp +++ b/include/caffe/layers/base_data_layer.hpp @@ -67,9 +67,6 @@ class BasePrefetchingDataLayer : virtual void Forward_gpu(const vector*>& bottom, const vector*>& top); - // Prefetches batches (asynchronously if to GPU memory) - static const int PREFETCH_COUNT = 4; // same as proto - protected: virtual void InternalThreadEntry(); virtual void load_batch(Batch* batch) = 0; diff --git a/src/caffe/layers/base_data_layer.cpp b/src/caffe/layers/base_data_layer.cpp index 9414f6f98b2..93a798f3571 100644 --- a/src/caffe/layers/base_data_layer.cpp +++ b/src/caffe/layers/base_data_layer.cpp @@ -36,8 +36,7 @@ template BasePrefetchingDataLayer::BasePrefetchingDataLayer( const LayerParameter& param) : BaseDataLayer(param), - prefetch_(param.has_data_param() ? 
- param.data_param().prefetch() : PREFETCH_COUNT), + prefetch_(param.data_param().prefetch()), prefetch_free_(), prefetch_full_(), prefetch_current_() { for (int i = 0; i < prefetch_.size(); ++i) { prefetch_[i].reset(new Batch()); From 8e63bb6ef1537db2d94ddf2dc084020af5c8727d Mon Sep 17 00:00:00 2001 From: Fan Yang Date: Thu, 12 Jan 2017 15:26:07 +0800 Subject: [PATCH 176/264] minor typo --- models/bvlc_googlenet/train_val.prototxt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 models/bvlc_googlenet/train_val.prototxt diff --git a/models/bvlc_googlenet/train_val.prototxt b/models/bvlc_googlenet/train_val.prototxt old mode 100644 new mode 100755 index 5dee3abe28f..5fe367f2263 --- a/models/bvlc_googlenet/train_val.prototxt +++ b/models/bvlc_googlenet/train_val.prototxt @@ -1692,7 +1692,7 @@ layer { type: "SoftmaxWithLoss" bottom: "loss2/classifier" bottom: "label" - top: "loss2/loss1" + top: "loss2/loss2" loss_weight: 0.3 } layer { From 91c15e85124ce2b143d2c18ccab5c5740ef4ce31 Mon Sep 17 00:00:00 2001 From: Guillaume Dumont Date: Fri, 13 Jan 2017 14:33:35 -0500 Subject: [PATCH 177/264] Python 2/3 compatible download_model_binary.py --- scripts/download_model_binary.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/download_model_binary.py b/scripts/download_model_binary.py index fcdbb5a91a2..a72fd5d76ba 100755 --- a/scripts/download_model_binary.py +++ b/scripts/download_model_binary.py @@ -3,10 +3,11 @@ import sys import time import yaml -import urllib import hashlib import argparse +from six.moves import urllib + required_keys = ['caffemodel', 'caffemodel_url', 'sha1'] @@ -69,7 +70,7 @@ def model_checks_out(filename=model_filename, sha1=frontmatter['sha1']): sys.exit(0) # Download and verify model. - urllib.urlretrieve( + urllib.request.urlretrieve( frontmatter['caffemodel_url'], model_filename, reporthook) if not model_checks_out(): print('ERROR: model did not download correctly! 
Run this again.') From a19357a190664b1ea99d18e14eedc27e43ebed42 Mon Sep 17 00:00:00 2001 From: shai Date: Sun, 15 Jan 2017 08:54:45 +0000 Subject: [PATCH 178/264] fixing upgrade_proto for BatchNorm layer: be more conservative leave "name" in param, only set lr_mult and decay_mult to zero --- src/caffe/util/upgrade_proto.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/caffe/util/upgrade_proto.cpp b/src/caffe/util/upgrade_proto.cpp index a0aacbe92f8..94771c8c050 100644 --- a/src/caffe/util/upgrade_proto.cpp +++ b/src/caffe/util/upgrade_proto.cpp @@ -1018,7 +1018,13 @@ void UpgradeNetBatchNorm(NetParameter* net_param) { // the previous BatchNorm layer definition. if (net_param->layer(i).type() == "BatchNorm" && net_param->layer(i).param_size() == 3) { - net_param->mutable_layer(i)->clear_param(); + // set lr_mult and decay_mult to zero. leave all other param intact. + for (int ip = 0; ip < net_param->layer(i).param_size(); ip++) { + ParamSpec* fixed_param_spec = + net_param->mutable_layer(i)->mutable_param(ip); + fixed_param_spec->set_lr_mult(0.f); + fixed_param_spec->set_decay_mult(0.f); + } } } } From ceb25c8abe1e70558d8cc72545e4381cd1b4f273 Mon Sep 17 00:00:00 2001 From: Adam Browne Date: Wed, 18 Jan 2017 15:25:02 -0500 Subject: [PATCH 179/264] Fix various documentation typos (#4172) * fix typo (standaraized->standardized) * fix typo (convet->convert, etc..) 
* fix typo (incompartible->incompatible) * fix typo (does't->doesn't) * fix typo (decoded->decode) --- cmake/ConfigGen.cmake | 2 +- cmake/Cuda.cmake | 2 +- cmake/Targets.cmake | 6 +++--- examples/CMakeLists.txt | 2 +- src/caffe/data_transformer.cpp | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cmake/ConfigGen.cmake b/cmake/ConfigGen.cmake index 056371110b5..fd9dd2d2bb4 100644 --- a/cmake/ConfigGen.cmake +++ b/cmake/ConfigGen.cmake @@ -109,7 +109,7 @@ function(caffe_generate_export_configs) # ---[ Configure and install version file ]--- - # TODO: Lines below are commented because Caffe does't declare its version in headers. + # TODO: Lines below are commented because Caffe doesn't declare its version in headers. # When the declarations are added, modify `caffe_extract_caffe_version()` macro and uncomment # configure_file(cmake/Templates/CaffeConfigVersion.cmake.in "${PROJECT_BINARY_DIR}/CaffeConfigVersion.cmake" @ONLY) diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake index 7146a24450c..0fbf30187e7 100644 --- a/cmake/Cuda.cmake +++ b/cmake/Cuda.cmake @@ -284,7 +284,7 @@ mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION) if(APPLE) caffe_detect_darwin_version(OSX_VERSION) - # OSX 10.9 and higher uses clang/libc++ by default which is incompartible with old CUDA toolkits + # OSX 10.9 and higher uses clang/libc++ by default which is incompatible with old CUDA toolkits if(OSX_VERSION VERSION_GREATER 10.8) # enabled by default if and only if CUDA version is less than 7.0 caffe_option(USE_libstdcpp "Use libstdc++ instead of libc++" (CUDA_VERSION VERSION_LESS 7.0)) diff --git a/cmake/Targets.cmake b/cmake/Targets.cmake index 2cb11584a5c..090f86c5500 100644 --- a/cmake/Targets.cmake +++ b/cmake/Targets.cmake @@ -88,7 +88,7 @@ function(caffe_pickup_caffe_sources root) file(GLOB_RECURSE proto_files ${root}/src/caffe/*.proto) list(APPEND srcs ${proto_files}) - # convet to absolute paths + # convert to absolute paths 
caffe_convert_absolute_paths(srcs) caffe_convert_absolute_paths(cuda) caffe_convert_absolute_paths(test_srcs) @@ -102,7 +102,7 @@ function(caffe_pickup_caffe_sources root) endfunction() ################################################################################################ -# Short command for setting defeault target properties +# Short command for setting default target properties # Usage: # caffe_default_properties() function(caffe_default_properties target) @@ -111,7 +111,7 @@ function(caffe_default_properties target) ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib" LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib" RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin") - # make sure we build all external depepdencies first + # make sure we build all external dependencies first if (DEFINED external_project_dependencies) add_dependencies(${target} ${external_project_dependencies}) endif() diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 663d7360b7d..a59e0df36b0 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -23,7 +23,7 @@ foreach(source_file ${examples_srcs}) if(UNIX OR APPLE) # Funny command to make tutorials work - # TODO: remove in future as soon as naming is standartaized everywhere + # TODO: remove in future as soon as naming is standardized everywhere set(__outname ${PROJECT_BINARY_DIR}/examples/${folder}/${name}${Caffe_POSTFIX}) add_custom_command(TARGET ${name} POST_BUILD COMMAND ln -sf "${__outname}" "${__outname}.bin") diff --git a/src/caffe/data_transformer.cpp b/src/caffe/data_transformer.cpp index 7189d67e289..3012251e0a5 100644 --- a/src/caffe/data_transformer.cpp +++ b/src/caffe/data_transformer.cpp @@ -130,7 +130,7 @@ void DataTransformer::Transform(const Datum& datum, template void DataTransformer::Transform(const Datum& datum, Blob* transformed_blob) { - // If datum is encoded, decoded and transform the cv::image. + // If datum is encoded, decode and transform the cv::image. 
if (datum.encoded()) { #ifdef USE_OPENCV CHECK(!(param_.force_color() && param_.force_gray())) From e744056d8f7ebcf7f0410a52d801d9ca552f69ad Mon Sep 17 00:00:00 2001 From: xmyqsh Date: Thu, 19 Jan 2017 05:19:48 +0800 Subject: [PATCH 180/264] remove redundant operations in Crop layer (#5138) --- src/caffe/layers/crop_layer.cpp | 40 ++++++++++++++++----------------- src/caffe/layers/crop_layer.cu | 22 +++++++----------- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/src/caffe/layers/crop_layer.cpp b/src/caffe/layers/crop_layer.cpp index d36b61ca029..ef8c177c4dd 100644 --- a/src/caffe/layers/crop_layer.cpp +++ b/src/caffe/layers/crop_layer.cpp @@ -86,27 +86,25 @@ void CropLayer::crop_copy(const vector*>& bottom, } } else { // We are at the last dimensions, which is stored continuously in memory - for (int i = 0; i < top[0]->shape(cur_dim); ++i) { - // prepare index vector reduced(red) and with offsets(off) - std::vector ind_red(cur_dim, 0); - std::vector ind_off(cur_dim+1, 0); - for (int j = 0; j < cur_dim; ++j) { - ind_red[j] = indices[j]; - ind_off[j] = indices[j] + offsets[j]; - } - ind_off[cur_dim] = offsets[cur_dim]; - // do the copy - if (is_forward) { - caffe_copy(top[0]->shape(cur_dim), - src_data + bottom[0]->offset(ind_off), - dest_data + top[0]->offset(ind_red)); - } else { - // in the backwards pass the src_data is top_diff - // and the dest_data is bottom_diff - caffe_copy(top[0]->shape(cur_dim), - src_data + top[0]->offset(ind_red), - dest_data + bottom[0]->offset(ind_off)); - } + // prepare index vector reduced(red) and with offsets(off) + std::vector ind_red(cur_dim, 0); + std::vector ind_off(cur_dim+1, 0); + for (int j = 0; j < cur_dim; ++j) { + ind_red[j] = indices[j]; + ind_off[j] = indices[j] + offsets[j]; + } + ind_off[cur_dim] = offsets[cur_dim]; + // do the copy + if (is_forward) { + caffe_copy(top[0]->shape(cur_dim), + src_data + bottom[0]->offset(ind_off), + dest_data + top[0]->offset(ind_red)); + } else { + // in the 
backwards pass the src_data is top_diff + // and the dest_data is bottom_diff + caffe_copy(top[0]->shape(cur_dim), + src_data + top[0]->offset(ind_red), + dest_data + bottom[0]->offset(ind_off)); } } } diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu index 1ea132531cf..677077cdd8b 100644 --- a/src/caffe/layers/crop_layer.cu +++ b/src/caffe/layers/crop_layer.cu @@ -8,14 +8,12 @@ namespace caffe { // strides in the last two dimensions. template __global__ void copy_kernel(const int n, const int height, const int width, - const int src_outer_stride, const int src_inner_stride, - const int dest_outer_stride, const int dest_inner_stride, + const int src_inner_stride, + const int dest_inner_stride, const Dtype* src, Dtype* dest) { CUDA_KERNEL_LOOP(index, n) { - int src_start = index / height * src_outer_stride - + index % height * src_inner_stride; - int dest_start = index / height * dest_outer_stride - + index % height * dest_inner_stride; + int src_start = index * src_inner_stride; + int dest_start = index * dest_inner_stride; for (int i = 0; i < width; ++i) { dest[dest_start + i] = src[src_start + i]; } @@ -53,11 +51,7 @@ void CropLayer::crop_copy_gpu(const vector*>& bottom, ind_off[cur_dim] = offsets[cur_dim]; ind_off[cur_dim+1] = offsets[cur_dim+1]; // Compute copy strides - const int src_outer_stride = - bottom[0]->shape(cur_dim)*bottom[0]->shape(cur_dim+1); const int src_inner_stride = bottom[0]->shape(cur_dim+1); - const int dest_outer_stride = - top[0]->shape(cur_dim)*top[0]->shape(cur_dim+1); const int dest_inner_stride = top[0]->shape(cur_dim+1); if (is_forward) { @@ -68,8 +62,8 @@ void CropLayer::crop_copy_gpu(const vector*>& bottom, // NOLINT_NEXT_LINE(whitespace/operators) copy_kernel<<>>( lines, height, width, - src_outer_stride, src_inner_stride, - dest_outer_stride, dest_inner_stride, + src_inner_stride, + dest_inner_stride, bottom_data, top_data); } else { @@ -80,8 +74,8 @@ void CropLayer::crop_copy_gpu(const vector*>& 
bottom, // NOLINT_NEXT_LINE(whitespace/operators) copy_kernel<<>>( lines, height, width, - dest_outer_stride, dest_inner_stride, - src_outer_stride, src_inner_stride, + dest_inner_stride, + src_inner_stride, top_diff, bottom_diff); } } From 9b9f6d02ccb664b7f17ce2d3d17072ba578cac09 Mon Sep 17 00:00:00 2001 From: Jonathan L Long Date: Wed, 18 Jan 2017 16:03:55 -0800 Subject: [PATCH 181/264] [build] remove trailing backslash on comment --- Makefile.config.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.config.example b/Makefile.config.example index 541cf8077d5..b590bd16ce9 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -68,7 +68,7 @@ PYTHON_INCLUDE := /usr/include/python2.7 \ # ANACONDA_HOME := $(HOME)/anaconda # PYTHON_INCLUDE := $(ANACONDA_HOME)/include \ # $(ANACONDA_HOME)/include/python2.7 \ - # $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include \ + # $(ANACONDA_HOME)/lib/python2.7/site-packages/numpy/core/include # Uncomment to use Python 3 (default is Python 2) # PYTHON_LIBRARIES := boost_python3 python3.5m From ff3158a3d0f974a15981dfdbaa95c11ec2cee097 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 18 Jan 2017 17:39:35 -0800 Subject: [PATCH 182/264] ignore generated includes for docs --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 281ef3266db..eff292b7f61 100644 --- a/.gitignore +++ b/.gitignore @@ -84,6 +84,7 @@ cmake_build # Generated documentation docs/_site +docs/_includes docs/gathered _site doxygen From 9ab67099e08c03bf57e6a67538ca4746365beda8 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 18 Jan 2017 17:40:36 -0800 Subject: [PATCH 183/264] copyright spans 2014-2017 --- LICENSE | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE b/LICENSE index d69d16f5bc7..0c99adc182c 100644 --- a/LICENSE +++ b/LICENSE @@ -1,11 +1,11 @@ COPYRIGHT All contributions by the University of California: -Copyright (c) 
2014, 2015, The Regents of the University of California (Regents) +Copyright (c) 2014-2017 The Regents of the University of California (Regents) All rights reserved. All other contributions: -Copyright (c) 2014, 2015, the respective contributors +Copyright (c) 2014-2017, the respective contributors All rights reserved. Caffe uses a shared copyright model: each contributor holds copyright over From 4056f79f9d8ebf261db45883470a0e2939f725e9 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Tue, 17 Jan 2017 20:10:15 -0800 Subject: [PATCH 184/264] Docker refresh: simplified & update to 16.04, cuda8, cudnn5, nccl --- docker/Makefile | 50 ------------------ docker/README.md | 70 ++++++++++++-------------- docker/{standalone => }/cpu/Dockerfile | 12 +++-- docker/{standalone => }/gpu/Dockerfile | 15 +++--- docker/templates/Dockerfile.template | 42 ---------------- 5 files changed, 49 insertions(+), 140 deletions(-) delete mode 100644 docker/Makefile rename docker/{standalone => }/cpu/Dockerfile (76%) rename docker/{standalone => }/gpu/Dockerfile (66%) delete mode 100644 docker/templates/Dockerfile.template diff --git a/docker/Makefile b/docker/Makefile deleted file mode 100644 index 3a6575b0c43..00000000000 --- a/docker/Makefile +++ /dev/null @@ -1,50 +0,0 @@ -# A makefile to build the docker images for caffe. -# Two caffe images will be built: -# caffe:cpu --> A CPU-only build of caffe. -# caffe:gpu --> A GPU-enabled build using the latest CUDA and CUDNN versions. 
- -DOCKER ?= docker - -all: docker_files standalone - -.PHONY: standalone devel - -standalone: cpu_standalone gpu_standalone - - -cpu_standalone: standalone/cpu/Dockerfile - $(DOCKER) build -t caffe:cpu standalone/cpu - -gpu_standalone: standalone/gpu/Dockerfile - $(DOCKER) build -t caffe:gpu standalone/gpu - -docker_files: standalone_files - -standalone_files: standalone/cpu/Dockerfile standalone/gpu/Dockerfile - -FROM_GPU = "nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04" -FROM_CPU = "ubuntu:14.04" -GPU_CMAKE_ARGS = -DUSE_CUDNN=1 -CPU_CMAKE_ARGS = -DCPU_ONLY=1 - -# A make macro to select the CPU or GPU base image. -define from_image -$(if $(strip $(findstring gpu,$@)),$(FROM_GPU),$(FROM_CPU)) -endef - -# A make macro to select the CPU or GPU build args. -define build_args -$(if $(strip $(findstring gpu,$@)),$(GPU_CMAKE_ARGS),$(CPU_CMAKE_ARGS)) -endef - -# A make macro to construct the CPU or GPU Dockerfile from the template -define create_docker_file - @echo creating $@ - @echo "FROM "$(from_image) > $@ - @cat $^ | sed 's/$${CMAKE_ARGS}/$(build_args)/' >> $@ -endef - - -standalone/%/Dockerfile: templates/Dockerfile.template - $(create_docker_file) - diff --git a/docker/README.md b/docker/README.md index fdab641bdca..11c18157996 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,52 +1,48 @@ -# Caffe standalone Dockerfiles. +### Running an official image -The `standalone` subfolder contains docker files for generating both CPU and GPU executable images for Caffe. The images can be built using make, or by running: +You can run one of the automatic [builds](https://hub.docker.com/r/bvlc/caffe) +like this: -``` -docker build -t caffe:cpu standalone/cpu -``` -for example. (Here `gpu` can be substituted for `cpu`, but to keep the readme simple, only the `cpu` case will be discussed in detail). 
+`docker run -ti bvlc/caffe caffe --version` -Note that the GPU standalone requires a CUDA 7.5 capable driver to be installed on the system and [nvidia-docker] for running the Docker containers. Here it is generally sufficient to use `nvidia-docker` instead of `docker` in any of the commands mentioned. +or for GPU support (You need a CUDA 8.0 capable driver and +[nvidia-docker](https://github.com/NVIDIA/nvidia-docker)): -# Running Caffe using the docker image +`nvidia-docker run -ti bvlc/caffe:gpu caffe --version` -In order to test the Caffe image, run: -``` -docker run -ti caffe:cpu caffe --version -``` -which should show a message like: -``` -libdc1394 error: Failed to initialize libdc1394 -caffe version 1.0.0-rc3 -``` +You might see an error about libdc1394, ignore it. -One can also build and run the Caffe tests in the image using: -``` -docker run -ti caffe:cpu bash -c "cd /opt/caffe/build; make runtest" -``` +### Docker run options -In order to get the most out of the caffe image, some more advanced `docker run` options could be used. For example, running: -``` -docker run -ti --volume=$(pwd):/workspace caffe:cpu caffe train --solver=example_solver.prototxt -``` -will train a network defined in the `example_solver.prototxt` file in the current directory (`$(pwd)` is maped to the container volume `/workspace` using the `--volume=` Docker flag). +By default caffe runs as root, thus any output files, e.g. snapshots, will be owned +by root. It also runs by default in a container-private folder. -Note that docker runs all commands as root by default, and thus any output files (e.g. snapshots) generated will be owned by the root user. 
In order to ensure that the current user is used instead, the following command can be used: -``` -docker run -ti --volume=$(pwd):/workspace -u $(id -u):$(id -g) caffe:cpu caffe train --solver=example_solver.prototxt -``` -where the `-u` Docker command line option runs the commands in the container as the specified user, and the shell command `id` is used to determine the user and group ID of the current user. Note that the Caffe docker images have `/workspace` defined as the default working directory. This can be overridden using the `--workdir=` Docker command line option. +You can change this using flags, like user (-u), current directory, and volumes (-w and -v). +E.g. this behaves like the usual caffe executable: -# Other use-cases +`docker run --rm -u $(id -u):$(id -g) -v $(pwd):$(pwd) -w $(pwd) bvlc/caffe caffe train --solver=example_solver.prototxt` -Although running the `caffe` command in the docker containers as described above serves many purposes, the container can also be used for more interactive use cases. For example, specifying `bash` as the command instead of `caffe` yields a shell that can be used for interactive tasks. (Since the caffe build requirements are included in the container, this can also be used to build and run local versions of caffe). +Containers can also be used interactively, specifying e.g. `bash` or `ipython` +instead of `caffe`. -Another use case is to run python scripts that depend on `caffe`'s Python modules. Using the `python` command instead of `bash` or `caffe` will allow this, and an interactive interpreter can be started by running: ``` -docker run -ti caffe:cpu python +docker run -ti bvlc/caffe ipython +import caffe +... ``` -(`ipython` is also available in the container). -Since the `caffe/python` folder is also added to the path, the utility executable scripts defined there can also be used as executables. 
This includes `draw_net.py`, `classify.py`, and `detect.py` +The caffe build requirements are included in the container, so this can be used to +build and run custom versions of caffe. Also, `caffe/python` is in PATH, so python +utilities can be used directly, e.g. `draw_net.py`, `classify.py`, or `detect.py`. + +### Building images yourself + +Examples: + +`docker build -t caffe cpu` + +`docker build -t caffe:gpu gpu` + +You can also build Caffe and run the tests in the image: +`docker run -ti caffe bash -c "cd /opt/caffe/build; make runtest"` diff --git a/docker/standalone/cpu/Dockerfile b/docker/cpu/Dockerfile similarity index 76% rename from docker/standalone/cpu/Dockerfile rename to docker/cpu/Dockerfile index 4fef25aa6a1..af6c03c6589 100644 --- a/docker/standalone/cpu/Dockerfile +++ b/docker/cpu/Dockerfile @@ -1,5 +1,5 @@ -FROM ubuntu:14.04 -MAINTAINER caffe-maint@googlegroups.com +FROM ubuntu:16.04 +LABEL maintainer caffe-maint@googlegroups.com RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ @@ -20,17 +20,19 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ python-dev \ python-numpy \ python-pip \ + python-setuptools \ python-scipy && \ rm -rf /var/lib/apt/lists/* ENV CAFFE_ROOT=/opt/caffe WORKDIR $CAFFE_ROOT -# FIXME: clone a specific git tag and use ARG instead of ENV once DockerHub supports this. -ENV CLONE_TAG=master +# FIXME: use ARG instead of ENV once DockerHub supports this +ENV CLONE_TAG=rc4 RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/BVLC/caffe.git . && \ - for req in $(cat python/requirements.txt) pydot; do pip install $req; done && \ + pip install --upgrade pip && \ + cd python && for req in $(cat requirements.txt) pydot; do pip install $req; done && cd .. && \ mkdir build && cd build && \ cmake -DCPU_ONLY=1 .. 
&& \ make -j"$(nproc)" diff --git a/docker/standalone/gpu/Dockerfile b/docker/gpu/Dockerfile similarity index 66% rename from docker/standalone/gpu/Dockerfile rename to docker/gpu/Dockerfile index daf6a7223ff..0785b10f1e7 100644 --- a/docker/standalone/gpu/Dockerfile +++ b/docker/gpu/Dockerfile @@ -1,5 +1,5 @@ -FROM nvidia/cuda:7.5-cudnn5-devel-ubuntu14.04 -MAINTAINER caffe-maint@googlegroups.com +FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04 +LABEL maintainer caffe-maint@googlegroups.com RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ @@ -20,19 +20,22 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ python-dev \ python-numpy \ python-pip \ + python-setuptools \ python-scipy && \ rm -rf /var/lib/apt/lists/* ENV CAFFE_ROOT=/opt/caffe WORKDIR $CAFFE_ROOT -# FIXME: clone a specific git tag and use ARG instead of ENV once DockerHub supports this. -ENV CLONE_TAG=master +# FIXME: use ARG instead of ENV once DockerHub supports this +ENV CLONE_TAG=rc4 RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/BVLC/caffe.git . && \ - for req in $(cat python/requirements.txt) pydot; do pip install $req; done && \ + pip install --upgrade pip && \ + cd python && for req in $(cat requirements.txt) pydot; do pip install $req; done && cd .. && \ + git clone https://github.com/NVIDIA/nccl.git && cd nccl && make -j install && cd .. && rm -rf nccl && \ mkdir build && cd build && \ - cmake -DUSE_CUDNN=1 .. && \ + cmake -DUSE_CUDNN=1 -DUSE_NCCL=1 .. 
&& \ make -j"$(nproc)" ENV PYCAFFE_ROOT $CAFFE_ROOT/python diff --git a/docker/templates/Dockerfile.template b/docker/templates/Dockerfile.template deleted file mode 100644 index 8834f057968..00000000000 --- a/docker/templates/Dockerfile.template +++ /dev/null @@ -1,42 +0,0 @@ -MAINTAINER caffe-maint@googlegroups.com - -RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - cmake \ - git \ - wget \ - libatlas-base-dev \ - libboost-all-dev \ - libgflags-dev \ - libgoogle-glog-dev \ - libhdf5-serial-dev \ - libleveldb-dev \ - liblmdb-dev \ - libopencv-dev \ - libprotobuf-dev \ - libsnappy-dev \ - protobuf-compiler \ - python-dev \ - python-numpy \ - python-pip \ - python-scipy && \ - rm -rf /var/lib/apt/lists/* - -ENV CAFFE_ROOT=/opt/caffe -WORKDIR $CAFFE_ROOT - -# FIXME: clone a specific git tag and use ARG instead of ENV once DockerHub supports this. -ENV CLONE_TAG=master - -RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/BVLC/caffe.git . && \ - for req in $(cat python/requirements.txt) pydot; do pip install $req; done && \ - mkdir build && cd build && \ - cmake ${CMAKE_ARGS} .. 
&& \ - make -j"$(nproc)" - -ENV PYCAFFE_ROOT $CAFFE_ROOT/python -ENV PYTHONPATH $PYCAFFE_ROOT:$PYTHONPATH -ENV PATH $CAFFE_ROOT/build/tools:$PYCAFFE_ROOT:$PATH -RUN echo "$CAFFE_ROOT/build/lib" >> /etc/ld.so.conf.d/caffe.conf && ldconfig - -WORKDIR /workspace From 135440371c7cb2932d5c1e8e671e0d2e231fd2cc Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Sat, 21 Jan 2017 03:06:38 +0000 Subject: [PATCH 185/264] cmake: bump soversion to rc4 --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3af394f7aa2..15a7fe46fcf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,8 +10,8 @@ endif() project(Caffe C CXX) # ---[ Caffe version -set(CAFFE_TARGET_VERSION "1.0.0-rc3" CACHE STRING "Caffe logical version") -set(CAFFE_TARGET_SOVERSION "1.0.0-rc3" CACHE STRING "Caffe soname version") +set(CAFFE_TARGET_VERSION "1.0.0-rc4" CACHE STRING "Caffe logical version") +set(CAFFE_TARGET_SOVERSION "1.0.0-rc4" CACHE STRING "Caffe soname version") add_definitions(-DCAFFE_VERSION=${CAFFE_TARGET_VERSION}) # ---[ Using cmake scripts and modules From 3a0b6c6e75ca17bae4c728c6987dc5db1e380ce6 Mon Sep 17 00:00:00 2001 From: Fyodor Tokarev Date: Sat, 21 Jan 2017 15:12:38 +0300 Subject: [PATCH 186/264] Update a comment in caffe.proto --- src/caffe/proto/caffe.proto | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 430a0dea109..815ead35362 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -128,8 +128,7 @@ message SolverParameter { // The states for the train/test nets. Must be unspecified or // specified once per net. // - // By default, all states will have solver = true; - // train_state will have phase = TRAIN, + // By default, train_state will have phase = TRAIN, // and all test_state's will have phase = TEST. // Other defaults are set according to the NetState defaults. 
optional NetState train_state = 26; From e0cd85237c9ea756cf6bd35b8b0e3432ea3e5273 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Mon, 23 Jan 2017 10:31:26 -0800 Subject: [PATCH 187/264] Restore can be invoked on rank > 0 --- src/caffe/solver.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index 1c1a9e59565..fd4c03724ef 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -462,7 +462,6 @@ string Solver::SnapshotToHDF5() { template void Solver::Restore(const char* state_file) { - CHECK(Caffe::root_solver()); string state_filename(state_file); if (state_filename.size() >= 3 && state_filename.compare(state_filename.size() - 3, 3, ".h5") == 0) { From 29f0cdb9d785459126516dc58f755af5b486cf71 Mon Sep 17 00:00:00 2001 From: Ken Schutte Date: Tue, 24 Jan 2017 10:45:52 -0600 Subject: [PATCH 188/264] parse_log.py was not using --verbose argument --- tools/extra/parse_log.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/extra/parse_log.py b/tools/extra/parse_log.py index b47ffd0d842..4248e2b87a3 100755 --- a/tools/extra/parse_log.py +++ b/tools/extra/parse_log.py @@ -203,7 +203,7 @@ def main(): args = parse_args() train_dict_list, test_dict_list = parse_log(args.logfile_path) save_csv_files(args.logfile_path, args.output_dir, train_dict_list, - test_dict_list, delimiter=args.delimiter) + test_dict_list, delimiter=args.delimiter, verbose=args.verbose) if __name__ == '__main__': From 6bf10afd20f91366909318fe4e85a098bb742f58 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Williford" Date: Fri, 20 Jan 2017 11:53:12 +0000 Subject: [PATCH 189/264] Fix broken links in layer documentation, minor fixes. 
--- docs/tutorial/layers/accuracy.md | 3 +-- docs/tutorial/layers/argmax.md | 3 +-- docs/tutorial/layers/infogainloss.md | 5 ++--- docs/tutorial/layers/lrn.md | 4 ++-- docs/tutorial/layers/memorydata.md | 2 +- docs/tutorial/layers/multinomiallogisticloss.md | 2 +- docs/tutorial/layers/silence.md | 8 +------- 7 files changed, 9 insertions(+), 18 deletions(-) diff --git a/docs/tutorial/layers/accuracy.md b/docs/tutorial/layers/accuracy.md index ecf84090e61..80293b1c6bf 100644 --- a/docs/tutorial/layers/accuracy.md +++ b/docs/tutorial/layers/accuracy.md @@ -10,7 +10,6 @@ title: Accuracy and Top-k * [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1AccuracyLayer.html) * Header: [`./include/caffe/layers/accuracy_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/accuracy_layer.hpp) * CPU implementation: [`./src/caffe/layers/accuracy_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/accuracy_layer.cpp) -* CUDA GPU implementation: [`./src/caffe/layers/accuracy_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/accuracy_layer.cu) ## Parameters * Parameters (`AccuracyParameter accuracy_param`) @@ -18,4 +17,4 @@ title: Accuracy and Top-k {% highlight Protobuf %} {% include proto/AccuracyParameter.txt %} -{% endhighlight %} \ No newline at end of file +{% endhighlight %} diff --git a/docs/tutorial/layers/argmax.md b/docs/tutorial/layers/argmax.md index f5f173ac731..9eb8b7739f5 100644 --- a/docs/tutorial/layers/argmax.md +++ b/docs/tutorial/layers/argmax.md @@ -8,7 +8,6 @@ title: ArgMax Layer * [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1ArgMaxLayer.html) * Header: [`./include/caffe/layers/argmax_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/argmax_layer.hpp) * CPU implementation: [`./src/caffe/layers/argmax_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/argmax_layer.cpp) -* CUDA GPU implementation: 
[`./src/caffe/layers/argmax_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/argmax_layer.cu) ## Parameters * Parameters (`ArgMaxParameter argmax_param`) @@ -16,4 +15,4 @@ title: ArgMax Layer {% highlight Protobuf %} {% include proto/ArgMaxParameter.txt %} -{% endhighlight %} \ No newline at end of file +{% endhighlight %} diff --git a/docs/tutorial/layers/infogainloss.md b/docs/tutorial/layers/infogainloss.md index 86140b6cca7..b3b690d2621 100644 --- a/docs/tutorial/layers/infogainloss.md +++ b/docs/tutorial/layers/infogainloss.md @@ -8,11 +8,10 @@ title: Infogain Loss Layer * [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1InfogainLossLayer.html) * Header: [`./include/caffe/layers/infogain_loss_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/infogain_loss_layer.hpp) * CPU implementation: [`./src/caffe/layers/infogain_loss_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/infogain_loss_layer.cpp) -* CUDA GPU implementation: [`./src/caffe/layers/infogain_loss_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/infogain_loss_layer.cu) -A generalization of [MultinomialLogisticLossLayer](layers/multinomiallogisticloss.md) that takes an "information gain" (infogain) matrix specifying the "value" of all label pairs. +A generalization of [MultinomialLogisticLossLayer](multinomiallogisticloss.html) that takes an "information gain" (infogain) matrix specifying the "value" of all label pairs. -Equivalent to the [MultinomialLogisticLossLayer](layers/multinomiallogisticloss.md) if the infogain matrix is the identity. +Equivalent to the [MultinomialLogisticLossLayer](multinomiallogisticloss.html) if the infogain matrix is the identity. 
## Parameters diff --git a/docs/tutorial/layers/lrn.md b/docs/tutorial/layers/lrn.md index 387311c2251..2fbef734663 100644 --- a/docs/tutorial/layers/lrn.md +++ b/docs/tutorial/layers/lrn.md @@ -20,9 +20,9 @@ The local response normalization layer performs a kind of "lateral inhibition" b ## Parameters -* Parameters (`Parameter lrn_param`) +* Parameters (`LRNParameter lrn_param`) * From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): {% highlight Protobuf %} -{% include proto/BatchNormParameter.txt %} +{% include proto/LRNParameter.txt %} {% endhighlight %} diff --git a/docs/tutorial/layers/memorydata.md b/docs/tutorial/layers/memorydata.md index 754e62aef62..afce4a24a28 100644 --- a/docs/tutorial/layers/memorydata.md +++ b/docs/tutorial/layers/memorydata.md @@ -7,7 +7,7 @@ title: Memory Data Layer * Layer type: `MemoryData` * [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1MemoryDataLayer.html) * Header: [`./include/caffe/layers/memory_data_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/memory_data_layer.hpp) -* CPU implementation: [`./src/caffe/layers/memory_data_layer.cpu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/memory_data_layer.cpu) +* CPU implementation: [`./src/caffe/layers/memory_data_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/memory_data_layer.cpp) The memory data layer reads data directly from memory, without copying it. In order to use it, one must call `MemoryDataLayer::Reset` (from C++) or `Net.set_input_arrays` (from Python) in order to specify a source of contiguous data (as 4D row major array), which is read one batch-sized chunk at a time. 
diff --git a/docs/tutorial/layers/multinomiallogisticloss.md b/docs/tutorial/layers/multinomiallogisticloss.md index a28ab914854..5eab74a8a69 100644 --- a/docs/tutorial/layers/multinomiallogisticloss.md +++ b/docs/tutorial/layers/multinomiallogisticloss.md @@ -7,7 +7,7 @@ title: Multinomial Logistic Loss Layer * Layer type: `MultinomialLogisticLoss` * [Doxygen Documentation](http://caffe.berkeleyvision.org/doxygen/classcaffe_1_1MultinomialLogisticLossLayer.html) * Header: [`./include/caffe/layers/multinomial_logistic_loss_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/multinomial_logistic_loss_layer.hpp) -* CPU implementation: [`./src/caffe/layers/multinomial_logistic_loss_layer.cpu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/multinomial_logistic_loss_layer.cpu) +* CPU implementation: [`./src/caffe/layers/multinomial_logistic_loss_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/multinomial_logistic_loss_layer.cpp) ## Parameters diff --git a/docs/tutorial/layers/silence.md b/docs/tutorial/layers/silence.md index 2c37a9cd67c..8b4579a9935 100644 --- a/docs/tutorial/layers/silence.md +++ b/docs/tutorial/layers/silence.md @@ -14,10 +14,4 @@ Silences a blob, so that it is not printed. ## Parameters -* Parameters (`SilenceParameter silence_param`) -* From [`./src/caffe/proto/caffe.proto`](https://github.com/BVLC/caffe/blob/master/src/caffe/proto/caffe.proto): - -{% highlight Protobuf %} -{% include proto/BatchNormParameter.txt %} -{% endhighlight %} - +No parameters. From 7b5731c6a68b6a9372c00eb8e13c697f832d8d1b Mon Sep 17 00:00:00 2001 From: Wenbo Yang Date: Mon, 30 Jan 2017 16:33:20 +0800 Subject: [PATCH 190/264] Remove sdk version from veclib searching path. 
--- cmake/Modules/FindvecLib.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Modules/FindvecLib.cmake b/cmake/Modules/FindvecLib.cmake index 46043367362..8eaab59473c 100644 --- a/cmake/Modules/FindvecLib.cmake +++ b/cmake/Modules/FindvecLib.cmake @@ -16,7 +16,7 @@ find_path(vecLib_INCLUDE_DIR vecLib.h DOC "vecLib include directory" PATHS /System/Library/Frameworks/Accelerate.framework/Versions/Current/${__veclib_include_suffix} /System/Library/${__veclib_include_suffix} - /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ + /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ NO_DEFAULT_PATH) include(FindPackageHandleStandardArgs) From cd89d4b567529de086e409b66390c961624a84b3 Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Wed, 1 Feb 2017 11:21:00 +0000 Subject: [PATCH 191/264] docs: update install_apt_debian guide --- docs/install_apt_debian.md | 76 ++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 35 deletions(-) diff --git a/docs/install_apt_debian.md b/docs/install_apt_debian.md index 0d39e3ae22a..65fe70924e1 100644 --- a/docs/install_apt_debian.md +++ b/docs/install_apt_debian.md @@ -5,13 +5,13 @@ title: "Installation: Debian" # Debian Installation Caffe packages are available for several Debian versions, as shown in the -following chart +following chart: ``` Your Distro | CPU_ONLY | CUDA | Alias ----------------+------------+--------+------------------- Debian/stable | ✘ | ✘ | Debian Jessie -Debian/testing | ✔ | ☐ | Debian Stretch/Sid +Debian/testing | ✔ | ✔ | Debian Stretch/Sid Debian/unstable | ✔ | ✔ | Debian Sid ``` @@ -19,30 +19,32 @@ Debian/unstable | ✔ | ✔ | Debian Sid * `✔ ` You can install caffe with a single 
command line following this guide. -* `☐ ` The same with `✔ `. However it will not work any more when Debian/Stretch becomes the stable branch. - -Last update: 2017-01-05 +Last update: 2017-02-01 ## Binary installation with APT Apart from the installation methods based on source, Debian/unstable -and Debian/testing users can install pre-compiled Caffe packages via the official archive. +and Debian/testing users can install pre-compiled Caffe packages from +the official archive. + +Make sure that your `/etc/apt/sources.list` contains `contrib` and `non-free` +sections if you want to install the CUDA version, for instance: -Make sure that there is something like the follows in your `/etc/apt/sources.list`: ``` -deb http://MIRROR/debian CODENAME main contrib non-free +deb http://ftp2.cn.debian.org/debian sid main contrib non-free ``` -where `MIRROR` is your favorate Debian mirror, and `CODENAME ∈ {testing,stretch,sid}`. Then we update APT cache and directly install Caffe. Note, the cpu version and -the cuda version cannot be installed at the same time. +the cuda version cannot coexist. + ``` -# apt update -# apt install [ caffe-cpu | caffe-cuda ] -# caffe # command line interface working -# python3 -c 'import caffe; print(caffe.__path__)' # python3 interface working +$ sudo apt update +$ sudo apt install [ caffe-cpu | caffe-cuda ] +$ caffe # command line interface working +$ python3 -c 'import caffe; print(caffe.__path__)' # python3 interface working ``` -It should work out of box. + +These Caffe packages should work for you out of box. #### Customizing caffe packages @@ -50,46 +52,49 @@ Some users may need to customize the Caffe package. The way to customize the package is beyond this guide. Here is only a brief guide of producing the customized `.deb` packages. 
-Make sure that there is something like this in your `/etc/apt/sources.list`: +Make sure that there is a `dec-src` source in your `/etc/apt/sources.list`, +for instance: + ``` deb http://ftp2.cn.debian.org/debian sid main contrib non-free deb-src http://ftp2.cn.debian.org/debian sid main contrib non-free ``` Then we build caffe deb files with the following commands: + ``` $ sudo apt update -$ sudo apt install build-essential debhelper devscripts # standard package building tools -$ sudo apt build-dep [ caffe-cpu | caffe-cuda ] # the most elegant way to pull caffe build dependencies -$ apt source [ caffe-cpu | caffe-cuda ] # download the source tarball and extract +$ sudo apt install build-essential debhelper devscripts # standard package building tools +$ sudo apt build-dep [ caffe-cpu | caffe-cuda ] # the most elegant way to pull caffe build dependencies +$ apt source [ caffe-cpu | caffe-cuda ] # download the source tarball and extract $ cd caffe-XXXX -[ ... optional, customize caffe code/build ... ] -$ dch -llocal "Modified XXX in order to XXX" # write your one-line changelog -$ debuild -B -j4 # build caffe with 4 parallel jobs (similar to make -j4) +[ ... optional, customizing caffe code/build ... ] +$ dch --local "Modified XXX" # bump package version and write changelog +$ debuild -B -j4 # build caffe with 4 parallel jobs (similar to make -j4) [ ... building ...] -$ debc # optional, if you want to check the package contents -$ sudo debi # optional, install the generated packages +$ debc # optional, if you want to check the package contents +$ sudo debi # optional, install the generated packages +$ ls ../ # optional, you will see the resulting packages ``` -The resulting deb packages can be found under the parent directory of the source tree. -Note, the `dch ...` command line above is for bumping the package version number -and adding an entry to the package changelog. 
If you would like to write -more than one changelog entry, use subsequent `dch` command (see `man 1 dch`) -instead of manually modifing `debian/changelog` unless you know how to keep its format correct. +It is a BUG if the package failed to build without any change. The changelog will be installed at e.g. `/usr/share/doc/caffe-cpu/changelog.Debian.gz`. ## Source installation -Source installation under Debian/unstable is similar to that of Ubuntu, but +Source installation under Debian/unstable and Debian/testing is similar to that of Ubuntu, but here is a more elegant way to pull caffe build dependencies: + ``` $ sudo apt build-dep [ caffe-cpu | caffe-cuda ] ``` + Note, this requires a `deb-src` entry in your `/etc/apt/sources.list`. #### Compiler Combinations -Some users may find their favorate compiler doesn't work well with CUDA. +Some users may find their favorate compiler doesn't work with CUDA. + ``` CXX compiler | CUDA 7.5 | CUDA 8.0 | -------------+------------+------------+- @@ -144,12 +149,13 @@ and hack the packaging scripts, then build your customized package. * Where are the examples, the models and other documentation stuff? ``` -sudo apt install caffe-doc -dpkg -L caffe-doc +$ sudo apt install caffe-doc +$ dpkg -L caffe-doc ``` * Where can I find the Debian package status? -https://tracker.debian.org/pkg/caffe (for the CPU_ONLY version) - +``` +https://tracker.debian.org/pkg/caffe (for the CPU_ONLY version) https://tracker.debian.org/pkg/caffe-contrib (for the CUDA version) +``` From 734702b3703de0368e901644125ddca91bab4cb7 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Wed, 8 Feb 2017 11:42:05 -0800 Subject: [PATCH 192/264] Document switch to explicit flags for docker: cpu / gpu. 
--- docker/README.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/docker/README.md b/docker/README.md index 11c18157996..f9c7c756fe6 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,9 +1,8 @@ ### Running an official image -You can run one of the automatic [builds](https://hub.docker.com/r/bvlc/caffe) -like this: +You can run one of the automatic [builds](https://hub.docker.com/r/bvlc/caffe). E.g. for the CPU version: -`docker run -ti bvlc/caffe caffe --version` +`docker run -ti bvlc/caffe:cpu caffe --version` or for GPU support (You need a CUDA 8.0 capable driver and [nvidia-docker](https://github.com/NVIDIA/nvidia-docker)): @@ -20,13 +19,13 @@ by root. It also runs by default in a container-private folder. You can change this using flags, like user (-u), current directory, and volumes (-w and -v). E.g. this behaves like the usual caffe executable: -`docker run --rm -u $(id -u):$(id -g) -v $(pwd):$(pwd) -w $(pwd) bvlc/caffe caffe train --solver=example_solver.prototxt` +`docker run --rm -u $(id -u):$(id -g) -v $(pwd):$(pwd) -w $(pwd) bvlc/caffe:cpu caffe train --solver=example_solver.prototxt` Containers can also be used interactively, specifying e.g. `bash` or `ipython` instead of `caffe`. ``` -docker run -ti bvlc/caffe ipython +docker run -ti bvlc/caffe:cpu ipython import caffe ... ``` @@ -39,10 +38,10 @@ utilities can be used directly, e.g. 
`draw_net.py`, `classify.py`, or `detect.py Examples: -`docker build -t caffe cpu` +`docker build -t caffe:cpu cpu` `docker build -t caffe:gpu gpu` You can also build Caffe and run the tests in the image: -`docker run -ti caffe bash -c "cd /opt/caffe/build; make runtest"` +`docker run -ti caffe:cpu bash -c "cd /opt/caffe/build; make runtest"` From 9c201e177994e31df430cf01baa3105aa5c00699 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Wed, 8 Feb 2017 17:13:53 -0800 Subject: [PATCH 193/264] make: bump version to rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 65d08f7d31e..1b73ae0fe2f 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ LIB_BUILD_DIR := $(BUILD_DIR)/lib STATIC_NAME := $(LIB_BUILD_DIR)/lib$(LIBRARY_NAME).a DYNAMIC_VERSION_MAJOR := 1 DYNAMIC_VERSION_MINOR := 0 -DYNAMIC_VERSION_REVISION := 0-rc3 +DYNAMIC_VERSION_REVISION := 0-rc4 DYNAMIC_NAME_SHORT := lib$(LIBRARY_NAME).so #DYNAMIC_SONAME_SHORT := $(DYNAMIC_NAME_SHORT).$(DYNAMIC_VERSION_MAJOR) DYNAMIC_VERSIONED_NAME_SHORT := $(DYNAMIC_NAME_SHORT).$(DYNAMIC_VERSION_MAJOR).$(DYNAMIC_VERSION_MINOR).$(DYNAMIC_VERSION_REVISION) From 15dfcc1433441f01b0602474eb068e20e7451dd4 Mon Sep 17 00:00:00 2001 From: Katherine Crowson Date: Thu, 9 Feb 2017 11:40:52 -0800 Subject: [PATCH 194/264] Add Pascal CUDA architectures to Makefile.config.example --- Makefile.config.example | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile.config.example b/Makefile.config.example index b590bd16ce9..d552b38a97c 100644 --- a/Makefile.config.example +++ b/Makefile.config.example @@ -31,13 +31,17 @@ CUDA_DIR := /usr/local/cuda # CUDA_DIR := /usr # CUDA architecture setting: going with all of them. -# For CUDA < 6.0, comment the *_50 lines for compatibility. +# For CUDA < 6.0, comment the *_50 through *_61 lines for compatibility. +# For CUDA < 8.0, comment the *_60 and *_61 lines for compatibility. 
CUDA_ARCH := -gencode arch=compute_20,code=sm_20 \ -gencode arch=compute_20,code=sm_21 \ -gencode arch=compute_30,code=sm_30 \ -gencode arch=compute_35,code=sm_35 \ -gencode arch=compute_50,code=sm_50 \ - -gencode arch=compute_50,code=compute_50 + -gencode arch=compute_52,code=sm_52 \ + -gencode arch=compute_60,code=sm_60 \ + -gencode arch=compute_61,code=sm_61 \ + -gencode arch=compute_61,code=compute_61 # BLAS choice: # atlas for ATLAS (default) From 23fca12e579731cf21c783b4a82de3d0a8b6e2cf Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 16 Feb 2017 16:40:18 -0800 Subject: [PATCH 195/264] version bump: rc5 --- CMakeLists.txt | 4 ++-- Makefile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 15a7fe46fcf..32b5bcb47fe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,8 +10,8 @@ endif() project(Caffe C CXX) # ---[ Caffe version -set(CAFFE_TARGET_VERSION "1.0.0-rc4" CACHE STRING "Caffe logical version") -set(CAFFE_TARGET_SOVERSION "1.0.0-rc4" CACHE STRING "Caffe soname version") +set(CAFFE_TARGET_VERSION "1.0.0-rc5" CACHE STRING "Caffe logical version") +set(CAFFE_TARGET_SOVERSION "1.0.0-rc5" CACHE STRING "Caffe soname version") add_definitions(-DCAFFE_VERSION=${CAFFE_TARGET_VERSION}) # ---[ Using cmake scripts and modules diff --git a/Makefile b/Makefile index 1b73ae0fe2f..77900b69b97 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ LIB_BUILD_DIR := $(BUILD_DIR)/lib STATIC_NAME := $(LIB_BUILD_DIR)/lib$(LIBRARY_NAME).a DYNAMIC_VERSION_MAJOR := 1 DYNAMIC_VERSION_MINOR := 0 -DYNAMIC_VERSION_REVISION := 0-rc4 +DYNAMIC_VERSION_REVISION := 0-rc5 DYNAMIC_NAME_SHORT := lib$(LIBRARY_NAME).so #DYNAMIC_SONAME_SHORT := $(DYNAMIC_NAME_SHORT).$(DYNAMIC_VERSION_MAJOR) DYNAMIC_VERSIONED_NAME_SHORT := $(DYNAMIC_NAME_SHORT).$(DYNAMIC_VERSION_MAJOR).$(DYNAMIC_VERSION_MINOR).$(DYNAMIC_VERSION_REVISION) From 85ab6100a122042c7dfd4adaf06f4c0b2e71148d Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Mon, 27 
Feb 2017 11:54:37 -0800 Subject: [PATCH 196/264] fix broken link to hinge loss --- docs/tutorial/layers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/layers.md b/docs/tutorial/layers.md index a903d5ac985..2faacc5836d 100644 --- a/docs/tutorial/layers.md +++ b/docs/tutorial/layers.md @@ -128,7 +128,7 @@ Layers: * [Infogain Loss](layers/infogainloss.html) - a generalization of MultinomialLogisticLossLayer. * [Softmax with Loss](layers/softmaxwithloss.html) - computes the multinomial logistic loss of the softmax of its inputs. It's conceptually identical to a softmax layer followed by a multinomial logistic loss layer, but provides a more numerically stable gradient. * [Sum-of-Squares / Euclidean](layers/euclideanloss.html) - computes the sum of squares of differences of its two inputs, $$\frac 1 {2N} \sum_{i=1}^N \| x^1_i - x^2_i \|_2^2$$. -* [Hinge / Margin](layers/hiddenloss.html) - The hinge loss layer computes a one-vs-all hinge (L1) or squared hinge loss (L2). +* [Hinge / Margin](layers/hingeloss.html) - The hinge loss layer computes a one-vs-all hinge (L1) or squared hinge loss (L2). * [Sigmoid Cross-Entropy Loss](layers/sigmoidcrossentropyloss.html) - computes the cross-entropy (logistic) loss, often used for predicting targets interpreted as probabilities. * [Accuracy / Top-k layer](layers/accuracy.html) - scores the output as an accuracy with respect to target -- it is not actually a loss and has no backward step. 
* [Contrastive Loss](layers/contrastiveloss.html) From fe9e58d6360d99cde0a883a06590631bb11911e0 Mon Sep 17 00:00:00 2001 From: zhuyuanhao Date: Wed, 1 Mar 2017 20:42:30 +0800 Subject: [PATCH 197/264] Remove not used variable in base_conv_layer.cpp --- src/caffe/layers/base_conv_layer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/caffe/layers/base_conv_layer.cpp b/src/caffe/layers/base_conv_layer.cpp index 4a4c68e009a..35c90145e31 100644 --- a/src/caffe/layers/base_conv_layer.cpp +++ b/src/caffe/layers/base_conv_layer.cpp @@ -19,7 +19,6 @@ void BaseConvolutionLayer::LayerSetUp(const vector*>& bottom, const int num_axes = bottom[0]->num_axes(); num_spatial_axes_ = num_axes - first_spatial_axis; CHECK_GE(num_spatial_axes_, 0); - vector bottom_dim_blob_shape(1, num_spatial_axes_ + 1); vector spatial_dim_blob_shape(1, std::max(num_spatial_axes_, 1)); // Setup filter kernel dimensions (kernel_shape_). kernel_shape_.Reshape(spatial_dim_blob_shape); From 4529f12bdcd27d74655473b6665f5a23cd1214b1 Mon Sep 17 00:00:00 2001 From: gineshidalgo99 Date: Thu, 9 Mar 2017 19:24:06 -0500 Subject: [PATCH 198/264] =?UTF-8?q?Removed=20some=20'warning:=20extra=20?= =?UTF-8?q?=E2=80=98;=E2=80=99=20[-Wpedantic]'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- include/caffe/util/math_functions.hpp | 6 +++--- include/caffe/util/mkl_alternate.hpp | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index 51068fe2b80..37abce5eccc 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -128,16 +128,16 @@ inline int8_t caffe_sign(Dtype val) { } // output is 1 for the positives, 0 for zero, and -1 for the negatives -DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign(x[i])); +DEFINE_CAFFE_CPU_UNARY_FUNC(sign, y[i] = caffe_sign(x[i])) // This returns a nonzero value if the input has its 
sign bit set. // The name sngbit is meant to avoid conflicts with std::signbit in the macro. // The extra parens are needed because CUDA < 6.5 defines signbit as a macro, // and we don't want that to expand here when CUDA headers are also included. DEFINE_CAFFE_CPU_UNARY_FUNC(sgnbit, \ - y[i] = static_cast((std::signbit)(x[i]))); + y[i] = static_cast((std::signbit)(x[i]))) -DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i])); +DEFINE_CAFFE_CPU_UNARY_FUNC(fabs, y[i] = std::fabs(x[i])) template void caffe_cpu_scale(const int n, const Dtype alpha, const Dtype *x, Dtype* y); diff --git a/include/caffe/util/mkl_alternate.hpp b/include/caffe/util/mkl_alternate.hpp index 95df0f93b5e..79b2c32de94 100644 --- a/include/caffe/util/mkl_alternate.hpp +++ b/include/caffe/util/mkl_alternate.hpp @@ -36,10 +36,10 @@ extern "C" { v##name(n, a, y); \ } -DEFINE_VSL_UNARY_FUNC(Sqr, y[i] = a[i] * a[i]); -DEFINE_VSL_UNARY_FUNC(Exp, y[i] = exp(a[i])); -DEFINE_VSL_UNARY_FUNC(Ln, y[i] = log(a[i])); -DEFINE_VSL_UNARY_FUNC(Abs, y[i] = fabs(a[i])); +DEFINE_VSL_UNARY_FUNC(Sqr, y[i] = a[i] * a[i]) +DEFINE_VSL_UNARY_FUNC(Exp, y[i] = exp(a[i])) +DEFINE_VSL_UNARY_FUNC(Ln, y[i] = log(a[i])) +DEFINE_VSL_UNARY_FUNC(Abs, y[i] = fabs(a[i])) // A simple way to define the vsl unary functions with singular parameter b. // The operation should be in the form e.g. y[i] = pow(a[i], b) @@ -58,7 +58,7 @@ DEFINE_VSL_UNARY_FUNC(Abs, y[i] = fabs(a[i])); v##name(n, a, b, y); \ } -DEFINE_VSL_UNARY_FUNC_WITH_PARAM(Powx, y[i] = pow(a[i], b)); +DEFINE_VSL_UNARY_FUNC_WITH_PARAM(Powx, y[i] = pow(a[i], b)) // A simple way to define the vsl binary functions. The operation should // be in the form e.g. 
y[i] = a[i] + b[i] @@ -77,10 +77,10 @@ DEFINE_VSL_UNARY_FUNC_WITH_PARAM(Powx, y[i] = pow(a[i], b)); v##name(n, a, b, y); \ } -DEFINE_VSL_BINARY_FUNC(Add, y[i] = a[i] + b[i]); -DEFINE_VSL_BINARY_FUNC(Sub, y[i] = a[i] - b[i]); -DEFINE_VSL_BINARY_FUNC(Mul, y[i] = a[i] * b[i]); -DEFINE_VSL_BINARY_FUNC(Div, y[i] = a[i] / b[i]); +DEFINE_VSL_BINARY_FUNC(Add, y[i] = a[i] + b[i]) +DEFINE_VSL_BINARY_FUNC(Sub, y[i] = a[i] - b[i]) +DEFINE_VSL_BINARY_FUNC(Mul, y[i] = a[i] * b[i]) +DEFINE_VSL_BINARY_FUNC(Div, y[i] = a[i] / b[i]) // In addition, MKL comes with an additional function axpby that is not present // in standard blas. We will simply use a two-step (inefficient, of course) way From 1d3e6e4522a95faf954e775b23a2f907e66caf31 Mon Sep 17 00:00:00 2001 From: folz Date: Mon, 13 Mar 2017 11:04:30 +0100 Subject: [PATCH 199/264] Solver_add_nccl accepts any kind of Solver --- python/caffe/_caffe.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 3589e476f5c..be011699098 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -288,7 +288,7 @@ void Solver_add_callback(Solver * solver, bp::object on_start, } // Seems boost cannot call the base method directly -void Solver_add_nccl(SGDSolver* solver +void Solver_add_nccl(Solver* solver #ifdef USE_NCCL , NCCL* nccl #endif From 93993a3c2b25ad683dbf13ef3085b0ea5912911f Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Tue, 14 Mar 2017 15:41:40 -0700 Subject: [PATCH 200/264] Init test net on all GPUs, allows parallel inference --- src/caffe/solver.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/caffe/solver.cpp b/src/caffe/solver.cpp index fd4c03724ef..044269371ad 100644 --- a/src/caffe/solver.cpp +++ b/src/caffe/solver.cpp @@ -51,8 +51,8 @@ void Solver::Init(const SolverParameter& param) { } // Scaffolding code InitTrainNet(); + InitTestNets(); if (Caffe::root_solver()) { - InitTestNets(); LOG(INFO) << "Solver scaffolding 
done."; } iter_ = 0; @@ -102,7 +102,6 @@ void Solver::InitTrainNet() { template void Solver::InitTestNets() { - CHECK(Caffe::root_solver()); const bool has_net_param = param_.has_net_param(); const bool has_net_file = param_.has_net(); const int num_generic_nets = has_net_param + has_net_file; From 802d90fe81f04e5e9c28c088da0f1b22e1b9fed2 Mon Sep 17 00:00:00 2001 From: Guillaume Dumont Date: Thu, 16 Mar 2017 23:08:20 -0400 Subject: [PATCH 201/264] Added python 3 compatibility to cpp_lint.py --- scripts/cpp_lint.py | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/scripts/cpp_lint.py b/scripts/cpp_lint.py index 6ec4fb76e2c..b2016d4b6dd 100755 --- a/scripts/cpp_lint.py +++ b/scripts/cpp_lint.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2 +#!/usr/bin/env python # # Copyright (c) 2009 Google Inc. All rights reserved. # @@ -52,6 +52,10 @@ import sys import unicodedata +import six + +from six import iteritems, itervalues +from six.moves import xrange _USAGE = """ Syntax: cpp_lint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...] @@ -756,7 +760,7 @@ def IncrementErrorCount(self, category): def PrintErrorCounts(self): """Print a summary of errors by category, and the total.""" - for category, count in self.errors_by_category.iteritems(): + for category, count in iteritems(self.errors_by_category): sys.stderr.write('Category \'%s\' errors found: %d\n' % (category, count)) sys.stderr.write('Total errors found: %d\n' % self.error_count) @@ -3444,16 +3448,16 @@ def GetLineWidth(line): The width of the line in column positions, accounting for Unicode combining characters and wide characters. 
""" - if isinstance(line, unicode): - width = 0 - for uc in unicodedata.normalize('NFC', line): - if unicodedata.east_asian_width(uc) in ('W', 'F'): - width += 2 - elif not unicodedata.combining(uc): - width += 1 - return width - else: - return len(line) + if six.PY2: + if isinstance(line, unicode): + width = 0 + for uc in unicodedata.normalize('NFC', line): + if unicodedata.east_asian_width(uc) in ('W', 'F'): + width += 2 + elif not unicodedata.combining(uc): + width += 1 + return width + return len(line) def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state, @@ -3774,7 +3778,7 @@ def _GetTextInside(text, start_pattern): # Give opening punctuations to get the matching close-punctuations. matching_punctuation = {'(': ')', '{': '}', '[': ']'} - closing_punctuation = set(matching_punctuation.itervalues()) + closing_punctuation = set(itervalues(matching_punctuation)) # Find the position to start extracting text. match = re.search(start_pattern, text, re.M) @@ -4851,10 +4855,11 @@ def main(): # Change stderr to write with replacement characters so we don't die # if we try to print something containing non-ASCII characters. 
- sys.stderr = codecs.StreamReaderWriter(sys.stderr, - codecs.getreader('utf8'), - codecs.getwriter('utf8'), - 'replace') + if six.PY2: + sys.stderr = codecs.StreamReaderWriter(sys.stderr, + codecs.getreader('utf8'), + codecs.getwriter('utf8'), + 'replace') _cpplint_state.ResetErrorCounts() for filename in filenames: From accd188d3241c27a6d24b95cd95a4dca4f4078bc Mon Sep 17 00:00:00 2001 From: max argus Date: Wed, 8 Mar 2017 15:04:29 +0000 Subject: [PATCH 202/264] sane h5df file type check for weights --- src/caffe/net.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 70d51806d8a..353c2f95b9e 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -769,8 +769,7 @@ void Net::CopyTrainedLayersFrom(const NetParameter& param) { template void Net::CopyTrainedLayersFrom(const string trained_filename) { - if (trained_filename.size() >= 3 && - trained_filename.compare(trained_filename.size() - 3, 3, ".h5") == 0) { + if (H5Fis_hdf5(trained_filename.c_str())) { CopyTrainedLayersFromHDF5(trained_filename); } else { CopyTrainedLayersFromBinaryProto(trained_filename); From 11930f1416efb66795e1fabc5e362a568446d37d Mon Sep 17 00:00:00 2001 From: "Jonathan R. Williford" Date: Wed, 22 Mar 2017 22:36:14 +0100 Subject: [PATCH 203/264] Clarify batch norm parameter documentation. --- src/caffe/proto/caffe.proto | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index a145c541957..02e0ddf57c1 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -502,11 +502,21 @@ message ConcatParameter { } message BatchNormParameter { - // If false, accumulate global mean/variance values via a moving average. If - // true, use those accumulated values instead of computing mean/variance - // across the batch. 
+ // If false, normalization is performed over the current mini-batch + // and global statistics are accumulated (but not yet used) by a moving + // average. + // If true, those accumulated mean and variance values are used for the + // normalization. + // By default, it is set to false when the network is in the training + // phase and true when the network is in the testing phase. optional bool use_global_stats = 1; - // How much does the moving average decay each iteration? + // What fraction of the moving average remains each iteration? + // Smaller values make the moving average decay faster, giving more + // weight to the recent values. + // Each iteration updates the moving average @f$S_{t-1}@f$ with the + // current mean @f$ Y_t @f$ by + // @f$ S_t = (1-\beta)Y_t + \beta \cdot S_{t-1} @f$, where @f$ \beta @f$ + // is the moving_average_fraction parameter. optional float moving_average_fraction = 2 [default = .999]; // Small value to add to the variance estimate so that we don't divide by // zero. 
From 5c8e3545c650e9d3924f707334bde7cd67cf4e07 Mon Sep 17 00:00:00 2001 From: max argus Date: Wed, 22 Mar 2017 23:15:34 +0000 Subject: [PATCH 204/264] [caffe][build] added Atlas lapack Library name atllapack --- cmake/Modules/FindAtlas.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/Modules/FindAtlas.cmake b/cmake/Modules/FindAtlas.cmake index 9c665a47bd5..7ffa6393bbc 100644 --- a/cmake/Modules/FindAtlas.cmake +++ b/cmake/Modules/FindAtlas.cmake @@ -28,7 +28,7 @@ find_path(Atlas_CLAPACK_INCLUDE_DIR NAMES clapack.h PATHS ${Atlas_INCLUDE_SEARCH find_library(Atlas_CBLAS_LIBRARY NAMES ptcblas_r ptcblas cblas_r cblas PATHS ${Atlas_LIB_SEARCH_PATHS}) find_library(Atlas_BLAS_LIBRARY NAMES atlas_r atlas PATHS ${Atlas_LIB_SEARCH_PATHS}) -find_library(Atlas_LAPACK_LIBRARY NAMES lapack alapack_r alapack lapack_atlas PATHS ${Atlas_LIB_SEARCH_PATHS}) +find_library(Atlas_LAPACK_LIBRARY NAMES lapack alapack_r alapack lapack_atlas atllapack PATHS ${Atlas_LIB_SEARCH_PATHS}) set(LOOKED_FOR Atlas_CBLAS_INCLUDE_DIR @@ -47,6 +47,6 @@ if(ATLAS_FOUND) set(Atlas_LIBRARIES ${Atlas_LAPACK_LIBRARY} ${Atlas_CBLAS_LIBRARY} ${Atlas_BLAS_LIBRARY}) mark_as_advanced(${LOOKED_FOR}) - message(STATUS "Found Atlas (include: ${Atlas_CBLAS_INCLUDE_DIR}, library: ${Atlas_BLAS_LIBRARY})") + message(STATUS "Found Atlas (include: ${Atlas_CBLAS_INCLUDE_DIR} library: ${Atlas_BLAS_LIBRARY} lapack: ${Atlas_LAPACK_LIBRARY}") endif(ATLAS_FOUND) From 1e02d622da5aa01fbcf1185bced8e4b0daa0a50b Mon Sep 17 00:00:00 2001 From: max argus Date: Wed, 22 Mar 2017 23:24:13 +0000 Subject: [PATCH 205/264] [caffe][build] added ABS_TEST_DATA_DIR var. 
--- cmake/Templates/caffe_config.h.in | 15 ++++----------- include/caffe/test/test_caffe_main.hpp | 3 +-- src/caffe/test/test_gradient_based_solver.cpp | 2 +- src/caffe/test/test_hdf5_output_layer.cpp | 3 +-- src/caffe/test/test_hdf5data_layer.cpp | 3 +-- 5 files changed, 8 insertions(+), 18 deletions(-) diff --git a/cmake/Templates/caffe_config.h.in b/cmake/Templates/caffe_config.h.in index 45465b98305..2080c63df36 100644 --- a/cmake/Templates/caffe_config.h.in +++ b/cmake/Templates/caffe_config.h.in @@ -4,16 +4,9 @@ /* Binaries directory */ #define BINARY_FOLDER "${PROJECT_BINARY_DIR}" +/* This is an absolute path so that we can run test from any build + * directory */ +#define ABS_TEST_DATA_DIR "${PROJECT_SOURCE_DIR}/src/caffe/test/test_data/" + /* Test device */ #define CUDA_TEST_DEVICE ${CUDA_TEST_DEVICE} - -/* Temporary (TODO: remove) */ -#if 1 - #define CMAKE_SOURCE_DIR SOURCE_FOLDER "/src/" - #define EXAMPLES_SOURCE_DIR BINARY_FOLDER "/examples/" - #define CMAKE_EXT ".gen.cmake" -#else - #define CMAKE_SOURCE_DIR "src/" - #define EXAMPLES_SOURCE_DIR "examples/" - #define CMAKE_EXT "" -#endif diff --git a/include/caffe/test/test_caffe_main.hpp b/include/caffe/test/test_caffe_main.hpp index fc156091476..294f7e5011a 100644 --- a/include/caffe/test/test_caffe_main.hpp +++ b/include/caffe/test/test_caffe_main.hpp @@ -18,9 +18,8 @@ using std::endl; #include "caffe_config.h" #else #define CUDA_TEST_DEVICE -1 - #define CMAKE_SOURCE_DIR "src/" #define EXAMPLES_SOURCE_DIR "examples/" - #define CMAKE_EXT "" + #define ABS_TEST_DATA_DIR "src/caffe/test/test_data" #endif int main(int argc, char** argv); diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index 6ad0d8f6544..465140f28a6 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -28,7 +28,7 @@ class GradientBasedSolverTest : public MultiDeviceTest { seed_(1701), num_(4), channels_(3), height_(10), 
width_(10), share_(false) { input_file_ = new string( - CMAKE_SOURCE_DIR "caffe/test/test_data/solver_data_list.txt" CMAKE_EXT); + ABS_TEST_DATA_DIR "/solver_data_list.txt"); } ~GradientBasedSolverTest() { delete input_file_; diff --git a/src/caffe/test/test_hdf5_output_layer.cpp b/src/caffe/test/test_hdf5_output_layer.cpp index 2bc2de1e647..f94dd57e7de 100644 --- a/src/caffe/test/test_hdf5_output_layer.cpp +++ b/src/caffe/test/test_hdf5_output_layer.cpp @@ -20,8 +20,7 @@ class HDF5OutputLayerTest : public MultiDeviceTest { protected: HDF5OutputLayerTest() - : input_file_name_( - CMAKE_SOURCE_DIR "caffe/test/test_data/sample_data.h5"), + : input_file_name_(ABS_TEST_DATA_DIR "/sample_data.h5"), blob_data_(new Blob()), blob_label_(new Blob()), num_(5), diff --git a/src/caffe/test/test_hdf5data_layer.cpp b/src/caffe/test/test_hdf5data_layer.cpp index 487f5176caf..3977c4866c7 100644 --- a/src/caffe/test/test_hdf5data_layer.cpp +++ b/src/caffe/test/test_hdf5data_layer.cpp @@ -30,8 +30,7 @@ class HDF5DataLayerTest : public MultiDeviceTest { blob_top_vec_.push_back(blob_top_label2_); // Check out generate_sample_data.py in the same directory. 
- filename = new string( - CMAKE_SOURCE_DIR "caffe/test/test_data/sample_data_list.txt" CMAKE_EXT); + filename = new string(ABS_TEST_DATA_DIR "/sample_data_list.txt"); LOG(INFO)<< "Using sample HDF5 data file " << filename; } From 8602a238a712d50ac5a2d7dffadee2f34d755e3f Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Mon, 27 Mar 2017 11:33:06 -0700 Subject: [PATCH 206/264] Expose share_weights to python to allow running test nets --- python/caffe/_caffe.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index be011699098..276f21f85a5 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -298,6 +298,10 @@ void Solver_add_nccl(Solver* solver #endif } +void share_weights(Solver* solver, Net* net) { + net->ShareTrainedLayersWith(solver->net().get()); +} + template class NetCallback: public Net::Callback { public: @@ -459,6 +463,7 @@ BOOST_PYTHON_MODULE(_caffe) { .def("step", &Solver::Step) .def("restore", &Solver::Restore) .def("snapshot", &Solver::Snapshot) + .def("share_weights", &share_weights) .add_property("param", bp::make_function(&Solver::param, bp::return_value_policy())); BP_REGISTER_SHARED_PTR_TO_PYTHON(Solver); From 850ffd8d1cf18cabe36eb269b63d693db2b167ef Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Mon, 27 Mar 2017 13:15:18 -0700 Subject: [PATCH 207/264] Remove missed legacy parallel code --- include/caffe/layers/base_data_layer.hpp | 2 -- include/caffe/layers/data_layer.hpp | 2 -- include/caffe/layers/dummy_data_layer.hpp | 2 -- include/caffe/layers/hdf5_data_layer.hpp | 2 -- include/caffe/layers/hdf5_output_layer.hpp | 2 -- include/caffe/layers/input_layer.hpp | 2 -- include/caffe/layers/python_layer.hpp | 4 ---- src/caffe/proto/caffe.proto | 4 +--- 8 files changed, 1 insertion(+), 19 deletions(-) diff --git a/include/caffe/layers/base_data_layer.hpp b/include/caffe/layers/base_data_layer.hpp index 21d3ada50d0..c8b6998c8f2 100644 --- 
a/include/caffe/layers/base_data_layer.hpp +++ b/include/caffe/layers/base_data_layer.hpp @@ -26,8 +26,6 @@ class BaseDataLayer : public Layer { // This method may not be overridden except by the BasePrefetchingDataLayer. virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - // Data layers should be shared by multiple solvers in parallel - virtual inline bool ShareInParallel() const { return true; } virtual void DataLayerSetUp(const vector*>& bottom, const vector*>& top) {} // Data layers have no bottoms, so reshaping is trivial. diff --git a/include/caffe/layers/data_layer.hpp b/include/caffe/layers/data_layer.hpp index dec58180976..667a4ae43a5 100644 --- a/include/caffe/layers/data_layer.hpp +++ b/include/caffe/layers/data_layer.hpp @@ -20,8 +20,6 @@ class DataLayer : public BasePrefetchingDataLayer { virtual ~DataLayer(); virtual void DataLayerSetUp(const vector*>& bottom, const vector*>& top); - // DataLayer uses DataReader instead for sharing for parallelism - virtual inline bool ShareInParallel() const { return false; } virtual inline const char* type() const { return "Data"; } virtual inline int ExactNumBottomBlobs() const { return 0; } virtual inline int MinTopBlobs() const { return 1; } diff --git a/include/caffe/layers/dummy_data_layer.hpp b/include/caffe/layers/dummy_data_layer.hpp index 4180f1d01e4..13a63d47ec4 100644 --- a/include/caffe/layers/dummy_data_layer.hpp +++ b/include/caffe/layers/dummy_data_layer.hpp @@ -22,8 +22,6 @@ class DummyDataLayer : public Layer { : Layer(param) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - // Data layers should be shared by multiple solvers in parallel - virtual inline bool ShareInParallel() const { return true; } // Data layers have no bottoms, so reshaping is trivial. 
virtual void Reshape(const vector*>& bottom, const vector*>& top) {} diff --git a/include/caffe/layers/hdf5_data_layer.hpp b/include/caffe/layers/hdf5_data_layer.hpp index 650a3fb0c87..601b36c6b89 100644 --- a/include/caffe/layers/hdf5_data_layer.hpp +++ b/include/caffe/layers/hdf5_data_layer.hpp @@ -27,8 +27,6 @@ class HDF5DataLayer : public Layer { virtual ~HDF5DataLayer(); virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - // Data layers should be shared by multiple solvers in parallel - virtual inline bool ShareInParallel() const { return true; } // Data layers have no bottoms, so reshaping is trivial. virtual void Reshape(const vector*>& bottom, const vector*>& top) {} diff --git a/include/caffe/layers/hdf5_output_layer.hpp b/include/caffe/layers/hdf5_output_layer.hpp index 487d08fc06c..061e279d7a0 100644 --- a/include/caffe/layers/hdf5_output_layer.hpp +++ b/include/caffe/layers/hdf5_output_layer.hpp @@ -28,8 +28,6 @@ class HDF5OutputLayer : public Layer { virtual ~HDF5OutputLayer(); virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - // Data layers should be shared by multiple solvers in parallel - virtual inline bool ShareInParallel() const { return true; } // Data layers have no bottoms, so reshaping is trivial. virtual void Reshape(const vector*>& bottom, const vector*>& top) {} diff --git a/include/caffe/layers/input_layer.hpp b/include/caffe/layers/input_layer.hpp index f4472678c69..0ffdc724894 100644 --- a/include/caffe/layers/input_layer.hpp +++ b/include/caffe/layers/input_layer.hpp @@ -22,8 +22,6 @@ class InputLayer : public Layer { : Layer(param) {} virtual void LayerSetUp(const vector*>& bottom, const vector*>& top); - // Data layers should be shared by multiple solvers in parallel - virtual inline bool ShareInParallel() const { return true; } // Data layers have no bottoms, so reshaping is trivial. 
virtual void Reshape(const vector*>& bottom, const vector*>& top) {} diff --git a/include/caffe/layers/python_layer.hpp b/include/caffe/layers/python_layer.hpp index 10c4bfd0250..1407d9217aa 100644 --- a/include/caffe/layers/python_layer.hpp +++ b/include/caffe/layers/python_layer.hpp @@ -34,10 +34,6 @@ class PythonLayer : public Layer { self_.attr("reshape")(bottom, top); } - virtual inline bool ShareInParallel() const { - return this->layer_param_.python_param().share_in_parallel(); - } - virtual inline const char* type() const { return "Python"; } protected: diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index 02e0ddf57c1..8e528e8e083 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -937,9 +937,7 @@ message PythonParameter { // string, dictionary in Python dict format, JSON, etc. You may parse this // string in `setup` method and use it in `forward` and `backward`. optional string param_str = 3 [default = '']; - // Whether this PythonLayer is shared among worker solvers during data parallelism. - // If true, each worker solver sequentially run forward from this layer. - // This value should be set true if you are using it as a data layer. + // DEPRECATED optional bool share_in_parallel = 4 [default = false]; } From 9bd80b2f12649c6336b64c8ebcc2d1210755d1c7 Mon Sep 17 00:00:00 2001 From: Yuduo Wu Date: Wed, 29 Mar 2017 14:42:36 -0700 Subject: [PATCH 208/264] Fix typo in test_caffe_main.cpp: defice -> device --- src/caffe/test/test_caffe_main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/caffe/test/test_caffe_main.cpp b/src/caffe/test/test_caffe_main.cpp index 6473b74d0a6..8f333bd7105 100644 --- a/src/caffe/test/test_caffe_main.cpp +++ b/src/caffe/test/test_caffe_main.cpp @@ -15,7 +15,7 @@ int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); caffe::GlobalInit(&argc, &argv); #ifndef CPU_ONLY - // Before starting testing, let's first print out a few cuda defice info. 
+ // Before starting testing, let's first print out a few cuda device info. int device; cudaGetDeviceCount(&device); cout << "Cuda number of devices: " << device << endl; From a32114e6b2e098e2fdef47e397542b105eb58b66 Mon Sep 17 00:00:00 2001 From: Will Crichton Date: Fri, 31 Mar 2017 11:22:22 -0400 Subject: [PATCH 209/264] Fixed memory leaks in cudnn conv and relu --- src/caffe/layers/cudnn_conv_layer.cpp | 1 + src/caffe/layers/cudnn_relu_layer.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/src/caffe/layers/cudnn_conv_layer.cpp b/src/caffe/layers/cudnn_conv_layer.cpp index 1987fb096b0..efc9e04e8c0 100644 --- a/src/caffe/layers/cudnn_conv_layer.cpp +++ b/src/caffe/layers/cudnn_conv_layer.cpp @@ -252,6 +252,7 @@ CuDNNConvolutionLayer::~CuDNNConvolutionLayer() { } cudaFree(workspaceData); + delete [] workspace; delete [] stream_; delete [] handle_; delete [] fwd_algo_; diff --git a/src/caffe/layers/cudnn_relu_layer.cpp b/src/caffe/layers/cudnn_relu_layer.cpp index 795e0a9efb0..687c905763e 100644 --- a/src/caffe/layers/cudnn_relu_layer.cpp +++ b/src/caffe/layers/cudnn_relu_layer.cpp @@ -36,6 +36,7 @@ CuDNNReLULayer::~CuDNNReLULayer() { cudnnDestroyTensorDescriptor(this->bottom_desc_); cudnnDestroyTensorDescriptor(this->top_desc_); + cudnnDestroyActivationDescriptor(this->activ_desc_); cudnnDestroy(this->handle_); } From a2601eddf65bab54429244e350899b6d994f4f37 Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Fri, 31 Mar 2017 11:01:13 -0700 Subject: [PATCH 210/264] Revert "Fix Python net drawing script" This reverts commit db6cf0a728cad63c93b345f2203f3ad1f5d5c2f4. 
--- python/caffe/draw.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/caffe/draw.py b/python/caffe/draw.py index e4fd7aacce7..9eecf6d7b46 100644 --- a/python/caffe/draw.py +++ b/python/caffe/draw.py @@ -104,11 +104,11 @@ def get_layer_label(layer, rankdir): pooling_types_dict[layer.pooling_param.pool], layer.type, separator, - layer.pooling_param.kernel_size[0] if len(layer.pooling_param.kernel_size._values) else 1, + layer.pooling_param.kernel_size, separator, - layer.pooling_param.stride[0] if len(layer.pooling_param.stride._values) else 1, + layer.pooling_param.stride, separator, - layer.pooling_param.pad[0] if len(layer.pooling_param.pad._values) else 0) + layer.pooling_param.pad) else: node_label = '"%s%s(%s)"' % (layer.name, separator, layer.type) return node_label From 0096fe3d270a4833479076e18492de8b28564c80 Mon Sep 17 00:00:00 2001 From: Felix Abecassis Date: Fri, 31 Mar 2017 11:18:39 -0700 Subject: [PATCH 211/264] Add support for cuDNN v6 Support for cuDNN v4 and v5 is preserved. --- docs/installation.md | 4 ++-- include/caffe/util/cudnn.hpp | 10 ++++++++++ scripts/travis/install-deps.sh | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index 2e558027678..42f1d0ce09b 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -42,14 +42,14 @@ Optional dependencies: * [OpenCV](http://opencv.org/) >= 2.4 including 3.0 * IO libraries: `lmdb`, `leveldb` (note: leveldb requires `snappy`) -* cuDNN for GPU acceleration (v5) +* cuDNN for GPU acceleration (v6) Pycaffe and Matcaffe interfaces have their own natural needs. * For Python Caffe: `Python 2.7` or `Python 3.3+`, `numpy (>= 1.7)`, boost-provided `boost.python` * For MATLAB Caffe: MATLAB with the `mex` compiler. -**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). 
To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v5; older versions are supported in older Caffe. +**cuDNN Caffe**: for fastest operation Caffe is accelerated by drop-in integration of [NVIDIA cuDNN](https://developer.nvidia.com/cudnn). To speed up your Caffe models, install cuDNN then uncomment the `USE_CUDNN := 1` flag in `Makefile.config` when installing Caffe. Acceleration is automatic. The current version is cuDNN v6; older versions are supported in older Caffe. **CPU-only Caffe**: for cold-brewed CPU-only Caffe uncomment the `CPU_ONLY := 1` flag in `Makefile.config` to configure and build Caffe without CUDA. This is helpful for cloud or cluster deployment. diff --git a/include/caffe/util/cudnn.hpp b/include/caffe/util/cudnn.hpp index a7d8dbbad4c..498cfe385de 100644 --- a/include/caffe/util/cudnn.hpp +++ b/include/caffe/util/cudnn.hpp @@ -41,6 +41,10 @@ inline const char* cudnnGetErrorString(cudnnStatus_t status) { return "CUDNN_STATUS_NOT_SUPPORTED"; case CUDNN_STATUS_LICENSE_ERROR: return "CUDNN_STATUS_LICENSE_ERROR"; +#if CUDNN_VERSION_MIN(6, 0, 0) + case CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING: + return "CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING"; +#endif } return "Unknown cudnn status"; } @@ -109,8 +113,14 @@ template inline void setConvolutionDesc(cudnnConvolutionDescriptor_t* conv, cudnnTensorDescriptor_t bottom, cudnnFilterDescriptor_t filter, int pad_h, int pad_w, int stride_h, int stride_w) { +#if CUDNN_VERSION_MIN(6, 0, 0) CUDNN_CHECK(cudnnSetConvolution2dDescriptor(*conv, + pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION, + dataType::type)); +#else + CUDNN_CHECK(cudnnSetConvolution2dDescriptor(*conv, pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION)); +#endif } template diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh index 1900b16df54..1593ed8b59a 
100755 --- a/scripts/travis/install-deps.sh +++ b/scripts/travis/install-deps.sh @@ -104,7 +104,7 @@ if $WITH_CUDA ; then ln -s /usr/local/cuda-$CUDA_VERSION /usr/local/cuda if $WITH_CUDNN ; then - apt-get install -y --no-install-recommends libcudnn5-dev + apt-get install -y --no-install-recommends libcudnn6-dev fi fi From 179dafdb1a930cf86ff0956618bf8411b8dcd90e Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Fri, 31 Mar 2017 11:24:56 -0700 Subject: [PATCH 212/264] Add test for caffe.draw.draw_net() --- python/caffe/test/test_draw.py | 33 +++++++++++++++++++++++++++ scripts/travis/install-deps.sh | 2 ++ scripts/travis/install-python-deps.sh | 1 + 3 files changed, 36 insertions(+) create mode 100644 python/caffe/test/test_draw.py diff --git a/python/caffe/test/test_draw.py b/python/caffe/test/test_draw.py new file mode 100644 index 00000000000..1634145ee9d --- /dev/null +++ b/python/caffe/test/test_draw.py @@ -0,0 +1,33 @@ +import os +import unittest + +from google import protobuf + +import caffe.draw +from caffe.proto import caffe_pb2 + +def getFilenames(): + """Yields files in the source tree which are Net prototxts.""" + result = [] + + root_dir = os.path.abspath(os.path.join( + os.path.dirname(__file__), '..', '..', '..')) + assert os.path.exists(root_dir) + + for dirname in ('models', 'examples'): + dirname = os.path.join(root_dir, dirname) + assert os.path.exists(dirname) + for cwd, _, filenames in os.walk(dirname): + for filename in filenames: + filename = os.path.join(cwd, filename) + if filename.endswith('.prototxt') and 'solver' not in filename: + yield os.path.join(dirname, filename) + + +class TestDraw(unittest.TestCase): + def test_draw_net(self): + for filename in getFilenames(): + net = caffe_pb2.NetParameter() + with open(filename) as infile: + protobuf.text_format.Merge(infile.read(), net) + caffe.draw.draw_net(net, 'LR') diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh index 1900b16df54..59a9163d5fc 100755 --- 
a/scripts/travis/install-deps.sh +++ b/scripts/travis/install-deps.sh @@ -8,6 +8,7 @@ source $BASEDIR/defaults.sh apt-get -y update apt-get install -y --no-install-recommends \ build-essential \ + graphviz \ libboost-filesystem-dev \ libboost-python-dev \ libboost-system-dev \ @@ -31,6 +32,7 @@ if ! $WITH_PYTHON3 ; then python-dev \ python-numpy \ python-protobuf \ + python-pydot \ python-skimage else # Python3 diff --git a/scripts/travis/install-python-deps.sh b/scripts/travis/install-python-deps.sh index eeec302791f..910d35a93be 100755 --- a/scripts/travis/install-python-deps.sh +++ b/scripts/travis/install-python-deps.sh @@ -11,4 +11,5 @@ if ! $WITH_PYTHON3 ; then else # Python3 pip install --pre protobuf==3.0.0b3 + pip install pydot fi From 41e34c9061e9577c2b1dd56be65fd23ef26457fd Mon Sep 17 00:00:00 2001 From: Nitheesh Date: Tue, 4 Apr 2017 13:36:20 +0530 Subject: [PATCH 213/264] Minor fix for net drawing script --- python/caffe/draw.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/caffe/draw.py b/python/caffe/draw.py index 9eecf6d7b46..8411a41d1d4 100644 --- a/python/caffe/draw.py +++ b/python/caffe/draw.py @@ -91,11 +91,11 @@ def get_layer_label(layer, rankdir): separator, layer.type, separator, - layer.convolution_param.kernel_size[0] if len(layer.convolution_param.kernel_size._values) else 1, + layer.convolution_param.kernel_size[0] if len(layer.convolution_param.kernel_size) else 1, separator, - layer.convolution_param.stride[0] if len(layer.convolution_param.stride._values) else 1, + layer.convolution_param.stride[0] if len(layer.convolution_param.stride) else 1, separator, - layer.convolution_param.pad[0] if len(layer.convolution_param.pad._values) else 0) + layer.convolution_param.pad[0] if len(layer.convolution_param.pad) else 0) elif layer.type == 'Pooling': pooling_types_dict = get_pooling_types_dict() node_label = '"%s%s(%s %s)%skernel size: %d%sstride: %d%spad: %d"' %\ From 31bfe8fb498ea2e528da6463c9045b397992e028 
Mon Sep 17 00:00:00 2001 From: Nitheesh Date: Tue, 4 Apr 2017 13:40:31 +0530 Subject: [PATCH 214/264] Add main() for draw_net unittest, fix import errors --- python/caffe/test/test_draw.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/caffe/test/test_draw.py b/python/caffe/test/test_draw.py index 1634145ee9d..835bb5df010 100644 --- a/python/caffe/test/test_draw.py +++ b/python/caffe/test/test_draw.py @@ -1,7 +1,7 @@ import os import unittest -from google import protobuf +from google.protobuf import text_format import caffe.draw from caffe.proto import caffe_pb2 @@ -29,5 +29,9 @@ def test_draw_net(self): for filename in getFilenames(): net = caffe_pb2.NetParameter() with open(filename) as infile: - protobuf.text_format.Merge(infile.read(), net) + text_format.Merge(infile.read(), net) caffe.draw.draw_net(net, 'LR') + + +if __name__ == "__main__": + unittest.main() From 5f1ca848f8c9daa73f61f64413e15ab2cd6602e7 Mon Sep 17 00:00:00 2001 From: "Jonathan R. Williford" Date: Wed, 5 Apr 2017 10:03:31 +0000 Subject: [PATCH 215/264] Add example and small blurb about sigmoid layer. 
--- docs/tutorial/layers/sigmoid.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/tutorial/layers/sigmoid.md b/docs/tutorial/layers/sigmoid.md index 505318352c9..f18ac4b84ec 100644 --- a/docs/tutorial/layers/sigmoid.md +++ b/docs/tutorial/layers/sigmoid.md @@ -9,6 +9,16 @@ title: Sigmoid Layer * Header: [`./include/caffe/layers/sigmoid_layer.hpp`](https://github.com/BVLC/caffe/blob/master/include/caffe/layers/sigmoid_layer.hpp) * CPU implementation: [`./src/caffe/layers/sigmoid_layer.cpp`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/sigmoid_layer.cpp) * CUDA GPU implementation: [`./src/caffe/layers/sigmoid_layer.cu`](https://github.com/BVLC/caffe/blob/master/src/caffe/layers/sigmoid_layer.cu) +* Example (from [`./examples/mnist/mnist_autoencoder.prototxt`](https://github.com/BVLC/caffe/blob/master/examples/mnist/mnist_autoencoder.prototxt)): + + layer { + name: "encode1neuron" + bottom: "encode1" + top: "encode1neuron" + type: "Sigmoid" + } + +The `Sigmoid` layer computes `sigmoid(x)` for each element `x` in the bottom blob. 
## Parameters From ce7193c7385298825c8cabebd20f664f3f93f06a Mon Sep 17 00:00:00 2001 From: Guillaume Dumont Date: Sat, 8 Apr 2017 12:59:24 -0400 Subject: [PATCH 216/264] Removed repeated import Layer, get_solver --- python/caffe/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index 43a0c49be63..80f51716d82 100644 --- a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -1,5 +1,5 @@ from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver, NCCL, Timer -from ._caffe import init_log, log, set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed, solver_count, set_solver_count, solver_rank, set_solver_rank, set_multiprocess, Layer, get_solver +from ._caffe import init_log, log, set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed, solver_count, set_solver_count, solver_rank, set_solver_rank, set_multiprocess from ._caffe import __version__ from .proto.caffe_pb2 import TRAIN, TEST from .classifier import Classifier From b2a95fa7fcba2089b981eb30b47d9aeba2b89ce9 Mon Sep 17 00:00:00 2001 From: Bruno Bowden Date: Sat, 8 Apr 2017 15:54:04 -0700 Subject: [PATCH 217/264] Log shape dimensions for eltwise layer shape mismatch When layer shapes mismatch for the eltwise layer, caffe will fail a check but doesn't give any information on how the shapes mismatch. This logging information will make it easier to debug. Additionally this reorders the variables to CHECK(expected == actual), matching the JUnit convention. BEFORE: Check failed: bottom[i]->shape() == bottom[0]->shape() AFTER: Check failed: bottom[0]->shape() == bottom[i]->shape() bottom[0]: 1 4 (4), bottom[3]: 1 6 (6) NOTE: This removes use of CHECK_EQ in an earlier version of this PR, which caused a build warning due to include of glog/stl_logging.h. 
--- src/caffe/layers/eltwise_layer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/caffe/layers/eltwise_layer.cpp b/src/caffe/layers/eltwise_layer.cpp index 21256166bfa..3d82b0e1cbf 100644 --- a/src/caffe/layers/eltwise_layer.cpp +++ b/src/caffe/layers/eltwise_layer.cpp @@ -31,7 +31,9 @@ template void EltwiseLayer::Reshape(const vector*>& bottom, const vector*>& top) { for (int i = 1; i < bottom.size(); ++i) { - CHECK(bottom[i]->shape() == bottom[0]->shape()); + CHECK(bottom[0]->shape() == bottom[i]->shape()) + << "bottom[0]: " << bottom[0]->shape_string() + << ", bottom[" << i << "]: " << bottom[i]->shape_string(); } top[0]->ReshapeLike(*bottom[0]); // If max operation, we will initialize the vector index part. From 51728d1532dbee2853acb89a8a9653e82219953b Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Wed, 12 Apr 2017 01:42:59 -0700 Subject: [PATCH 218/264] Fix log parsing #5422 --- tools/extra/parse_log.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/extra/parse_log.sh b/tools/extra/parse_log.sh index 9892c897682..122eb9e6eed 100755 --- a/tools/extra/parse_log.sh +++ b/tools/extra/parse_log.sh @@ -39,7 +39,7 @@ rm aux.txt aux0.txt aux1.txt aux2.txt aux3.txt aux4.txt grep '] Solving ' $1 > aux.txt grep ', loss = ' $1 >> aux.txt grep 'Iteration ' aux.txt | sed 's/.*Iteration \([[:digit:]]*\).*/\1/g' > aux0.txt -grep ', loss = ' $1 | awk '{print $9}' > aux1.txt +grep ', loss = ' $1 | awk -F = '{print $2}' > aux1.txt grep ', lr = ' $1 | awk '{print $9}' > aux2.txt # Extracting elapsed seconds From bac59bed485dfa195600b5b12031401613fade05 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Wed, 12 Apr 2017 02:05:34 -0700 Subject: [PATCH 219/264] Allow using env vars for glog init from python --- python/caffe/_caffe.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 276f21f85a5..01b34b84190 100644 --- 
a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -51,14 +51,18 @@ const int NPY_DTYPE = NPY_FLOAT32; void set_mode_cpu() { Caffe::set_mode(Caffe::CPU); } void set_mode_gpu() { Caffe::set_mode(Caffe::GPU); } -void InitLog(int level) { - FLAGS_logtostderr = 1; - FLAGS_minloglevel = level; +void InitLog() { ::google::InitGoogleLogging(""); ::google::InstallFailureSignalHandler(); } -void InitLogInfo() { - InitLog(google::INFO); +void InitLogLevel(int level) { + FLAGS_minloglevel = level; + InitLog(); +} +void InitLogLevelPipe(int level, bool stderr) { + FLAGS_minloglevel = level; + FLAGS_logtostderr = stderr; + InitLog(); } void Log(const string& s) { LOG(INFO) << s; @@ -353,7 +357,8 @@ BOOST_PYTHON_MODULE(_caffe) { // Caffe utility functions bp::def("init_log", &InitLog); - bp::def("init_log", &InitLogInfo); + bp::def("init_log", &InitLogLevel); + bp::def("init_log", &InitLogLevelPipe); bp::def("log", &Log); bp::def("set_mode_cpu", &set_mode_cpu); bp::def("set_mode_gpu", &set_mode_gpu); From 35a7b87ad87457291dfc79bf8a7e7cf7ef278cbb Mon Sep 17 00:00:00 2001 From: Noiredd Date: Wed, 12 Apr 2017 11:59:06 +0200 Subject: [PATCH 220/264] fixes pycaffe forward() and backward() behavior for nets whose layer names do not match respective tops --- python/caffe/pycaffe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index 63606591bb4..4a7b5a24c46 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -113,7 +113,7 @@ def _Net_forward(self, blobs=None, start=None, end=None, **kwargs): if end is not None: end_ind = list(self._layer_names).index(end) - outputs = set([end] + blobs) + outputs = set(self.top_names[end] + blobs) else: end_ind = len(self.layers) - 1 outputs = set(self.outputs + blobs) @@ -161,7 +161,7 @@ def _Net_backward(self, diffs=None, start=None, end=None, **kwargs): if end is not None: end_ind = list(self._layer_names).index(end) - outputs = set([end] + diffs) + 
outputs = set(self.bottom_names[end] + diffs) else: end_ind = 0 outputs = set(self.inputs + diffs) From 3a987960d6a08b179eb6c0c526b27ab761ea2d6e Mon Sep 17 00:00:00 2001 From: Kang Kim Date: Thu, 13 Apr 2017 15:23:26 +0900 Subject: [PATCH 221/264] remove redundant check in LSTMUnitLayer --- src/caffe/layers/lstm_unit_layer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/caffe/layers/lstm_unit_layer.cpp b/src/caffe/layers/lstm_unit_layer.cpp index 277c031ad15..d1ab59c4bd1 100644 --- a/src/caffe/layers/lstm_unit_layer.cpp +++ b/src/caffe/layers/lstm_unit_layer.cpp @@ -31,7 +31,6 @@ void LSTMUnitLayer::Reshape(const vector*>& bottom, CHECK_EQ(num_instances, bottom[i]->shape(1)); } hidden_dim_ = bottom[0]->shape(2); - CHECK_EQ(num_instances, bottom[1]->shape(1)); CHECK_EQ(4 * hidden_dim_, bottom[1]->shape(2)); top[0]->ReshapeLike(*bottom[0]); top[1]->ReshapeLike(*bottom[0]); From 96870628698090813d92a9b1f8af9a8311469354 Mon Sep 17 00:00:00 2001 From: Guillaume Dumont Date: Thu, 13 Apr 2017 13:15:24 -0400 Subject: [PATCH 222/264] Bump boost version to 1.55 in CMake build --- cmake/Dependencies.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 02c81525bce..4a5bac471b4 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -5,7 +5,7 @@ set(Caffe_DEFINITIONS "") set(Caffe_COMPILE_OPTIONS "") # ---[ Boost -find_package(Boost 1.46 REQUIRED COMPONENTS system thread filesystem) +find_package(Boost 1.55 REQUIRED COMPONENTS system thread filesystem) list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${Boost_INCLUDE_DIRS}) list(APPEND Caffe_LINKER_LIBS PUBLIC ${Boost_LIBRARIES}) From 0c9cc62379e4061b58b0dfa257d79c2ecaeb2be8 Mon Sep 17 00:00:00 2001 From: Guillaume Dumont Date: Sat, 11 Mar 2017 20:12:40 -0500 Subject: [PATCH 223/264] Added support for python 3 and NCCL --- python/caffe/__init__.py | 2 +- python/caffe/_caffe.cpp | 32 +++++++++++++++++++++++++++++++- 
python/caffe/test/test_nccl.py | 19 +++++++++++++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) create mode 100644 python/caffe/test/test_nccl.py diff --git a/python/caffe/__init__.py b/python/caffe/__init__.py index 80f51716d82..776945eec88 100644 --- a/python/caffe/__init__.py +++ b/python/caffe/__init__.py @@ -1,5 +1,5 @@ from .pycaffe import Net, SGDSolver, NesterovSolver, AdaGradSolver, RMSPropSolver, AdaDeltaSolver, AdamSolver, NCCL, Timer -from ._caffe import init_log, log, set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed, solver_count, set_solver_count, solver_rank, set_solver_rank, set_multiprocess +from ._caffe import init_log, log, set_mode_cpu, set_mode_gpu, set_device, Layer, get_solver, layer_type_list, set_random_seed, solver_count, set_solver_count, solver_rank, set_solver_rank, set_multiprocess, has_nccl from ._caffe import __version__ from .proto.caffe_pb2 import TRAIN, TEST from .classifier import Classifier diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 01b34b84190..7fc06c08f73 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -347,6 +347,35 @@ class NCCL { }; #endif +bool HasNCCL() { +#ifdef USE_NCCL + return true; +#else + return false; +#endif +} + +#ifdef USE_NCCL +bp::object NCCL_New_Uid() { + std::string uid = NCCL::new_uid(); +#if PY_MAJOR_VERSION >= 3 + // Convert std::string to bytes so that Python does not + // try to decode the string using the current locale. + + // Since boost 1.53 boost.python will convert str and bytes + // to std::string but will convert std::string to str. Here we + // force a bytes object to be returned. When this object + // is passed back to the NCCL constructor boost.python will + // correctly convert the bytes to std::string automatically + PyObject* py_uid = PyBytes_FromString(uid.c_str()); + return bp::object(bp::handle<>(py_uid)); +#else + // automatic conversion is correct for python 2. 
+ return uid; +#endif +} +#endif + BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(SolveOverloads, Solve, 0, 1); BOOST_PYTHON_MODULE(_caffe) { @@ -360,6 +389,7 @@ BOOST_PYTHON_MODULE(_caffe) { bp::def("init_log", &InitLogLevel); bp::def("init_log", &InitLogLevelPipe); bp::def("log", &Log); + bp::def("has_nccl", &HasNCCL); bp::def("set_mode_cpu", &set_mode_cpu); bp::def("set_mode_gpu", &set_mode_gpu); bp::def("set_random_seed", &set_random_seed); @@ -518,7 +548,7 @@ BOOST_PYTHON_MODULE(_caffe) { boost::noncopyable>("NCCL", bp::init >, const string&>()) #ifdef USE_NCCL - .def("new_uid", &NCCL::new_uid).staticmethod("new_uid") + .def("new_uid", NCCL_New_Uid).staticmethod("new_uid") .def("bcast", &NCCL::Broadcast) #endif /* NOLINT_NEXT_LINE(whitespace/semicolon) */ diff --git a/python/caffe/test/test_nccl.py b/python/caffe/test/test_nccl.py new file mode 100644 index 00000000000..127a9337040 --- /dev/null +++ b/python/caffe/test/test_nccl.py @@ -0,0 +1,19 @@ +import sys +import unittest + +import caffe + + +class TestNCCL(unittest.TestCase): + + def test_newuid(self): + """ + Test that NCCL uids are of the proper type + according to python version + """ + if caffe.has_nccl(): + uid = caffe.NCCL.new_uid() + if sys.version_info.major >= 3: + self.assertTrue(isinstance(uid, bytes)) + else: + self.assertTrue(isinstance(uid, str)) From e98023af4a570e3105486b661e4c4d1855c0dd79 Mon Sep 17 00:00:00 2001 From: Patrick Follmann Date: Thu, 29 Dec 2016 14:37:21 +0100 Subject: [PATCH 224/264] Add GPU sqrt functions --- include/caffe/util/math_functions.hpp | 3 +++ src/caffe/util/math_functions.cu | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index 37abce5eccc..60a8404a044 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -214,6 +214,9 @@ void caffe_gpu_log(const int n, const Dtype* a, Dtype* y); template void caffe_gpu_powx(const int n, 
const Dtype* a, const Dtype b, Dtype* y); +template +void caffe_gpu_sqrt(const int n, const Dtype* a, Dtype* y); + // caffe_gpu_rng_uniform with two arguments generates integers in the range // [0, UINT_MAX]. void caffe_gpu_rng_uniform(const int n, unsigned int* r); diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu index 6d001026082..314e6ba0f63 100644 --- a/src/caffe/util/math_functions.cu +++ b/src/caffe/util/math_functions.cu @@ -387,6 +387,27 @@ void caffe_gpu_powx(const int N, const double* a, N, a, alpha, y); } +template +__global__ void sqrt_kernel(const int n, const Dtype* a, Dtype* y) { + CUDA_KERNEL_LOOP(index, n) { + y[index] = sqrt(a[index]); + } +} + +template <> +void caffe_gpu_sqrt(const int N, const float* a, float* y) { + // NOLINT_NEXT_LINE(whitespace/operators) + sqrt_kernel<<>>( + N, a, y); +} + +template <> +void caffe_gpu_sqrt(const int N, const double* a, double* y) { + // NOLINT_NEXT_LINE(whitespace/operators) + sqrt_kernel<<>>( + N, a, y); +} + DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sign, y[index] = (Dtype(0) < x[index]) - (x[index] < Dtype(0))); DEFINE_AND_INSTANTIATE_GPU_UNARY_FUNC(sgnbit, y[index] = signbit(x[index])); From e93fcd267582888f960ca48d6e0c2e719d4ea09b Mon Sep 17 00:00:00 2001 From: Patrick Follmann Date: Thu, 29 Dec 2016 14:46:16 +0100 Subject: [PATCH 225/264] GPU BatchNormLayer: replace powx with mul and sqrt --- src/caffe/layers/batch_norm_layer.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/caffe/layers/batch_norm_layer.cu b/src/caffe/layers/batch_norm_layer.cu index c21713c81d9..a35e778e2f1 100644 --- a/src/caffe/layers/batch_norm_layer.cu +++ b/src/caffe/layers/batch_norm_layer.cu @@ -48,14 +48,14 @@ void BatchNormLayer::Forward_gpu(const vector*>& bottom, if (!use_global_stats_) { // compute variance using var(X) = E((X-EX)^2) - caffe_gpu_powx(top[0]->count(), top_data, Dtype(2), + caffe_gpu_mul(top[0]->count(), top[0]->gpu_data(), top[0]->gpu_data(), 
temp_.mutable_gpu_data()); // (X-EX)^2 caffe_gpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, 1. / (num * spatial_dim), temp_.gpu_data(), spatial_sum_multiplier_.gpu_data(), 0., num_by_chans_.mutable_gpu_data()); - caffe_gpu_gemv(CblasTrans, num, channels_, 1., - num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), 0., + caffe_gpu_gemv(CblasTrans, num, channels_, Dtype(1.), + num_by_chans_.gpu_data(), batch_sum_multiplier_.gpu_data(), Dtype(0.), variance_.mutable_gpu_data()); // E((X_EX)^2) // compute and save moving average @@ -72,7 +72,7 @@ void BatchNormLayer::Forward_gpu(const vector*>& bottom, // normalize variance caffe_gpu_add_scalar(variance_.count(), eps_, variance_.mutable_gpu_data()); - caffe_gpu_powx(variance_.count(), variance_.gpu_data(), Dtype(0.5), + caffe_gpu_sqrt(variance_.count(), variance_.gpu_data(), variance_.mutable_gpu_data()); // replicate variance to input size From ab3398832964c1ff1bf6b78501e4e43a11f282a1 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Thu, 13 Apr 2017 13:25:16 -0700 Subject: [PATCH 226/264] Add CPU sqrt functions --- include/caffe/util/math_functions.hpp | 3 +++ src/caffe/util/math_functions.cpp | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp index 60a8404a044..e549120a933 100644 --- a/include/caffe/util/math_functions.hpp +++ b/include/caffe/util/math_functions.hpp @@ -52,6 +52,9 @@ void caffe_scal(const int N, const Dtype alpha, Dtype *X); template void caffe_sqr(const int N, const Dtype* a, Dtype* y); +template +void caffe_sqrt(const int N, const Dtype* a, Dtype* y); + template void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y); diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index 71c02274a75..59625bc05ce 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -196,6 +196,16 @@ void caffe_sqr(const int n, const double* a, 
double* y) { vdSqr(n, a, y); } +template <> +void caffe_sqrt(const int n, const float* a, float* y) { + vsSqrt(n, a, y); +} + +template <> +void caffe_sqrt(const int n, const double* a, double* y) { + vdSqrt(n, a, y); +} + template <> void caffe_exp(const int n, const float* a, float* y) { vsExp(n, a, y); From 1c15d94f7da736945450e6ed321077f3045445b1 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Thu, 13 Apr 2017 13:26:16 -0700 Subject: [PATCH 227/264] CPU BatchNormLayer: replace powx with sqr and sqrt --- src/caffe/layers/batch_norm_layer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp index 0a08ed4cb07..c6a1d5b1b2c 100644 --- a/src/caffe/layers/batch_norm_layer.cpp +++ b/src/caffe/layers/batch_norm_layer.cpp @@ -124,8 +124,8 @@ void BatchNormLayer::Forward_cpu(const vector*>& bottom, if (!use_global_stats_) { // compute variance using var(X) = E((X-EX)^2) - caffe_powx(top[0]->count(), top_data, Dtype(2), - temp_.mutable_cpu_data()); // (X-EX)^2 + caffe_sqr(top[0]->count(), top_data, + temp_.mutable_cpu_data()); // (X-EX)^2 caffe_cpu_gemv(CblasNoTrans, channels_ * num, spatial_dim, 1. 
/ (num * spatial_dim), temp_.cpu_data(), spatial_sum_multiplier_.cpu_data(), 0., @@ -148,7 +148,7 @@ void BatchNormLayer::Forward_cpu(const vector*>& bottom, // normalize variance caffe_add_scalar(variance_.count(), eps_, variance_.mutable_cpu_data()); - caffe_powx(variance_.count(), variance_.cpu_data(), Dtype(0.5), + caffe_sqrt(variance_.count(), variance_.cpu_data(), variance_.mutable_cpu_data()); // replicate variance to input size From 3d5bed06a9b6b8a5dfd3db8da33f2fa3bc9a1213 Mon Sep 17 00:00:00 2001 From: Jeff Donahue Date: Thu, 13 Apr 2017 14:15:16 -0700 Subject: [PATCH 228/264] fix: add non-MKL sqrt (should have been included in ab33988) --- include/caffe/util/mkl_alternate.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/caffe/util/mkl_alternate.hpp b/include/caffe/util/mkl_alternate.hpp index 79b2c32de94..8c2294c7c86 100644 --- a/include/caffe/util/mkl_alternate.hpp +++ b/include/caffe/util/mkl_alternate.hpp @@ -37,6 +37,7 @@ extern "C" { } DEFINE_VSL_UNARY_FUNC(Sqr, y[i] = a[i] * a[i]) +DEFINE_VSL_UNARY_FUNC(Sqrt, y[i] = sqrt(a[i])) DEFINE_VSL_UNARY_FUNC(Exp, y[i] = exp(a[i])) DEFINE_VSL_UNARY_FUNC(Ln, y[i] = log(a[i])) DEFINE_VSL_UNARY_FUNC(Abs, y[i] = fabs(a[i])) From 2ec19b6177111526d2df362d29d0e08aa5645a22 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 13 Apr 2017 14:22:30 -0700 Subject: [PATCH 229/264] deprecate WindowData layer type --- include/caffe/layers/window_data_layer.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/caffe/layers/window_data_layer.hpp b/include/caffe/layers/window_data_layer.hpp index 35f41b80e63..b9b66b7cf1d 100644 --- a/include/caffe/layers/window_data_layer.hpp +++ b/include/caffe/layers/window_data_layer.hpp @@ -16,7 +16,8 @@ namespace caffe { /** * @brief Provides data to the Net from windows of images files, specified - * by a window data file. + * by a window data file. This layer is *DEPRECATED* and only kept for + * archival purposes for use by the original R-CNN. 
* * TODO(dox): thorough documentation for Forward and proto params. */ From e7163f650885b9f7b9cae1c3253aa97d9fe30d86 Mon Sep 17 00:00:00 2001 From: Guillaume Dumont Date: Thu, 13 Apr 2017 20:32:40 -0400 Subject: [PATCH 230/264] Updated Travis boost dependencies --- scripts/travis/install-deps.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh index 2fa2a74a486..dac5d2f9d37 100755 --- a/scripts/travis/install-deps.sh +++ b/scripts/travis/install-deps.sh @@ -9,10 +9,10 @@ apt-get -y update apt-get install -y --no-install-recommends \ build-essential \ graphviz \ - libboost-filesystem-dev \ - libboost-python-dev \ - libboost-system-dev \ - libboost-thread-dev \ + libboost-filesystem1.55-dev \ + libboost-python1.55-dev \ + libboost-system1.55-dev \ + libboost-thread1.55-dev \ libgflags-dev \ libgoogle-glog-dev \ libhdf5-serial-dev \ From 8bc82c635914676d51ecd2849cc69f6fb6042496 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Thu, 13 Apr 2017 19:14:57 -0700 Subject: [PATCH 231/264] [examples] switch cifar-10 back to proto instead of h5 serialization (it's more common) --- examples/cifar10/cifar10_quick_solver.prototxt | 1 - examples/cifar10/train_full.sh | 4 ++-- examples/cifar10/train_quick.sh | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/cifar10/cifar10_quick_solver.prototxt b/examples/cifar10/cifar10_quick_solver.prototxt index 5de276f722f..14b4401ba16 100644 --- a/examples/cifar10/cifar10_quick_solver.prototxt +++ b/examples/cifar10/cifar10_quick_solver.prototxt @@ -20,7 +20,6 @@ display: 100 max_iter: 4000 # snapshot intermediate results snapshot: 4000 -snapshot_format: HDF5 snapshot_prefix: "examples/cifar10/cifar10_quick" # solver mode: CPU or GPU solver_mode: GPU diff --git a/examples/cifar10/train_full.sh b/examples/cifar10/train_full.sh index 06ecc2dccb0..fe46e60d795 100755 --- a/examples/cifar10/train_full.sh +++ 
b/examples/cifar10/train_full.sh @@ -9,9 +9,9 @@ $TOOLS/caffe train \ # reduce learning rate by factor of 10 $TOOLS/caffe train \ --solver=examples/cifar10/cifar10_full_solver_lr1.prototxt \ - --snapshot=examples/cifar10/cifar10_full_iter_60000.solverstate.h5 $@ + --snapshot=examples/cifar10/cifar10_full_iter_60000.solverstate $@ # reduce learning rate by factor of 10 $TOOLS/caffe train \ --solver=examples/cifar10/cifar10_full_solver_lr2.prototxt \ - --snapshot=examples/cifar10/cifar10_full_iter_65000.solverstate.h5 $@ + --snapshot=examples/cifar10/cifar10_full_iter_65000.solverstate $@ diff --git a/examples/cifar10/train_quick.sh b/examples/cifar10/train_quick.sh index d2b875340ee..257479e0d77 100755 --- a/examples/cifar10/train_quick.sh +++ b/examples/cifar10/train_quick.sh @@ -9,4 +9,4 @@ $TOOLS/caffe train \ # reduce learning rate by factor of 10 after 8 epochs $TOOLS/caffe train \ --solver=examples/cifar10/cifar10_quick_solver_lr1.prototxt \ - --snapshot=examples/cifar10/cifar10_quick_iter_4000.solverstate.h5 $@ + --snapshot=examples/cifar10/cifar10_quick_iter_4000.solverstate $@ From aa29eba26b781349174cb856b6ea96360ebbb3f2 Mon Sep 17 00:00:00 2001 From: Guillaume Dumont Date: Thu, 13 Apr 2017 22:37:13 -0400 Subject: [PATCH 232/264] Explicit std::string to bp::object conversion --- python/caffe/_caffe.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 7fc06c08f73..d7f43fff62d 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -371,7 +371,7 @@ bp::object NCCL_New_Uid() { return bp::object(bp::handle<>(py_uid)); #else // automatic conversion is correct for python 2. - return uid; + return bp::object(uid); #endif } #endif From c19c9602d031274ce77eb6a94ce2a9e8d843d98f Mon Sep 17 00:00:00 2001 From: Carl Doersch Date: Tue, 25 Aug 2015 11:26:14 -0700 Subject: [PATCH 233/264] Test for python forward and backward with start and end layer. 
--- python/caffe/test/test_net.py | 45 +++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/python/caffe/test/test_net.py b/python/caffe/test/test_net.py index 24391cc50c4..afd27690981 100644 --- a/python/caffe/test/test_net.py +++ b/python/caffe/test/test_net.py @@ -25,11 +25,11 @@ def simple_net_file(num_output): bias_filler { type: 'constant' value: 2 } } param { decay_mult: 1 } param { decay_mult: 0 } } - layer { type: 'InnerProduct' name: 'ip' bottom: 'conv' top: 'ip' + layer { type: 'InnerProduct' name: 'ip' bottom: 'conv' top: 'ip_blob' inner_product_param { num_output: """ + str(num_output) + """ weight_filler { type: 'gaussian' std: 2.5 } bias_filler { type: 'constant' value: -3 } } } - layer { type: 'SoftmaxWithLoss' name: 'loss' bottom: 'ip' bottom: 'label' + layer { type: 'SoftmaxWithLoss' name: 'loss' bottom: 'ip_blob' bottom: 'label' top: 'loss' }""") f.close() return f.name @@ -71,6 +71,43 @@ def test_forward_backward(self): self.net.forward() self.net.backward() + def test_forward_start_end(self): + conv_blob=self.net.blobs['conv']; + ip_blob=self.net.blobs['ip_blob']; + sample_data=np.random.uniform(size=conv_blob.data.shape); + sample_data=sample_data.astype(np.float32); + conv_blob.data[:]=sample_data; + forward_blob=self.net.forward(start='ip',end='ip'); + self.assertIn('ip_blob',forward_blob); + + manual_forward=[]; + for i in range(0,conv_blob.data.shape[0]): + dot=np.dot(self.net.params['ip'][0].data, + conv_blob.data[i].reshape(-1)); + manual_forward.append(dot+self.net.params['ip'][1].data); + manual_forward=np.array(manual_forward); + + np.testing.assert_allclose(ip_blob.data,manual_forward,rtol=1e-3); + + def test_backward_start_end(self): + conv_blob=self.net.blobs['conv']; + ip_blob=self.net.blobs['ip_blob']; + sample_data=np.random.uniform(size=ip_blob.data.shape) + sample_data=sample_data.astype(np.float32); + ip_blob.diff[:]=sample_data; + 
backward_blob=self.net.backward(start='ip',end='ip'); + self.assertIn('conv',backward_blob); + + manual_backward=[]; + for i in range(0,conv_blob.data.shape[0]): + dot=np.dot(self.net.params['ip'][0].data.transpose(), + sample_data[i].reshape(-1)); + manual_backward.append(dot); + manual_backward=np.array(manual_backward); + manual_backward=manual_backward.reshape(conv_blob.data.shape); + + np.testing.assert_allclose(conv_blob.diff,manual_backward,rtol=1e-3); + def test_clear_param_diffs(self): # Run a forward/backward step to have non-zero diffs self.net.forward() @@ -90,13 +127,13 @@ def test_top_bottom_names(self): self.assertEqual(self.net.top_names, OrderedDict([('data', ['data', 'label']), ('conv', ['conv']), - ('ip', ['ip']), + ('ip', ['ip_blob']), ('loss', ['loss'])])) self.assertEqual(self.net.bottom_names, OrderedDict([('data', []), ('conv', ['data']), ('ip', ['conv']), - ('loss', ['ip', 'label'])])) + ('loss', ['ip_blob', 'label'])])) def test_save_and_read(self): f = tempfile.NamedTemporaryFile(mode='w+', delete=False) From 451944333510e1ea9b0bdac11e4ec201e5284714 Mon Sep 17 00:00:00 2001 From: jgyllinsky Date: Fri, 14 Apr 2017 03:11:59 -0400 Subject: [PATCH 234/264] [docs] added apt command to install OpenBLAS (#4718) --- docs/install_apt.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/install_apt.md b/docs/install_apt.md index bc1566b0be9..ee2cd287701 100644 --- a/docs/install_apt.md +++ b/docs/install_apt.md @@ -14,7 +14,7 @@ The NVIDIA package tends to follow more recent library and driver versions, but If installing from packages, install the library and latest driver separately; the driver bundled with the library is usually out-of-date. This can be skipped for CPU-only installation. -**BLAS**: install ATLAS by `sudo apt-get install libatlas-base-dev` or install OpenBLAS or MKL for better CPU performance. 
+**BLAS**: install ATLAS by `sudo apt-get install libatlas-base-dev` or install OpenBLAS by `sudo apt-get install libopenblas-dev` or MKL for better CPU performance. **Python** (optional): if you use the default Python you will need to `sudo apt-get install` the `python-dev` package to have the Python headers for building the pycaffe interface. From 80073497045d3101492a28a8a2c87dff65d64ff4 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 14 Apr 2017 10:17:52 -0700 Subject: [PATCH 235/264] fix lint errors that snuck in by #4566 --- src/caffe/test/test_gradient_based_solver.cpp | 12 ++++++++---- src/caffe/test/test_neuron_layer.cpp | 9 ++++++--- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/caffe/test/test_gradient_based_solver.cpp b/src/caffe/test/test_gradient_based_solver.cpp index 05cab909798..f4395f5311c 100644 --- a/src/caffe/test/test_gradient_based_solver.cpp +++ b/src/caffe/test/test_gradient_based_solver.cpp @@ -558,9 +558,11 @@ class GradientBasedSolverTest : public MultiDeviceTest { const vector*>& params = solver_->net()->learnable_params(); for (int i = 0; i < params.size(); ++i) { for (int j = 0; j < params[i]->count(); ++j) { - EXPECT_FLOAT_EQ(param_copies[i]->cpu_data()[j], params[i]->cpu_data()[j]) + EXPECT_FLOAT_EQ(param_copies[i]->cpu_data()[j], + params[i]->cpu_data()[j]) << "param " << i << " data differed at dim " << j; - EXPECT_FLOAT_EQ(param_copies[i]->cpu_diff()[j], params[i]->cpu_diff()[j]) + EXPECT_FLOAT_EQ(param_copies[i]->cpu_diff()[j], + params[i]->cpu_diff()[j]) << "param " << i << " diff differed at dim " << j; } } @@ -569,9 +571,11 @@ class GradientBasedSolverTest : public MultiDeviceTest { const vector > >& history = solver_->history(); for (int i = 0; i < history.size(); ++i) { for (int j = 0; j < history[i]->count(); ++j) { - EXPECT_FLOAT_EQ(history_copies[i]->cpu_data()[j], history[i]->cpu_data()[j]) + EXPECT_FLOAT_EQ(history_copies[i]->cpu_data()[j], + history[i]->cpu_data()[j]) << "history blob " << 
i << " data differed at dim " << j; - EXPECT_FLOAT_EQ(history_copies[i]->cpu_diff()[j], history[i]->cpu_diff()[j]) + EXPECT_FLOAT_EQ(history_copies[i]->cpu_diff()[j], + history[i]->cpu_diff()[j]) << "history blob " << i << " diff differed at dim " << j; } } diff --git a/src/caffe/test/test_neuron_layer.cpp b/src/caffe/test/test_neuron_layer.cpp index 57bd47b3a2e..180871a29ee 100644 --- a/src/caffe/test/test_neuron_layer.cpp +++ b/src/caffe/test/test_neuron_layer.cpp @@ -791,13 +791,16 @@ TYPED_TEST(NeuronLayerTest, TestPReLUInPlace) { ip2.Backward(blob_middle_vec_2, propagate_down, blob_bottom_vec_2); // Check numbers for (int s = 0; s < blob_bottom_2->count(); ++s) { - EXPECT_FLOAT_EQ(this->blob_bottom_->cpu_diff()[s], blob_bottom_2->cpu_diff()[s]); + EXPECT_FLOAT_EQ(this->blob_bottom_->cpu_diff()[s], + blob_bottom_2->cpu_diff()[s]); } for (int s = 0; s < ip.blobs()[0]->count(); ++s) { - EXPECT_FLOAT_EQ(ip.blobs()[0]->cpu_diff()[s], ip2.blobs()[0]->cpu_diff()[s]); + EXPECT_FLOAT_EQ(ip.blobs()[0]->cpu_diff()[s], + ip2.blobs()[0]->cpu_diff()[s]); } for (int s = 0; s < ip.blobs()[1]->count(); ++s) { - EXPECT_FLOAT_EQ(ip.blobs()[1]->cpu_diff()[s], ip2.blobs()[1]->cpu_diff()[s]); + EXPECT_FLOAT_EQ(ip.blobs()[1]->cpu_diff()[s], + ip2.blobs()[1]->cpu_diff()[s]); } for (int s = 0; s < prelu.blobs()[0]->count(); ++s) { EXPECT_FLOAT_EQ(prelu.blobs()[0]->cpu_diff()[s], From 4db619aec9cd384b11a1c55fac257d14b704bb15 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Fri, 14 Apr 2017 12:30:50 -0700 Subject: [PATCH 236/264] Docker update to cuDNN 6 --- docker/cpu/Dockerfile | 3 ++- docker/gpu/Dockerfile | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docker/cpu/Dockerfile b/docker/cpu/Dockerfile index af6c03c6589..67e2e61bd57 100644 --- a/docker/cpu/Dockerfile +++ b/docker/cpu/Dockerfile @@ -28,7 +28,8 @@ ENV CAFFE_ROOT=/opt/caffe WORKDIR $CAFFE_ROOT # FIXME: use ARG instead of ENV once DockerHub supports this -ENV CLONE_TAG=rc4 +# 
https://github.com/docker/hub-feedback/issues/460 +ENV CLONE_TAG=1.0 RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/BVLC/caffe.git . && \ pip install --upgrade pip && \ diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile index 0785b10f1e7..dcdbdf326fb 100644 --- a/docker/gpu/Dockerfile +++ b/docker/gpu/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu16.04 +FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04 LABEL maintainer caffe-maint@googlegroups.com RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -28,7 +28,8 @@ ENV CAFFE_ROOT=/opt/caffe WORKDIR $CAFFE_ROOT # FIXME: use ARG instead of ENV once DockerHub supports this -ENV CLONE_TAG=rc4 +# https://github.com/docker/hub-feedback/issues/460 +ENV CLONE_TAG=1.0 RUN git clone -b ${CLONE_TAG} --depth 1 https://github.com/BVLC/caffe.git . && \ pip install --upgrade pip && \ From 44da39f662a24de746fa83b92bd670fe41b3a7da Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 14 Apr 2017 12:36:41 -0700 Subject: [PATCH 237/264] BVLC -> BAIR Berkeley AI Research (BAIR) is the successor to the Berkeley Vision and Learning Center (BVLC). --- CONTRIBUTORS.md | 2 +- README.md | 6 +++--- docs/_layouts/default.html | 2 +- docs/development.md | 4 ++-- docs/index.md | 10 +++++----- docs/model_zoo.md | 18 +++++++++--------- docs/multigpu.md | 4 ++-- docs/performance_hardware.md | 2 +- docs/tutorial/interfaces.md | 4 ++-- examples/finetune_flickr_style/readme.md | 2 +- models/bvlc_alexnet/readme.md | 2 +- models/bvlc_googlenet/readme.md | 2 +- models/bvlc_reference_caffenet/readme.md | 2 +- models/bvlc_reference_rcnn_ilsvrc13/readme.md | 2 +- 14 files changed, 31 insertions(+), 31 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 8db66ea82c6..3fd767812e9 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -1,6 +1,6 @@ # Contributors -Caffe is developed by a core set of BVLC members and the open-source community. 
+Caffe is developed by a core set of BAIR members and the open-source community. We thank all of our [contributors](https://github.com/BVLC/caffe/graphs/contributors)! diff --git a/README.md b/README.md index 44b9e62c157..0ae3616b4a6 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,13 @@ [![License](https://img.shields.io/badge/license-BSD-blue.svg)](LICENSE) Caffe is a deep learning framework made with expression, speed, and modularity in mind. -It is developed by the Berkeley Vision and Learning Center ([BVLC](http://bvlc.eecs.berkeley.edu)) and community contributors. +It is developed by Berkeley AI Research ([BAIR](http://bair.berkeley.edu))/The Berkeley Vision and Learning Center (BVLC) and community contributors. Check out the [project site](http://caffe.berkeleyvision.org) for all the details like - [DIY Deep Learning for Vision with Caffe](https://docs.google.com/presentation/d/1UeKXVgRvvxg9OUdh_UiC5G71UMscNPlvArsWER41PsU/edit#slide=id.p) - [Tutorial Documentation](http://caffe.berkeleyvision.org/tutorial/) -- [BVLC reference models](http://caffe.berkeleyvision.org/model_zoo.html) and the [community model zoo](https://github.com/BVLC/caffe/wiki/Model-Zoo) +- [BAIR reference models](http://caffe.berkeleyvision.org/model_zoo.html) and the [community model zoo](https://github.com/BVLC/caffe/wiki/Model-Zoo) - [Installation instructions](http://caffe.berkeleyvision.org/installation.html) and step-by-step examples. @@ -25,7 +25,7 @@ Happy brewing! ## License and Citation Caffe is released under the [BSD 2-Clause license](https://github.com/BVLC/caffe/blob/master/LICENSE). -The BVLC reference models are released for unrestricted use. +The BAIR/BVLC reference models are released for unrestricted use. Please cite Caffe in your publications if it helps your research: diff --git a/docs/_layouts/default.html b/docs/_layouts/default.html index b8efe60bc3b..3799e95afde 100644 --- a/docs/_layouts/default.html +++ b/docs/_layouts/default.html @@ -36,7 +36,7 @@

Caffe

- Deep learning framework by the BVLC + Deep learning framework by BAIR

Created by diff --git a/docs/development.md b/docs/development.md index 107c2c3b281..ec05bbee102 100644 --- a/docs/development.md +++ b/docs/development.md @@ -4,7 +4,7 @@ title: Developing and Contributing # Development and Contributing Caffe is developed with active participation of the community.
-The [BVLC](http://bvlc.eecs.berkeley.edu/) brewers welcome all contributions! +The [BAIR](http://bair.berkeley.edu/)/BVLC brewers welcome all contributions! The exact details of contributions are recorded by versioning and cited in our [acknowledgements](http://caffe.berkeleyvision.org/#acknowledgements). This method is impartial and always up-to-date. @@ -37,7 +37,7 @@ We absolutely appreciate any contribution to this effort! The `master` branch receives all new development including community contributions. We try to keep it in a reliable state, but it is the bleeding edge, and things do get broken every now and then. -BVLC maintainers will periodically make releases by marking stable checkpoints as tags and maintenance branches. [Past releases](https://github.com/BVLC/caffe/releases) are catalogued online. +BAIR maintainers will periodically make releases by marking stable checkpoints as tags and maintenance branches. [Past releases](https://github.com/BVLC/caffe/releases) are catalogued online. #### Issues & Pull Request Protocol diff --git a/docs/index.md b/docs/index.md index 932b3b58d1d..302a7d56f88 100644 --- a/docs/index.md +++ b/docs/index.md @@ -5,7 +5,7 @@ title: Deep Learning Framework # Caffe Caffe is a deep learning framework made with expression, speed, and modularity in mind. -It is developed by the Berkeley Vision and Learning Center ([BVLC](http://bvlc.eecs.berkeley.edu)) and by community contributors. +It is developed by Berkeley AI Research ([BAIR](http://bair.berkeley.edu)) and by community contributors. [Yangqing Jia](http://daggerfs.com) created the project during his PhD at UC Berkeley. Caffe is released under the [BSD 2-Clause license](https://github.com/BVLC/caffe/blob/master/LICENSE). @@ -45,7 +45,7 @@ A 4-page report for the ACM Multimedia Open Source competition (arXiv:1408.5093v - [Installation instructions](/installation.html)
Tested on Ubuntu, Red Hat, OS X. * [Model Zoo](/model_zoo.html)
-BVLC suggests a standard distribution format for Caffe models, and provides trained models. +BAIR suggests a standard distribution format for Caffe models, and provides trained models. * [Developing & Contributing](/development.html)
Guidelines for development and contributing to Caffe. * [API Documentation](/doxygen/annotated.html)
@@ -92,9 +92,9 @@ The core Caffe developers offer [consulting services](mailto:caffe-coldpress@goo ## Acknowledgements -The BVLC Caffe developers would like to thank NVIDIA for GPU donation, A9 and Amazon Web Services for a research grant in support of Caffe development and reproducible research in deep learning, and BVLC PI [Trevor Darrell](http://www.eecs.berkeley.edu/~trevor/) for guidance. +The BAIR Caffe developers would like to thank NVIDIA for GPU donation, A9 and Amazon Web Services for a research grant in support of Caffe development and reproducible research in deep learning, and BAIR PI [Trevor Darrell](http://www.eecs.berkeley.edu/~trevor/) for guidance. -The BVLC members who have contributed to Caffe are (alphabetical by first name): +The BAIR members who have contributed to Caffe are (alphabetical by first name): [Eric Tzeng](https://github.com/erictzeng), [Evan Shelhamer](http://imaginarynumber.net/), [Jeff Donahue](http://jeffdonahue.com/), [Jon Long](https://github.com/longjon), [Ross Girshick](http://www.cs.berkeley.edu/~rbg/), [Sergey Karayev](http://sergeykarayev.com/), [Sergio Guadarrama](http://www.eecs.berkeley.edu/~sguada/), and [Yangqing Jia](http://daggerfs.com/). The open-source community plays an important and growing role in Caffe's development. @@ -103,4 +103,4 @@ Check out the Github [project pulse](https://github.com/BVLC/caffe/pulse) for re We sincerely appreciate your interest and contributions! If you'd like to contribute, please read the [developing & contributing](development.html) guide. -Yangqing would like to give a personal thanks to the NVIDIA Academic program for providing GPUs, [Oriol Vinyals](http://www1.icsi.berkeley.edu/~vinyals/) for discussions along the journey, and BVLC PI [Trevor Darrell](http://www.eecs.berkeley.edu/~trevor/) for advice. 
+Yangqing would like to give a personal thanks to the NVIDIA Academic program for providing GPUs, [Oriol Vinyals](http://www1.icsi.berkeley.edu/~vinyals/) for discussions along the journey, and BAIR PI [Trevor Darrell](http://www.eecs.berkeley.edu/~trevor/) for advice. diff --git a/docs/model_zoo.md b/docs/model_zoo.md index 06dc0a49ec7..f9078718a8b 100644 --- a/docs/model_zoo.md +++ b/docs/model_zoo.md @@ -14,15 +14,15 @@ To help share these models, we introduce the model zoo framework: ## Where to get trained models -First of all, we bundle BVLC-trained models for unrestricted, out of the box use. +First of all, we bundle BAIR-trained models for unrestricted, out of the box use.
-See the [BVLC model license](#bvlc-model-license) for details. +See the [BAIR model license](#bair-model-license) for details. Each one of these can be downloaded by running `scripts/download_model_binary.py ` where `` is specified below: -- **BVLC Reference CaffeNet** in `models/bvlc_reference_caffenet`: AlexNet trained on ILSVRC 2012, with a minor variation from the version as described in [ImageNet classification with deep convolutional neural networks](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks) by Krizhevsky et al. in NIPS 2012. (Trained by Jeff Donahue @jeffdonahue) -- **BVLC AlexNet** in `models/bvlc_alexnet`: AlexNet trained on ILSVRC 2012, almost exactly as described in [ImageNet classification with deep convolutional neural networks](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks) by Krizhevsky et al. in NIPS 2012. (Trained by Evan Shelhamer @shelhamer) -- **BVLC Reference R-CNN ILSVRC-2013** in `models/bvlc_reference_rcnn_ilsvrc13`: pure Caffe implementation of [R-CNN](https://github.com/rbgirshick/rcnn) as described by Girshick et al. in CVPR 2014. (Trained by Ross Girshick @rbgirshick) -- **BVLC GoogLeNet** in `models/bvlc_googlenet`: GoogLeNet trained on ILSVRC 2012, almost exactly as described in [Going Deeper with Convolutions](http://arxiv.org/abs/1409.4842) by Szegedy et al. in ILSVRC 2014. (Trained by Sergio Guadarrama @sguada) +- **BAIR Reference CaffeNet** in `models/bvlc_reference_caffenet`: AlexNet trained on ILSVRC 2012, with a minor variation from the version as described in [ImageNet classification with deep convolutional neural networks](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks) by Krizhevsky et al. in NIPS 2012. 
(Trained by Jeff Donahue @jeffdonahue) +- **BAIR AlexNet** in `models/bvlc_alexnet`: AlexNet trained on ILSVRC 2012, almost exactly as described in [ImageNet classification with deep convolutional neural networks](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks) by Krizhevsky et al. in NIPS 2012. (Trained by Evan Shelhamer @shelhamer) +- **BAIR Reference R-CNN ILSVRC-2013** in `models/bvlc_reference_rcnn_ilsvrc13`: pure Caffe implementation of [R-CNN](https://github.com/rbgirshick/rcnn) as described by Girshick et al. in CVPR 2014. (Trained by Ross Girshick @rbgirshick) +- **BAIR GoogLeNet** in `models/bvlc_googlenet`: GoogLeNet trained on ILSVRC 2012, almost exactly as described in [Going Deeper with Convolutions](http://arxiv.org/abs/1409.4842) by Szegedy et al. in ILSVRC 2014. (Trained by Sergio Guadarrama @sguada) **Community models** made by Caffe users are posted to a publicly editable [wiki page](https://github.com/BVLC/caffe/wiki/Model-Zoo). These models are subject to conditions of their respective authors such as citation and license. @@ -55,14 +55,14 @@ Downloading model info is done just as easily with `scripts/download_model_from_ ### Hosting trained models It is up to the user where to host the `.caffemodel` file. -We host our BVLC-provided models on our own server. +We host our BAIR-provided models on our own server. Dropbox also works fine (tip: make sure that `?dl=1` is appended to the end of the URL). `scripts/download_model_binary.py ` downloads the `.caffemodel` from the URL specified in the `/readme.md` frontmatter and confirms SHA1. -## BVLC model license +## BAIR model license -The Caffe models bundled by the BVLC are released for unrestricted use. +The Caffe models bundled by the BAIR are released for unrestricted use. These models are trained on data from the [ImageNet project](http://www.image-net.org/) and training data includes internet photos that may be subject to copyright. 
diff --git a/docs/multigpu.md b/docs/multigpu.md index d91acef980d..e04ebb0b7c8 100644 --- a/docs/multigpu.md +++ b/docs/multigpu.md @@ -13,7 +13,7 @@ The GPUs to be used for training can be set with the "-gpu" flag on the command # Hardware Configuration Assumptions The current implementation uses a tree reduction strategy. e.g. if there are 4 GPUs in the system, 0:1, 2:3 will exchange gradients, then 0:2 (top of the tree) will exchange gradients, 0 will calculate -updated model, 0\-\>2, and then 0\-\>1, 2\-\>3. +updated model, 0\-\>2, and then 0\-\>1, 2\-\>3. For best performance, P2P DMA access between devices is needed. Without P2P access, for example crossing PCIe root complex, data is copied through host and effective exchange bandwidth is greatly reduced. @@ -23,4 +23,4 @@ Current implementation has a "soft" assumption that the devices being used are h # Scaling Performance -Performance is **heavily** dependent on the PCIe topology of the system, the configuration of the neural network you are training, and the speed of each of the layers. Systems like the DIGITS DevBox have an optimized PCIe topology (X99-E WS chipset). In general, scaling on 2 GPUs tends to be ~1.8X on average for networks like AlexNet, CaffeNet, VGG, GoogleNet. 4 GPUs begins to have falloff in scaling. Generally with "weak scaling" where the batchsize increases with the number of GPUs you will see 3.5x scaling or so. With "strong scaling", the system can become communication bound, especially with layer performance optimizations like those in [cuDNNv3](http://nvidia.com/cudnn), and you will likely see closer to mid 2.x scaling in performance. Networks that have heavy computation compared to the number of parameters tend to have the best scaling performance. \ No newline at end of file +Performance is **heavily** dependent on the PCIe topology of the system, the configuration of the neural network you are training, and the speed of each of the layers. 
Systems like the DIGITS DevBox have an optimized PCIe topology (X99-E WS chipset). In general, scaling on 2 GPUs tends to be ~1.8X on average for networks like AlexNet, CaffeNet, VGG, GoogleNet. 4 GPUs begins to have falloff in scaling. Generally with "weak scaling" where the batchsize increases with the number of GPUs you will see 3.5x scaling or so. With "strong scaling", the system can become communication bound, especially with layer performance optimizations like those in [cuDNNv3](http://nvidia.com/cudnn), and you will likely see closer to mid 2.x scaling in performance. Networks that have heavy computation compared to the number of parameters tend to have the best scaling performance. diff --git a/docs/performance_hardware.md b/docs/performance_hardware.md index cdd4b361dea..fbf256842f1 100644 --- a/docs/performance_hardware.md +++ b/docs/performance_hardware.md @@ -8,7 +8,7 @@ To measure performance on different NVIDIA GPUs we use CaffeNet, the Caffe refer For training, each time point is 20 iterations/minibatches of 256 images for 5,120 images total. For testing, a 50,000 image validation set is classified. -**Acknowledgements**: BVLC members are very grateful to NVIDIA for providing several GPUs to conduct this research. +**Acknowledgements**: BAIR members are very grateful to NVIDIA for providing several GPUs to conduct this research. ## NVIDIA K40 diff --git a/docs/tutorial/interfaces.md b/docs/tutorial/interfaces.md index d7ff378239d..b5a4f1ad069 100644 --- a/docs/tutorial/interfaces.md +++ b/docs/tutorial/interfaces.md @@ -91,7 +91,7 @@ In MatCaffe, you can * Run for a certain number of iterations and give back control to Matlab * Intermingle arbitrary Matlab code with gradient steps -An ILSVRC image classification demo is in caffe/matlab/demo/classification_demo.m (you need to download BVLC CaffeNet from [Model Zoo](http://caffe.berkeleyvision.org/model_zoo.html) to run it). 
+An ILSVRC image classification demo is in caffe/matlab/demo/classification_demo.m (you need to download BAIR CaffeNet from [Model Zoo](http://caffe.berkeleyvision.org/model_zoo.html) to run it). ### Build MatCaffe @@ -114,7 +114,7 @@ You can save your Matlab search PATH by running `savepath` so that you don't hav MatCaffe is very similar to PyCaffe in usage. -Examples below shows detailed usages and assumes you have downloaded BVLC CaffeNet from [Model Zoo](http://caffe.berkeleyvision.org/model_zoo.html) and started `matlab` from caffe root folder. +Examples below shows detailed usages and assumes you have downloaded BAIR CaffeNet from [Model Zoo](http://caffe.berkeleyvision.org/model_zoo.html) and started `matlab` from caffe root folder. model = './models/bvlc_reference_caffenet/deploy.prototxt'; weights = './models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel'; diff --git a/examples/finetune_flickr_style/readme.md b/examples/finetune_flickr_style/readme.md index 188dedf1b9a..dacfd01c8e1 100644 --- a/examples/finetune_flickr_style/readme.md +++ b/examples/finetune_flickr_style/readme.md @@ -9,7 +9,7 @@ priority: 5 # Fine-tuning CaffeNet for Style Recognition on "Flickr Style" Data Fine-tuning takes an already learned model, adapts the architecture, and resumes training from the already learned model weights. -Let's fine-tune the BVLC-distributed CaffeNet model on a different dataset, [Flickr Style](http://sergeykarayev.com/files/1311.3715v3.pdf), to predict image style instead of object category. +Let's fine-tune the BAIR-distributed CaffeNet model on a different dataset, [Flickr Style](http://sergeykarayev.com/files/1311.3715v3.pdf), to predict image style instead of object category. 
## Explanation diff --git a/models/bvlc_alexnet/readme.md b/models/bvlc_alexnet/readme.md index 008d690f7f4..a83e3d4e27c 100644 --- a/models/bvlc_alexnet/readme.md +++ b/models/bvlc_alexnet/readme.md @@ -1,5 +1,5 @@ --- -name: BVLC AlexNet Model +name: BAIR/BVLC AlexNet Model caffemodel: bvlc_alexnet.caffemodel caffemodel_url: http://dl.caffe.berkeleyvision.org/bvlc_alexnet.caffemodel license: unrestricted diff --git a/models/bvlc_googlenet/readme.md b/models/bvlc_googlenet/readme.md index 061b6d74530..ef04db62ab2 100644 --- a/models/bvlc_googlenet/readme.md +++ b/models/bvlc_googlenet/readme.md @@ -1,5 +1,5 @@ --- -name: BVLC GoogleNet Model +name: BAIR/BVLC GoogleNet Model caffemodel: bvlc_googlenet.caffemodel caffemodel_url: http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel license: unrestricted diff --git a/models/bvlc_reference_caffenet/readme.md b/models/bvlc_reference_caffenet/readme.md index 671e47a5056..5352e536a07 100644 --- a/models/bvlc_reference_caffenet/readme.md +++ b/models/bvlc_reference_caffenet/readme.md @@ -1,5 +1,5 @@ --- -name: BVLC CaffeNet Model +name: BAIR/BVLC CaffeNet Model caffemodel: bvlc_reference_caffenet.caffemodel caffemodel_url: http://dl.caffe.berkeleyvision.org/bvlc_reference_caffenet.caffemodel license: unrestricted diff --git a/models/bvlc_reference_rcnn_ilsvrc13/readme.md b/models/bvlc_reference_rcnn_ilsvrc13/readme.md index 9a11a24d8f8..12543b2bd2c 100644 --- a/models/bvlc_reference_rcnn_ilsvrc13/readme.md +++ b/models/bvlc_reference_rcnn_ilsvrc13/readme.md @@ -1,5 +1,5 @@ --- -name: BVLC Reference RCNN ILSVRC13 Model +name: BAIR/BVLC Reference RCNN ILSVRC13 Model caffemodel: bvlc_reference_rcnn_ilsvrc13.caffemodel caffemodel_url: http://dl.caffe.berkeleyvision.org/bvlc_reference_rcnn_ilsvrc13.caffemodel license: unrestricted From 3562698afb4b1f12f51eca752740e279f85714c4 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 14 Apr 2017 12:45:21 -0700 Subject: [PATCH 238/264] drop performance + hardware page and 
switch to sheet simpler to read and update --- docs/index.md | 9 +++-- docs/performance_hardware.md | 73 ------------------------------------ 2 files changed, 5 insertions(+), 77 deletions(-) delete mode 100644 docs/performance_hardware.md diff --git a/docs/index.md b/docs/index.md index 302a7d56f88..bbfd91fc7b9 100644 --- a/docs/index.md +++ b/docs/index.md @@ -23,15 +23,14 @@ Thanks to these contributors the framework tracks the state-of-the-art in both c **Speed** makes Caffe perfect for research experiments and industry deployment. Caffe can process **over 60M images per day** with a single NVIDIA K40 GPU\*. -That's 1 ms/image for inference and 4 ms/image for learning. -We believe that Caffe is the fastest convnet implementation available. +That's 1 ms/image for inference and 4 ms/image for learning and more recent library versions and hardware are faster still. +We believe that Caffe is among the fastest convnet implementations available. **Community**: Caffe already powers academic research projects, startup prototypes, and even large-scale industrial applications in vision, speech, and multimedia. Join our community of brewers on the [caffe-users group](https://groups.google.com/forum/#!forum/caffe-users) and [Github](https://github.com/BVLC/caffe/).

-\* With the ILSVRC2012-winning [SuperVision](http://www.image-net.org/challenges/LSVRC/2012/supervision.pdf) model and caching IO. -Consult performance [details](/performance_hardware.html). +\* With the ILSVRC2012-winning [SuperVision](http://www.image-net.org/challenges/LSVRC/2012/supervision.pdf) model and prefetching IO.

## Documentation @@ -50,6 +49,8 @@ BAIR suggests a standard distribution format for Caffe models, and provides trai Guidelines for development and contributing to Caffe. * [API Documentation](/doxygen/annotated.html)
Developer documentation automagically generated from code comments. +* [Benchmarking](https://docs.google.com/spreadsheets/d/1Yp4rqHpT7mKxOPbpzYeUfEFLnELDAgxSSBQKp5uKDGQ/edit#gid=0)
+Comparison of inference and learning for different networks and GPUs. ### Examples diff --git a/docs/performance_hardware.md b/docs/performance_hardware.md deleted file mode 100644 index fbf256842f1..00000000000 --- a/docs/performance_hardware.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: Performance and Hardware Configuration ---- - -# Performance and Hardware Configuration - -To measure performance on different NVIDIA GPUs we use CaffeNet, the Caffe reference ImageNet model. - -For training, each time point is 20 iterations/minibatches of 256 images for 5,120 images total. For testing, a 50,000 image validation set is classified. - -**Acknowledgements**: BAIR members are very grateful to NVIDIA for providing several GPUs to conduct this research. - -## NVIDIA K40 - -Performance is best with ECC off and boost clock enabled. While ECC makes a negligible difference in speed, disabling it frees ~1 GB of GPU memory. - -Best settings with ECC off and maximum clock speed in standard Caffe: - -* Training is 26.5 secs / 20 iterations (5,120 images) -* Testing is 100 secs / validation set (50,000 images) - -Best settings with Caffe + [cuDNN acceleration](http://nvidia.com/cudnn): - -* Training is 19.2 secs / 20 iterations (5,120 images) -* Testing is 60.7 secs / validation set (50,000 images) - -Other settings: - -* ECC on, max speed: training 26.7 secs / 20 iterations, test 101 secs / validation set -* ECC on, default speed: training 31 secs / 20 iterations, test 117 secs / validation set -* ECC off, default speed: training 31 secs / 20 iterations, test 118 secs / validation set - -### K40 configuration tips - -For maximum K40 performance, turn off ECC and boost the clock speed (at your own risk). - -To turn off ECC, do - - sudo nvidia-smi -i 0 --ecc-config=0 # repeat with -i x for each GPU ID - -then reboot. 
- -Set the "persistence" mode of the GPU settings by - - sudo nvidia-smi -pm 1 - -and then set the clock speed with - - sudo nvidia-smi -i 0 -ac 3004,875 # repeat with -i x for each GPU ID - -but note that this configuration resets across driver reloading / rebooting. Include these commands in a boot script to initialize these settings. For a simple fix, add these commands to `/etc/rc.local` (on Ubuntu). - -## NVIDIA Titan - -Training: 26.26 secs / 20 iterations (5,120 images). -Testing: 100 secs / validation set (50,000 images). - -cuDNN Training: 20.25 secs / 20 iterations (5,120 images). -cuDNN Testing: 66.3 secs / validation set (50,000 images). - - -## NVIDIA K20 - -Training: 36.0 secs / 20 iterations (5,120 images). -Testing: 133 secs / validation set (50,000 images). - -## NVIDIA GTX 770 - -Training: 33.0 secs / 20 iterations (5,120 images). -Testing: 129 secs / validation set (50,000 images). - -cuDNN Training: 24.3 secs / 20 iterations (5,120 images). -cuDNN Testing: 104 secs / validation set (50,000 images). From 0f5bfc34e0b37b9ab3437d6755eb04a8dc9e8656 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 14 Apr 2017 12:46:56 -0700 Subject: [PATCH 239/264] favor notebook examples as more clear and popular --- docs/index.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/index.md b/docs/index.md index bbfd91fc7b9..82eb059e325 100644 --- a/docs/index.md +++ b/docs/index.md @@ -52,13 +52,6 @@ Developer documentation automagically generated from code comments. * [Benchmarking](https://docs.google.com/spreadsheets/d/1Yp4rqHpT7mKxOPbpzYeUfEFLnELDAgxSSBQKp5uKDGQ/edit#gid=0)
Comparison of inference and learning for different networks and GPUs. -### Examples - -{% assign examples = site.pages | where:'category','example' | sort: 'priority' %} -{% for page in examples %} --
{{page.title}}
{{page.description}}
-{% endfor %} - ### Notebook Examples {% assign notebooks = site.pages | where:'category','notebook' | sort: 'priority' %} @@ -66,6 +59,13 @@ Comparison of inference and learning for different networks and GPUs. -
{{page.title}}
{{page.description}}
{% endfor %} +### Command Line Examples + +{% assign examples = site.pages | where:'category','example' | sort: 'priority' %} +{% for page in examples %} +-
{{page.title}}
{{page.description}}
+{% endfor %} + ## Citing Caffe Please cite Caffe in your publications if it helps your research: From 2158bbb2151049dec2486b720c0a351164a0eb6b Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 14 Apr 2017 12:50:19 -0700 Subject: [PATCH 240/264] model zoo: point out wiki link immediately, explain manual editing --- docs/model_zoo.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/model_zoo.md b/docs/model_zoo.md index f9078718a8b..3f77e82572c 100644 --- a/docs/model_zoo.md +++ b/docs/model_zoo.md @@ -3,7 +3,7 @@ title: Model Zoo --- # Caffe Model Zoo -Lots of researchers and engineers have made Caffe models for different tasks with all kinds of architectures and data. +Lots of researchers and engineers have made Caffe models for different tasks with all kinds of architectures and data: check out the [model zoo](https://github.com/BVLC/caffe/wiki/Model-Zoo)! These models are learned and applied for problems ranging from simple regression, to large-scale visual classification, to Siamese networks for image similarity, to speech and robotics applications. To help share these models, we introduce the model zoo framework: @@ -24,7 +24,7 @@ Each one of these can be downloaded by running `scripts/download_model_binary.py - **BAIR Reference R-CNN ILSVRC-2013** in `models/bvlc_reference_rcnn_ilsvrc13`: pure Caffe implementation of [R-CNN](https://github.com/rbgirshick/rcnn) as described by Girshick et al. in CVPR 2014. (Trained by Ross Girshick @rbgirshick) - **BAIR GoogLeNet** in `models/bvlc_googlenet`: GoogLeNet trained on ILSVRC 2012, almost exactly as described in [Going Deeper with Convolutions](http://arxiv.org/abs/1409.4842) by Szegedy et al. in ILSVRC 2014. (Trained by Sergio Guadarrama @sguada) -**Community models** made by Caffe users are posted to a publicly editable [wiki page](https://github.com/BVLC/caffe/wiki/Model-Zoo). 
+**Community models** made by Caffe users are posted to a publicly editable [model zoo wiki page](https://github.com/BVLC/caffe/wiki/Model-Zoo). These models are subject to conditions of their respective authors such as citation and license. Thank you for sharing your models! @@ -42,6 +42,8 @@ A caffe model is distributed as a directory containing: - License information. - [optional] Other helpful scripts. +This simple format can be handled through bundled scripts or manually if need be. + ### Hosting model info Github Gist is a good format for model info distribution because it can contain multiple files, is versionable, and has in-browser syntax highlighting and markdown rendering. From 414b74c06038c17924745b68954ef10827fe1edd Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 14 Apr 2017 13:19:53 -0700 Subject: [PATCH 241/264] add missing names to BAIR roster --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index 82eb059e325..db8eaffbe34 100644 --- a/docs/index.md +++ b/docs/index.md @@ -96,7 +96,7 @@ The core Caffe developers offer [consulting services](mailto:caffe-coldpress@goo The BAIR Caffe developers would like to thank NVIDIA for GPU donation, A9 and Amazon Web Services for a research grant in support of Caffe development and reproducible research in deep learning, and BAIR PI [Trevor Darrell](http://www.eecs.berkeley.edu/~trevor/) for guidance. The BAIR members who have contributed to Caffe are (alphabetical by first name): -[Eric Tzeng](https://github.com/erictzeng), [Evan Shelhamer](http://imaginarynumber.net/), [Jeff Donahue](http://jeffdonahue.com/), [Jon Long](https://github.com/longjon), [Ross Girshick](http://www.cs.berkeley.edu/~rbg/), [Sergey Karayev](http://sergeykarayev.com/), [Sergio Guadarrama](http://www.eecs.berkeley.edu/~sguada/), and [Yangqing Jia](http://daggerfs.com/). 
+[Carl Doersch](http://www.carldoersch.com/), [Eric Tzeng](https://github.com/erictzeng), [Evan Shelhamer](http://imaginarynumber.net/), [Jeff Donahue](http://jeffdonahue.com/), [Jon Long](https://github.com/longjon), [Philipp Krähenbühl](http://www.philkr.net/), [Ronghang Hu](http://ronghanghu.com/), [Ross Girshick](http://www.cs.berkeley.edu/~rbg/), [Sergey Karayev](http://sergeykarayev.com/), [Sergio Guadarrama](http://www.eecs.berkeley.edu/~sguada/), [Takuya Narihira](https://github.com/tnarihi), and [Yangqing Jia](http://daggerfs.com/). The open-source community plays an important and growing role in Caffe's development. Check out the Github [project pulse](https://github.com/BVLC/caffe/pulse) for recent activity and the [contributors](https://github.com/BVLC/caffe/graphs/contributors) for the full list. From e90a6a6ca29423afb15f39adb1157bff9e6f8655 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 14 Apr 2017 13:24:30 -0700 Subject: [PATCH 242/264] retire caffe-dev and caffe-coldpress dev has diffused into the community from the original Caffe core --- docs/index.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/docs/index.md b/docs/index.md index db8eaffbe34..0e21ae821b0 100644 --- a/docs/index.md +++ b/docs/index.md @@ -86,11 +86,6 @@ Join the [caffe-users group](https://groups.google.com/forum/#!forum/caffe-users Framework development discussions and thorough bug reports are collected on [Issues](https://github.com/BVLC/caffe/issues). -Contact [caffe-dev](mailto:caffe-dev@googlegroups.com) if you have a confidential proposal for the framework *and the ability to act on it*. -Requests for features, explanations, or personal help will be ignored; post to [caffe-users](https://groups.google.com/forum/#!forum/caffe-users) instead. - -The core Caffe developers offer [consulting services](mailto:caffe-coldpress@googlegroups.com) for appropriate projects. 
- ## Acknowledgements The BAIR Caffe developers would like to thank NVIDIA for GPU donation, A9 and Amazon Web Services for a research grant in support of Caffe development and reproducible research in deep learning, and BAIR PI [Trevor Darrell](http://www.eecs.berkeley.edu/~trevor/) for guidance. From 8985818e4fbb5fc207e4f383c63c28d80fd286f2 Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 14 Apr 2017 13:28:24 -0700 Subject: [PATCH 243/264] track publications by google scholar and not the wiki --- docs/index.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 0e21ae821b0..3385747c565 100644 --- a/docs/index.md +++ b/docs/index.md @@ -77,8 +77,7 @@ Please cite Caffe in your publications if it helps your research: Year = {2014} } -If you do publish a paper where Caffe helped your research, we encourage you to update the [publications wiki](https://github.com/BVLC/caffe/wiki/Publications). -Citations are also tracked automatically by [Google Scholar](http://scholar.google.com/scholar?oi=bibs&hl=en&cites=17333247995453974016). +If you do publish a paper where Caffe helped your research, we encourage you to cite the framework for tracking by [Google Scholar](https://scholar.google.com/citations?view_op=view_citation&hl=en&citation_for_view=-ltRSM0AAAAJ:u5HHmVD_uO8C). 
## Contacting Us From 8b8f2dd40ba87543f066cb157c6d65dd8187253f Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 14 Apr 2017 15:26:30 -0700 Subject: [PATCH 244/264] link to new full-day crash course --- docs/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/index.md b/docs/index.md index 3385747c565..b633f7cfddc 100644 --- a/docs/index.md +++ b/docs/index.md @@ -35,8 +35,8 @@ Join our community of brewers on the [caffe-users group](https://groups.google.c ## Documentation -- [DIY Deep Learning for Vision with Caffe](https://docs.google.com/presentation/d/1UeKXVgRvvxg9OUdh_UiC5G71UMscNPlvArsWER41PsU/edit#slide=id.p)
-Tutorial presentation. +- [DIY Deep Learning for Vision with Caffe](https://docs.google.com/presentation/d/1UeKXVgRvvxg9OUdh_UiC5G71UMscNPlvArsWER41PsU/edit#slide=id.p) and [Caffe in a Day](https://docs.google.com/presentation/d/1HxGdeq8MPktHaPb-rlmYYQ723iWzq9ur6Gjo71YiG0Y/edit#slide=id.gc2fcdcce7_216_0)
+Tutorial presentation of the framework and a full-day crash course. - [Tutorial Documentation](/tutorial)
Practical guide and framework reference. - [arXiv / ACM MM '14 paper](http://arxiv.org/abs/1408.5093)
From 49761d34d18b7063af995b13ecca0fee1bdaf02c Mon Sep 17 00:00:00 2001 From: Evan Shelhamer Date: Fri, 14 Apr 2017 15:32:50 -0700 Subject: [PATCH 245/264] Caffe 1.0 --- CMakeLists.txt | 4 ++-- Makefile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c52ff466471..08f56a33a59 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,8 +10,8 @@ endif() project(Caffe C CXX) # ---[ Caffe version -set(CAFFE_TARGET_VERSION "1.0.0-rc5" CACHE STRING "Caffe logical version") -set(CAFFE_TARGET_SOVERSION "1.0.0-rc5" CACHE STRING "Caffe soname version") +set(CAFFE_TARGET_VERSION "1.0.0" CACHE STRING "Caffe logical version") +set(CAFFE_TARGET_SOVERSION "1.0.0" CACHE STRING "Caffe soname version") add_definitions(-DCAFFE_VERSION=${CAFFE_TARGET_VERSION}) # ---[ Using cmake scripts and modules diff --git a/Makefile b/Makefile index 77900b69b97..4d324160c08 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ LIB_BUILD_DIR := $(BUILD_DIR)/lib STATIC_NAME := $(LIB_BUILD_DIR)/lib$(LIBRARY_NAME).a DYNAMIC_VERSION_MAJOR := 1 DYNAMIC_VERSION_MINOR := 0 -DYNAMIC_VERSION_REVISION := 0-rc5 +DYNAMIC_VERSION_REVISION := 0 DYNAMIC_NAME_SHORT := lib$(LIBRARY_NAME).so #DYNAMIC_SONAME_SHORT := $(DYNAMIC_NAME_SHORT).$(DYNAMIC_VERSION_MAJOR) DYNAMIC_VERSIONED_NAME_SHORT := $(DYNAMIC_NAME_SHORT).$(DYNAMIC_VERSION_MAJOR).$(DYNAMIC_VERSION_MINOR).$(DYNAMIC_VERSION_REVISION) From 33f86122970392fcda19ef80ed5cd349279b896d Mon Sep 17 00:00:00 2001 From: Eric Tzeng Date: Tue, 18 Apr 2017 18:22:38 -0700 Subject: [PATCH 246/264] Rewrite crop cuda kernel --- include/caffe/layers/crop_layer.hpp | 6 +- src/caffe/layers/crop_layer.cpp | 21 +++-- src/caffe/layers/crop_layer.cu | 122 +++++++++++----------------- 3 files changed, 69 insertions(+), 80 deletions(-) diff --git a/include/caffe/layers/crop_layer.hpp b/include/caffe/layers/crop_layer.hpp index c4fda1220c3..5219fa5cb5f 100644 --- a/include/caffe/layers/crop_layer.hpp +++ 
b/include/caffe/layers/crop_layer.hpp @@ -41,13 +41,15 @@ class CropLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); - vector offsets; + Blob offsets; + Blob src_strides_; + Blob dest_strides_; private: // Recursive copy function. void crop_copy(const vector*>& bottom, const vector*>& top, - const vector& offsets, + const int* offsets, vector indices, int cur_dim, const Dtype* src_data, diff --git a/src/caffe/layers/crop_layer.cpp b/src/caffe/layers/crop_layer.cpp index ef8c177c4dd..65ea8f8b7d0 100644 --- a/src/caffe/layers/crop_layer.cpp +++ b/src/caffe/layers/crop_layer.cpp @@ -40,8 +40,10 @@ void CropLayer::Reshape(const vector*>& bottom, const int start_axis = bottom[0]->CanonicalAxisIndex(param.axis()); // Initialize offsets to 0 and the new shape to the current shape of the data. - offsets = vector(input_dim, 0); vector new_shape(bottom[0]->shape()); + vector offsets_shape(1, input_dim); + offsets.Reshape(offsets_shape); + int* offset_data = offsets.mutable_cpu_data(); // Determine crop offsets and the new shape post-crop. 
for (int i = 0; i < input_dim; ++i) { @@ -63,15 +65,22 @@ void CropLayer::Reshape(const vector*>& bottom, << "size " << bottom[1]->shape(i) << " and offset " << crop_offset; } new_shape[i] = new_size; - offsets[i] = crop_offset; + offset_data[i] = crop_offset; } top[0]->Reshape(new_shape); + // Compute strides + src_strides_.Reshape(offsets_shape); + dest_strides_.Reshape(offsets_shape); + for (int i = 0; i < input_dim; ++i) { + src_strides_.mutable_cpu_data()[i] = bottom[0]->count(i + 1, input_dim); + dest_strides_.mutable_cpu_data()[i] = top[0]->count(i + 1, input_dim); + } } template void CropLayer::crop_copy(const vector*>& bottom, const vector*>& top, - const vector& offsets, + const int* offsets, vector indices, int cur_dim, const Dtype* src_data, @@ -115,7 +124,8 @@ void CropLayer::Forward_cpu(const vector*>& bottom, std::vector indices(top[0]->num_axes(), 0); const Dtype* bottom_data = bottom[0]->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); - crop_copy(bottom, top, offsets, indices, 0, bottom_data, top_data, true); + crop_copy(bottom, top, offsets.cpu_data(), indices, 0, bottom_data, top_data, + true); } template @@ -127,7 +137,8 @@ void CropLayer::Backward_cpu(const vector*>& top, if (propagate_down[0]) { caffe_set(bottom[0]->count(), static_cast(0), bottom_diff); std::vector indices(top[0]->num_axes(), 0); - crop_copy(bottom, top, offsets, indices, 0, top_diff, bottom_diff, false); + crop_copy(bottom, top, offsets.cpu_data(), indices, 0, top_diff, + bottom_diff, false); } } diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu index 677077cdd8b..a400f333e14 100644 --- a/src/caffe/layers/crop_layer.cu +++ b/src/caffe/layers/crop_layer.cu @@ -4,90 +4,62 @@ namespace caffe { -// Copy (one line per thread) from one array to another, with arbitrary -// strides in the last two dimensions. 
+__device__ int compute_uncropped_index( + int index, + const int ndims, + const int* src_strides, + const int* dest_strides, + const int* offsets) { + int dest_index = index; + int src_index = 0; + for (int i = 0; i < ndims; ++i) { + int coord = dest_index / dest_strides[i]; + dest_index -= coord * dest_strides[i]; + src_index += src_strides[i] * (coord + offsets[i]); + } + return src_index; +} + template -__global__ void copy_kernel(const int n, const int height, const int width, - const int src_inner_stride, - const int dest_inner_stride, +__global__ void crop_kernel_forward(const int nthreads, + const int ndims, + const int* src_strides, + const int* dest_strides, + const int* offsets, const Dtype* src, Dtype* dest) { - CUDA_KERNEL_LOOP(index, n) { - int src_start = index * src_inner_stride; - int dest_start = index * dest_inner_stride; - for (int i = 0; i < width; ++i) { - dest[dest_start + i] = src[src_start + i]; - } + CUDA_KERNEL_LOOP(index, nthreads) { + int src_index = compute_uncropped_index( + index, ndims, src_strides, dest_strides, offsets); + dest[index] = src[src_index]; } } template -void CropLayer::crop_copy_gpu(const vector*>& bottom, - const vector*>& top, - const vector& offsets, - vector indices, - int cur_dim, - const Dtype* src_data, - Dtype* dest_data, - bool is_forward) { - if (cur_dim + 2 < top[0]->num_axes()) { - // We are not yet at the final dimension, call copy recursivley - for (int i = 0; i < top[0]->shape(cur_dim); ++i) { - indices[cur_dim] = i; - crop_copy_gpu(bottom, top, offsets, indices, cur_dim+1, - src_data, dest_data, is_forward); - } - } else { - // We are at the last two dimensions, which are stored continuously in - // memory. 
With (N,C,H,W) - // (0,1,2,3) cur_dim -> H - // cur_dim+1 -> W - const int lines = top[0]->shape(cur_dim); - const int height = top[0]->shape(cur_dim); - const int width = top[0]->shape(cur_dim+1); - std::vector ind_off(cur_dim+2, 0); - for (int j = 0; j < cur_dim; ++j) { - ind_off[j] = indices[j] + offsets[j]; - } - ind_off[cur_dim] = offsets[cur_dim]; - ind_off[cur_dim+1] = offsets[cur_dim+1]; - // Compute copy strides - const int src_inner_stride = bottom[0]->shape(cur_dim+1); - const int dest_inner_stride = top[0]->shape(cur_dim+1); - - if (is_forward) { - const Dtype* bottom_data = bottom[0]->gpu_data() + - bottom[0]->offset(ind_off); - Dtype* top_data = top[0]->mutable_gpu_data() + - top[0]->offset(indices); - // NOLINT_NEXT_LINE(whitespace/operators) - copy_kernel<<>>( - lines, height, width, - src_inner_stride, - dest_inner_stride, - bottom_data, top_data); - - } else { - const Dtype* top_diff = top[0]->gpu_diff() + - top[0]->offset(indices); - Dtype* bottom_diff = bottom[0]->mutable_gpu_diff() + - bottom[0]->offset(ind_off); - // NOLINT_NEXT_LINE(whitespace/operators) - copy_kernel<<>>( - lines, height, width, - dest_inner_stride, - src_inner_stride, - top_diff, bottom_diff); - } +__global__ void crop_kernel_backward(const int nthreads, + const int ndims, + const int* src_strides, + const int* dest_strides, + const int* offsets, + Dtype* src, const Dtype* dest) { + CUDA_KERNEL_LOOP(index, nthreads) { + int src_index = compute_uncropped_index( + index, ndims, src_strides, dest_strides, offsets); + src[src_index] = dest[index]; } } template void CropLayer::Forward_gpu(const vector*>& bottom, const vector*>& top) { - std::vector indices(top[0]->num_axes(), 0); const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); - crop_copy_gpu(bottom, top, offsets, indices, 0, bottom_data, top_data, true); + int n = top[0]->count(); + crop_kernel_forward<<>>(n, + bottom[0]->num_axes(), + src_strides_.gpu_data(), + 
dest_strides_.gpu_data(), + offsets.gpu_data(), + bottom_data, top_data); } template @@ -95,12 +67,16 @@ void CropLayer::Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom) { const Dtype* top_diff = top[0]->gpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + int n = top[0]->count(); if (propagate_down[0]) { caffe_gpu_set(bottom[0]->count(), static_cast(0), bottom_diff); - std::vector indices(top[0]->num_axes(), 0); - crop_copy_gpu(bottom, top, offsets, indices, 0, top_diff, bottom_diff, - false); + crop_kernel_backward<<>>(n, + bottom[0]->num_axes(), + src_strides_.gpu_data(), + dest_strides_.gpu_data(), + offsets.gpu_data(), + bottom_diff, top_diff); } } From cd1696d00b995a1d8567cb6f3ad7f65ec4df4176 Mon Sep 17 00:00:00 2001 From: Eric Tzeng Date: Tue, 18 Apr 2017 18:48:26 -0700 Subject: [PATCH 247/264] Fix crop layer lint errors --- src/caffe/layers/crop_layer.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/caffe/layers/crop_layer.cu b/src/caffe/layers/crop_layer.cu index a400f333e14..4ece9cd1761 100644 --- a/src/caffe/layers/crop_layer.cu +++ b/src/caffe/layers/crop_layer.cu @@ -54,6 +54,7 @@ void CropLayer::Forward_gpu(const vector*>& bottom, const Dtype* bottom_data = bottom[0]->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); int n = top[0]->count(); + // NOLINT_NEXT_LINE(whitespace/operators) crop_kernel_forward<<>>(n, bottom[0]->num_axes(), src_strides_.gpu_data(), @@ -71,6 +72,7 @@ void CropLayer::Backward_gpu(const vector*>& top, if (propagate_down[0]) { caffe_gpu_set(bottom[0]->count(), static_cast(0), bottom_diff); + // NOLINT_NEXT_LINE(whitespace/operators) crop_kernel_backward<<>>(n, bottom[0]->num_axes(), src_strides_.gpu_data(), From ec35395e131a0d5e7c55cbd74dadbd46a49a645c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Malte=20St=C3=A6r=20Nissen?= Date: Thu, 4 May 2017 14:33:40 +0200 Subject: [PATCH 248/264] Handling destruction of empty Net objects --- matlab/+caffe/Net.m | 4 +++- 1 
file changed, 3 insertions(+), 1 deletion(-) diff --git a/matlab/+caffe/Net.m b/matlab/+caffe/Net.m index 349e060eb22..bb99ec89049 100644 --- a/matlab/+caffe/Net.m +++ b/matlab/+caffe/Net.m @@ -69,7 +69,9 @@ self.blob_names = self.attributes.blob_names; end function delete (self) - caffe_('delete_net', self.hNet_self); + if ~isempty(self.hNet_self) + caffe_('delete_net', self.hNet_self); + end end function layer = layers(self, layer_name) CHECK(ischar(layer_name), 'layer_name must be a string'); From b7e2b99c7f0aeeb8e24046f8cbf5212065b9ccdf Mon Sep 17 00:00:00 2001 From: Luke Yeager Date: Fri, 12 May 2017 10:06:51 -0700 Subject: [PATCH 249/264] Downgrade boost requirement from 1.55 to 1.54 --- cmake/Dependencies.cmake | 2 +- scripts/travis/install-deps.sh | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 4a5bac471b4..c48255c89f2 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -5,7 +5,7 @@ set(Caffe_DEFINITIONS "") set(Caffe_COMPILE_OPTIONS "") # ---[ Boost -find_package(Boost 1.55 REQUIRED COMPONENTS system thread filesystem) +find_package(Boost 1.54 REQUIRED COMPONENTS system thread filesystem) list(APPEND Caffe_INCLUDE_DIRS PUBLIC ${Boost_INCLUDE_DIRS}) list(APPEND Caffe_LINKER_LIBS PUBLIC ${Boost_LIBRARIES}) diff --git a/scripts/travis/install-deps.sh b/scripts/travis/install-deps.sh index dac5d2f9d37..2fa2a74a486 100755 --- a/scripts/travis/install-deps.sh +++ b/scripts/travis/install-deps.sh @@ -9,10 +9,10 @@ apt-get -y update apt-get install -y --no-install-recommends \ build-essential \ graphviz \ - libboost-filesystem1.55-dev \ - libboost-python1.55-dev \ - libboost-system1.55-dev \ - libboost-thread1.55-dev \ + libboost-filesystem-dev \ + libboost-python-dev \ + libboost-system-dev \ + libboost-thread-dev \ libgflags-dev \ libgoogle-glog-dev \ libhdf5-serial-dev \ From 30a2ab7e50430911f37ddf981e67e4f36f662f14 Mon Sep 17 00:00:00 2001 From: Zhou Mo 
Date: Mon, 15 May 2017 02:16:19 +0000 Subject: [PATCH 250/264] cmake: rename libproto.a -> libcaffeproto.a --- cmake/ConfigGen.cmake | 2 +- src/caffe/CMakeLists.txt | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cmake/ConfigGen.cmake b/cmake/ConfigGen.cmake index ad91f542104..09bb09b4ff2 100644 --- a/cmake/ConfigGen.cmake +++ b/cmake/ConfigGen.cmake @@ -33,7 +33,7 @@ function(caffe_generate_export_configs) configure_file("cmake/Templates/CaffeConfig.cmake.in" "${PROJECT_BINARY_DIR}/CaffeConfig.cmake" @ONLY) # Add targets to the build-tree export set - export(TARGETS caffe proto FILE "${PROJECT_BINARY_DIR}/CaffeTargets.cmake") + export(TARGETS caffe caffeproto FILE "${PROJECT_BINARY_DIR}/CaffeTargets.cmake") export(PACKAGE Caffe) # ---[ Configure install-tree CaffeConfig.cmake file ]--- diff --git a/src/caffe/CMakeLists.txt b/src/caffe/CMakeLists.txt index b9152e9216f..4a805568566 100644 --- a/src/caffe/CMakeLists.txt +++ b/src/caffe/CMakeLists.txt @@ -3,12 +3,12 @@ file(GLOB proto_files proto/*.proto) caffe_protobuf_generate_cpp_py(${proto_gen_folder} proto_srcs proto_hdrs proto_python ${proto_files}) # include python files either to force generation -add_library(proto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python}) -caffe_default_properties(proto) -target_link_libraries(proto PUBLIC ${PROTOBUF_LIBRARIES}) -target_include_directories(proto PUBLIC ${PROTOBUF_INCLUDE_DIR}) +add_library(caffeproto STATIC ${proto_hdrs} ${proto_srcs} ${proto_python}) +caffe_default_properties(caffeproto) +target_link_libraries(caffeproto PUBLIC ${PROTOBUF_LIBRARIES}) +target_include_directories(caffeproto PUBLIC ${PROTOBUF_INCLUDE_DIR}) -list(INSERT Caffe_LINKER_LIBS 0 PUBLIC proto) # note, crucial to prepend! +list(INSERT Caffe_LINKER_LIBS 0 PUBLIC caffeproto) # note, crucial to prepend! 
# --[ Caffe library @@ -42,7 +42,7 @@ set_target_properties(caffe PROPERTIES # ---[ Install install(DIRECTORY ${Caffe_INCLUDE_DIR}/caffe DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) install(FILES ${proto_hdrs} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/caffe/proto) -install(TARGETS caffe proto EXPORT CaffeTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}) +install(TARGETS caffe caffeproto EXPORT CaffeTargets DESTINATION ${CMAKE_INSTALL_LIBDIR}) file(WRITE ${PROJECT_BINARY_DIR}/__init__.py) list(APPEND proto_python ${PROJECT_BINARY_DIR}/__init__.py) From 83814da36d5a44039ddc35f58f9b341e9d1bd935 Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Mon, 15 May 2017 03:04:47 +0000 Subject: [PATCH 251/264] docs/debian guide: update compiler combination table --- docs/install_apt_debian.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/docs/install_apt_debian.md b/docs/install_apt_debian.md index 65fe70924e1..bd91124a898 100644 --- a/docs/install_apt_debian.md +++ b/docs/install_apt_debian.md @@ -96,18 +96,22 @@ Note, this requires a `deb-src` entry in your `/etc/apt/sources.list`. Some users may find their favorate compiler doesn't work with CUDA. ``` -CXX compiler | CUDA 7.5 | CUDA 8.0 | --------------+------------+------------+- -GCC-7 | ? | ? | -GCC-6 | ✘ | ✘ | -GCC-5 | ✔ [1] | ✔ | -CLANG-4.0 | ? | ? | -CLANG-3.9 | ✘ | ✘ | -CLANG-3.8 | ? | ✔ | +CXX compiler | CUDA 7.5 | CUDA 8.0 | CUDA 9.0 | +-------------+------------+------------+------------+ +GCC-8 | ? | ? | ? | +GCC-7 | ? | ? | ? | +GCC-6 | ✘ | ✘ | ✔ | +GCC-5 | ✔ [1] | ✔ | ✔ | +-------------+------------+------------+------------+ +CLANG-4.0 | ? | ? | ? | +CLANG-3.9 | ✘ | ✘ | ✔ | +CLANG-3.8 | ? | ✔ | ✔ | ``` `[1]` CUDA 7.5 's `host_config.h` must be patched before working with GCC-5. +`[2]` CUDA 9.0: https://devblogs.nvidia.com/parallelforall/cuda-9-features-revealed/ + BTW, please forget the GCC-4.X series, since its `libstdc++` ABI is not compatible with GCC-5's. 
You may encounter failure linking GCC-4.X object files against GCC-5 libraries. (See https://wiki.debian.org/GCC5 ) From 264cf199e4e8bc44bb97762b1018137704157c2c Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Tue, 13 Jun 2017 11:59:26 -0700 Subject: [PATCH 252/264] List branches in readme --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 0ae3616b4a6..c40aee65c3c 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,14 @@ Check out the [project site](http://caffe.berkeleyvision.org) for all the detail and step-by-step examples. +## Custom distributions + +- [Intel optimized branch](https://github.com/BVLC/caffe/tree/intel) for CPU, in particular Xeon processors (HSW, BDW, Xeon Phi). +- [OpenCL Caffe](https://github.com/BVLC/caffe/tree/opencl) e.g. for AMD or Intel devices. +- [Windows Caffe](https://github.com/BVLC/caffe/tree/windows) + +## Community + [![Join the chat at https://gitter.im/BVLC/caffe](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/BVLC/caffe?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) Please join the [caffe-users group](https://groups.google.com/forum/#!forum/caffe-users) or [gitter chat](https://gitter.im/BVLC/caffe) to ask questions and talk about methods and models. From 4efdf7ee49cffefdd7ea099c00dc5ea327640f04 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Tue, 20 Jun 2017 14:20:42 -0700 Subject: [PATCH 253/264] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c40aee65c3c..5148c69d310 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ and step-by-step examples. ## Custom distributions -- [Intel optimized branch](https://github.com/BVLC/caffe/tree/intel) for CPU, in particular Xeon processors (HSW, BDW, Xeon Phi). 
+ - [Intel Caffe](https://github.com/BVLC/caffe/tree/intel) (Optimized for CPU and support for multi-node), in particular Xeon processors (HSW, BDW, Xeon Phi). - [OpenCL Caffe](https://github.com/BVLC/caffe/tree/opencl) e.g. for AMD or Intel devices. - [Windows Caffe](https://github.com/BVLC/caffe/tree/windows) From 4a2f2a0f2cbfca5fe8d9fec417a432d0aa345f37 Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Sat, 8 Jul 2017 08:22:07 +0000 Subject: [PATCH 254/264] docs: update apt installation guide for Debian and Ubuntu Caffe package is available for APT since Debian 9.0 and Ubuntu 17.04 . --- docs/install_apt.md | 28 ++++++++++++++++++++++++++++ docs/install_apt_debian.md | 30 ++++++++++++++---------------- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/docs/install_apt.md b/docs/install_apt.md index ee2cd287701..43785f56d27 100644 --- a/docs/install_apt.md +++ b/docs/install_apt.md @@ -4,6 +4,34 @@ title: "Installation: Ubuntu" # Ubuntu Installation +### For Ubuntu (>= 17.04) + +**Installing pre-compiled Caffe** + +Everything including caffe itself is packaged in 17.04 and higher versions. +To install pre-compiled Caffe package, just do it by + + sudo apt install caffe-cpu + +for CPU-only version, or + + sudo apt install caffe-cuda + +for CUDA version. Note, the cuda version may break if your NVIDIA driver +and CUDA toolkit are not installed by APT. + +**Installing Caffe from source** + +We may install the dependencies by merely one line + + sudo apt build-dep caffe-cpu # dependencies for CPU-only version + sudo apt build-dep caffe-cuda # dependencies for CUDA version + +It requires a `deb-src` line in your `sources.list`. +Continue with [compilation](installation.html#compilation). 
+ +### For Ubuntu (\< 17.04) + **General dependencies** sudo apt-get install libprotobuf-dev libleveldb-dev libsnappy-dev libopencv-dev libhdf5-serial-dev protobuf-compiler diff --git a/docs/install_apt_debian.md b/docs/install_apt_debian.md index bd91124a898..0a6a3b962e5 100644 --- a/docs/install_apt_debian.md +++ b/docs/install_apt_debian.md @@ -8,24 +8,28 @@ Caffe packages are available for several Debian versions, as shown in the following chart: ``` -Your Distro | CPU_ONLY | CUDA | Alias +Your Distro | CPU_ONLY | CUDA | Codename ----------------+------------+--------+------------------- -Debian/stable | ✘ | ✘ | Debian Jessie -Debian/testing | ✔ | ✔ | Debian Stretch/Sid -Debian/unstable | ✔ | ✔ | Debian Sid +Debian/oldstable| ✘ | ✘ | Jessie (8.0) +Debian/stable | ✔ | ✔ | Stretch (9.0) +Debian/testing | ✔ | ✔ | Buster +Debian/unstable | ✔ | ✔ | Buster ``` * `✘ ` You should take a look at [Ubuntu installation instruction](install_apt.html). * `✔ ` You can install caffe with a single command line following this guide. -Last update: 2017-02-01 +* [Package status of CPU-only version](https://tracker.debian.org/pkg/caffe) + +* [Package status of CUDA version](https://tracker.debian.org/pkg/caffe-contrib) + +Last update: 2017-07-08 ## Binary installation with APT -Apart from the installation methods based on source, Debian/unstable -and Debian/testing users can install pre-compiled Caffe packages from -the official archive. +Apart from the installation methods based on source, Debian users can install +pre-compiled Caffe packages from the official archive with APT. Make sure that your `/etc/apt/sources.list` contains `contrib` and `non-free` sections if you want to install the CUDA version, for instance: @@ -44,7 +48,8 @@ $ caffe # command line interface wo $ python3 -c 'import caffe; print(caffe.__path__)' # python3 interface working ``` -These Caffe packages should work for you out of box. +These Caffe packages should work for you out of box. 
However, the CUDA version +may break if your NVIDIA driver and CUDA toolkit are not installed with APT. #### Customizing caffe packages @@ -156,10 +161,3 @@ and hack the packaging scripts, then build your customized package. $ sudo apt install caffe-doc $ dpkg -L caffe-doc ``` - -* Where can I find the Debian package status? - -``` -https://tracker.debian.org/pkg/caffe (for the CPU_ONLY version) -https://tracker.debian.org/pkg/caffe-contrib (for the CUDA version) -``` From eedf7c188708e097e63984528c94d3c8616dd5ff Mon Sep 17 00:00:00 2001 From: Zhou Mo Date: Sat, 8 Jul 2017 08:24:57 +0000 Subject: [PATCH 255/264] docs: add Ubuntu package tracker link in Ubuntu guide --- docs/install_apt.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/install_apt.md b/docs/install_apt.md index 43785f56d27..b6cb1c2d6f7 100644 --- a/docs/install_apt.md +++ b/docs/install_apt.md @@ -20,6 +20,10 @@ for CPU-only version, or for CUDA version. Note, the cuda version may break if your NVIDIA driver and CUDA toolkit are not installed by APT. 
+[Package status of CPU-only version](https://launchpad.net/ubuntu/+source/caffe) + +[Package status of CUDA version](https://launchpad.net/ubuntu/+source/caffe-contrib) + **Installing Caffe from source** We may install the dependencies by merely one line From 3d7cfc40c170f93ac88909f40ca0208269ee26a9 Mon Sep 17 00:00:00 2001 From: Lydorn Date: Mon, 10 Jul 2017 15:43:47 +0200 Subject: [PATCH 256/264] Update lrn.md Fixed typo "locaitons " -> "location" in line 17 --- docs/tutorial/layers/lrn.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/layers/lrn.md b/docs/tutorial/layers/lrn.md index 2fbef734663..f5e4829279d 100644 --- a/docs/tutorial/layers/lrn.md +++ b/docs/tutorial/layers/lrn.md @@ -14,7 +14,7 @@ title: Local Response Normalization (LRN) - `local_size` [default 5]: the number of channels to sum over (for cross channel LRN) or the side length of the square region to sum over (for within channel LRN) - `alpha` [default 1]: the scaling parameter (see below) - `beta` [default 5]: the exponent (see below) - - `norm_region` [default `ACROSS_CHANNELS`]: whether to sum over adjacent channels (`ACROSS_CHANNELS`) or nearby spatial locaitons (`WITHIN_CHANNEL`) + - `norm_region` [default `ACROSS_CHANNELS`]: whether to sum over adjacent channels (`ACROSS_CHANNELS`) or nearby spatial locations (`WITHIN_CHANNEL`) The local response normalization layer performs a kind of "lateral inhibition" by normalizing over local input regions. In `ACROSS_CHANNELS` mode, the local regions extend across nearby channels, but have no spatial extent (i.e., they have shape `local_size x 1 x 1`). In `WITHIN_CHANNEL` mode, the local regions extend spatially, but are in separate channels (i.e., they have shape `1 x local_size x local_size`). Each input value is divided by $$(1 + (\alpha/n) \sum_i x_i^2)^\beta$$, where $$n$$ is the size of each local region, and the sum is taken over the region centered at that value (zero padding is added where necessary). 
From af9d6bc0446f479a7c7cff870de4da5df19fddd1 Mon Sep 17 00:00:00 2001 From: downes Date: Wed, 12 Jul 2017 14:50:40 -0700 Subject: [PATCH 257/264] update sklearn calls to use latest API Version 0.18 moved cross-validation to sklearn.model_selection - see http://scikit-learn.org/stable/whats_new.html#version-0-18 Version 0.17 deprecated class_weight="auto" in favor of class_weight="balanced" --- examples/brewing-logreg.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/brewing-logreg.ipynb b/examples/brewing-logreg.ipynb index c053b73b39f..4b4cd6a3f02 100644 --- a/examples/brewing-logreg.ipynb +++ b/examples/brewing-logreg.ipynb @@ -73,7 +73,7 @@ ")\n", "\n", "# Split into train and test\n", - "X, Xt, y, yt = sklearn.cross_validation.train_test_split(X, y)\n", + "X, Xt, y, yt = sklearn.model_selection.train_test_split(X, y)\n", "\n", "# Visualize sample of the data\n", "ind = np.random.permutation(X.shape[0])[:1000]\n", @@ -111,7 +111,7 @@ "%%timeit\n", "# Train and test the scikit-learn SGD logistic regression.\n", "clf = sklearn.linear_model.SGDClassifier(\n", - " loss='log', n_iter=1000, penalty='l2', alpha=5e-4, class_weight='auto')\n", + " loss='log', n_iter=1000, penalty='l2', alpha=5e-4, class_weight='balanced')\n", "\n", "clf.fit(X, y)\n", "yt_pred = clf.predict(Xt)\n", From 4b98f06c03c7cb84163ba7f681dbe9185fdcc5f9 Mon Sep 17 00:00:00 2001 From: downes Date: Wed, 12 Jul 2017 14:52:53 -0700 Subject: [PATCH 258/264] update deprecated pandas call pd.scatter_matrix -> pd.plotting.scatter_matrix --- examples/brewing-logreg.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/brewing-logreg.ipynb b/examples/brewing-logreg.ipynb index 4b4cd6a3f02..0f87185a35b 100644 --- a/examples/brewing-logreg.ipynb +++ b/examples/brewing-logreg.ipynb @@ -78,7 +78,7 @@ "# Visualize sample of the data\n", "ind = np.random.permutation(X.shape[0])[:1000]\n", "df = pd.DataFrame(X[ind])\n", - "_ = pd.scatter_matrix(df, 
figsize=(9, 9), diagonal='kde', marker='o', s=40, alpha=.4, c=y[ind])" + "_ = pd.plotting.scatter_matrix(df, figsize=(9, 9), diagonal='kde', marker='o', s=40, alpha=.4, c=y[ind])" ] }, { From 315641b7ef8624b756ed042a7e9330ecde3782e7 Mon Sep 17 00:00:00 2001 From: Keith Mok Date: Fri, 4 Aug 2017 13:55:34 -0700 Subject: [PATCH 259/264] Fix hardcode xcode path User may not install xcoder into default directory especially if there are two different versions of xcoder installed. --- cmake/Modules/FindvecLib.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/Modules/FindvecLib.cmake b/cmake/Modules/FindvecLib.cmake index 8eaab59473c..4d44e613a00 100644 --- a/cmake/Modules/FindvecLib.cmake +++ b/cmake/Modules/FindvecLib.cmake @@ -12,11 +12,12 @@ endif() set(__veclib_include_suffix "Frameworks/vecLib.framework/Versions/Current/Headers") +exec_program(xcode-select ARGS -print-path OUTPUT_VARIABLE CMAKE_XCODE_DEVELOPER_DIR) find_path(vecLib_INCLUDE_DIR vecLib.h DOC "vecLib include directory" PATHS /System/Library/Frameworks/Accelerate.framework/Versions/Current/${__veclib_include_suffix} /System/Library/${__veclib_include_suffix} - /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ + ${CMAKE_XCODE_DEVELOPER_DIR}/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/ NO_DEFAULT_PATH) include(FindPackageHandleStandardArgs) From 1de4cebfb81d50267d0d8c2595372b14e1408248 Mon Sep 17 00:00:00 2001 From: Cyprien Noel Date: Wed, 16 Aug 2017 18:24:32 -0700 Subject: [PATCH 260/264] Update README.md Mention SKX support --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5148c69d310..fe259535865 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ and 
step-by-step examples. ## Custom distributions - - [Intel Caffe](https://github.com/BVLC/caffe/tree/intel) (Optimized for CPU and support for multi-node), in particular Xeon processors (HSW, BDW, Xeon Phi). + - [Intel Caffe](https://github.com/BVLC/caffe/tree/intel) (Optimized for CPU and support for multi-node), in particular Xeon processors (HSW, BDW, SKX, Xeon Phi). - [OpenCL Caffe](https://github.com/BVLC/caffe/tree/opencl) e.g. for AMD or Intel devices. - [Windows Caffe](https://github.com/BVLC/caffe/tree/windows) From 3dad3323436e05a4c1890104b0f26f27f6d77d31 Mon Sep 17 00:00:00 2001 From: wasnot Date: Wed, 6 Sep 2017 12:32:39 +0900 Subject: [PATCH 261/264] modified division operator for compatibility of python 3 --- python/caffe/classifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/caffe/classifier.py b/python/caffe/classifier.py index ea29fed86f9..983760a786d 100644 --- a/python/caffe/classifier.py +++ b/python/caffe/classifier.py @@ -92,7 +92,7 @@ def predict(self, inputs, oversample=True): # For oversampling, average predictions across crops. if oversample: - predictions = predictions.reshape((len(predictions) / 10, 10, -1)) + predictions = predictions.reshape((len(predictions) // 10, 10, -1)) predictions = predictions.mean(1) return predictions From 8bdc87f6bc6a7d05d2fdbee2cfc159003297476b Mon Sep 17 00:00:00 2001 From: Takuya Narihira Date: Wed, 11 Mar 2015 16:23:39 -0700 Subject: [PATCH 262/264] Expose GPU pointers to Python The pointers could be used by CUDA wrapper libraries in Python such as PyCUDA, gnumpy, Theano etc. 
--- python/caffe/_caffe.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index d7f43fff62d..72659a4f44e 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -464,6 +464,14 @@ BOOST_PYTHON_MODULE(_caffe) { .add_property("count", static_cast::*)() const>( &Blob::count)) .def("reshape", bp::raw_function(&Blob_Reshape)) +#ifndef CPU_ONLY + .add_property("_gpu_data_ptr", + reinterpret_cast::*)()>( + &Blob::mutable_gpu_data)) + .add_property("_gpu_diff_ptr", + reinterpret_cast::*)()>( + &Blob::mutable_gpu_diff)) +#endif .add_property("data", bp::make_function(&Blob::mutable_cpu_data, NdarrayCallPolicies())) .add_property("diff", bp::make_function(&Blob::mutable_cpu_diff, From 32ca94ba6d6bce5e1b5bfff705c49e88b26ae7cd Mon Sep 17 00:00:00 2001 From: arikpoz Date: Fri, 8 Sep 2017 08:11:26 +0300 Subject: [PATCH 263/264] Merge branch 'deconv-deep-vis-toolbox' --- include/caffe/common.hpp | 15 +++ include/caffe/layer.hpp | 57 +++++++++++ include/caffe/layers/lrn_layer.hpp | 12 +++ include/caffe/layers/relu_layer.hpp | 5 + include/caffe/layers/softmax_layer.hpp | 1 + include/caffe/net.hpp | 10 ++ include/caffe/util/device_alternate.hpp | 19 ++++ python/caffe/_caffe.cpp | 1 + python/caffe/pycaffe.py | 131 ++++++++++++++++++++++++ src/caffe/layers/lrn_layer.cpp | 29 +++++- src/caffe/layers/lrn_layer.cu | 44 +++++++- src/caffe/layers/relu_layer.cpp | 19 +++- src/caffe/layers/relu_layer.cu | 30 +++++- src/caffe/net.cpp | 51 +++++++++ src/caffe/proto/caffe.proto | 8 ++ 15 files changed, 428 insertions(+), 4 deletions(-) diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp index 4904d1d8661..f9e7194d1e6 100644 --- a/include/caffe/common.hpp +++ b/include/caffe/common.hpp @@ -61,10 +61,25 @@ private:\ const std::vector& propagate_down, \ const std::vector*>& bottom) +#define INSTANTIATE_LAYER_GPU_DECONV(classname) \ + template void classname::Deconv_gpu( \ + const std::vector*>& top, \ + const 
std::vector& propagate_down, \ + const std::vector*>& bottom); \ + template void classname::Deconv_gpu( \ + const std::vector*>& top, \ + const std::vector& propagate_down, \ + const std::vector*>& bottom) + #define INSTANTIATE_LAYER_GPU_FUNCS(classname) \ INSTANTIATE_LAYER_GPU_FORWARD(classname); \ INSTANTIATE_LAYER_GPU_BACKWARD(classname) +#define INSTANTIATE_LAYER_GPU_FUNCS_WITH_DECONV(classname) \ + INSTANTIATE_LAYER_GPU_FORWARD(classname); \ + INSTANTIATE_LAYER_GPU_BACKWARD(classname); \ + INSTANTIATE_LAYER_GPU_DECONV(classname) + // A simple macro to mark codes that are not implemented, so that when the code // is executed we will see a fatal log. #define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet" diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp index 30dbfd53758..81b35657070 100644 --- a/include/caffe/layer.hpp +++ b/include/caffe/layer.hpp @@ -151,6 +151,27 @@ class Layer { const vector& propagate_down, const vector*>& bottom); + /** + * @brief Given the top blob deconv info, compute the bottom blob deconv. Similar to Backward. + * + * The Deconv wrapper calls the relevant device wrapper function + * (Deconv_cpu or Deconv_gpu) to compute the bottom blob diffs given the + * top blob diffs. + * + * Your layer should implement Deconv_cpu and Deconv_gpu. + * + * Note: By default, Deconv_gpu will just call Backward_gpu, and + * Deconv_cpu will just call Backward_cpu. In many cases this + * behavior is desired, e.g. for convolution or innerproduct or + * pooling layers. If this is not the desired behavior, override + * Deconv_cpu AND Deconv_gpu. If only one of Deconv_{cpu,gpu} is + * overridden, the other will still defer to Backward_{cpu,gpu}, + * which will lead to confusing and inconsistent behavior! + */ + inline void Deconv(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom); + /** * @brief Returns the vector of learnable parameter blobs. 
*/ @@ -338,6 +359,26 @@ class Layer { Backward_cpu(top, propagate_down, bottom); } + /** + * @brief Using the CPU device, compute the deconv (Zeiler et al, 2013) for the bottom blobs. + */ + virtual void Deconv_cpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + // LOG(WARNING) << "Explicit Deconv_cpu not implemented for " << type() << " yet; falling back to backward_cpu."; + Backward_cpu(top, propagate_down, bottom); + } + /** + * @brief Using the GPU device, compute the deconv (Zeiler et al, 2013) for the bottom blobs. + * Fall back to Deconv_cpu() if unavailable. + */ + virtual void Deconv_gpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + // LOG(WARNING) << "Explicit Deconv_gpu not implemented for " << type() << " yet; falling back to backward_gpu."; + Backward_gpu(top, propagate_down, bottom); + } + /** * Called by the parent Layer's SetUp to check that the number of bottom * and top Blobs provided as input match the expected numbers specified by @@ -461,6 +502,22 @@ inline void Layer::Backward(const vector*>& top, } } +template +inline void Layer::Deconv(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + switch (Caffe::mode()) { + case Caffe::CPU: + Deconv_cpu(top, propagate_down, bottom); + break; + case Caffe::GPU: + Deconv_gpu(top, propagate_down, bottom); + break; + default: + LOG(FATAL) << "Unknown caffe mode."; + } +} + // Serialize LayerParameter to protocol buffer template void Layer::ToProto(LayerParameter* param, bool write_diff) { diff --git a/include/caffe/layers/lrn_layer.hpp b/include/caffe/layers/lrn_layer.hpp index 06cf71a94cb..767f297c7b2 100644 --- a/include/caffe/layers/lrn_layer.hpp +++ b/include/caffe/layers/lrn_layer.hpp @@ -43,6 +43,15 @@ class LRNLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); + virtual void Deconv_cpu(const vector*>& top, + const vector& 
propagate_down, const vector*>& bottom); + virtual void Deconv_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Deconv_passthrough_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Deconv_passthrough_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void CrossChannelForward_cpu(const vector*>& bottom, const vector*>& top); virtual void CrossChannelForward_gpu(const vector*>& bottom, @@ -87,6 +96,9 @@ class LRNLayer : public Layer { shared_ptr > product_layer_; Blob product_input_; vector*> product_bottom_vec_; + + // Fields used for deconv + bool deconv_ignore_; }; } // namespace caffe diff --git a/include/caffe/layers/relu_layer.hpp b/include/caffe/layers/relu_layer.hpp index d7a73f7a8d1..b79dec17671 100644 --- a/include/caffe/layers/relu_layer.hpp +++ b/include/caffe/layers/relu_layer.hpp @@ -78,6 +78,11 @@ class ReLULayer : public NeuronLayer { const vector& propagate_down, const vector*>& bottom); virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); + + virtual void Deconv_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); + virtual void Deconv_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom); }; } // namespace caffe diff --git a/include/caffe/layers/softmax_layer.hpp b/include/caffe/layers/softmax_layer.hpp index c65b8703e43..46f57de033b 100644 --- a/include/caffe/layers/softmax_layer.hpp +++ b/include/caffe/layers/softmax_layer.hpp @@ -36,6 +36,7 @@ class SoftmaxLayer : public Layer { virtual void Backward_gpu(const vector*>& top, const vector& propagate_down, const vector*>& bottom); + int outer_num_; int inner_num_; int softmax_axis_; diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index d3c9306e9cf..f4f4243a365 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -74,6 +74,14 @@ class Net 
{ void BackwardFrom(int start); void BackwardTo(int end); + /** + * The network deconv works similarly to backward and also takes no input and output. + */ + void Deconv(); + void DeconvFromTo(int start, int end); + void DeconvFrom(int start); + void DeconvTo(int end); + /** * @brief Reshape all layers from bottom to top. * @@ -270,6 +278,8 @@ class Net { void ForwardDebugInfo(const int layer_id); /// @brief Helper for displaying debug info in Backward. void BackwardDebugInfo(const int layer_id); + /// @brief Helper for displaying debug info in Deconv. + void DeconvDebugInfo(const int layer_id); /// @brief Helper for displaying debug info in Update. void UpdateDebugInfo(const int param_id); diff --git a/include/caffe/util/device_alternate.hpp b/include/caffe/util/device_alternate.hpp index e3fe4fe29fd..496d4a073ae 100644 --- a/include/caffe/util/device_alternate.hpp +++ b/include/caffe/util/device_alternate.hpp @@ -18,6 +18,19 @@ void classname::Backward_gpu(const vector*>& top, \ const vector& propagate_down, \ const vector*>& bottom) { NO_GPU; } \ +#define STUB_GPU_WITH_DECONV(classname) \ +template \ +void classname::Forward_gpu(const vector*>& bottom, \ + const vector*>& top) { NO_GPU; } \ +template \ +void classname::Backward_gpu(const vector*>& top, \ + const vector& propagate_down, \ + const vector*>& bottom) { NO_GPU; } \ +template \ +void classname::Deconv_gpu(const vector*>& top, \ + const vector& propagate_down, \ + const vector*>& bottom) { NO_GPU; } \ + #define STUB_GPU_FORWARD(classname, funcname) \ template \ void classname::funcname##_##gpu(const vector*>& bottom, \ @@ -29,6 +42,12 @@ void classname::funcname##_##gpu(const vector*>& top, \ const vector& propagate_down, \ const vector*>& bottom) { NO_GPU; } \ +#define STUB_GPU_DECONV(classname, funcname) \ +template \ +void classname::funcname##_##gpu(const vector*>& top, \ + const vector& propagate_down, \ + const vector*>& bottom) { NO_GPU; } \ + #else // Normal GPU + CPU Caffe. 
#include diff --git a/python/caffe/_caffe.cpp b/python/caffe/_caffe.cpp index 72659a4f44e..6bc5bbb5b90 100644 --- a/python/caffe/_caffe.cpp +++ b/python/caffe/_caffe.cpp @@ -413,6 +413,7 @@ BOOST_PYTHON_MODULE(_caffe) { .def("__init__", bp::make_constructor(&Net_Init_Load)) .def("_forward", &Net::ForwardFromTo) .def("_backward", &Net::BackwardFromTo) + .def("_deconv", &Net::DeconvFromTo) .def("reshape", &Net::Reshape) .def("clear_param_diffs", &Net::ClearParamDiffs) // The cast is to select a particular overload. diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index 4a7b5a24c46..2f8deaf20db 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -69,6 +69,133 @@ def _Net_params(self): return self._params_dict +def _Net_zero(self, zero_param_diffs = True): + """ + Set all activations (data and diffs) in the net to zero. + + Take + zero_param_diffs: If True, also zero the parameter blob diffs, + else skip parameter blobs. + """ + + for blob_name, blob in self.blobs.items(): + blob.data[...] = 0 + blob.diff[...] = 0 + if zero_param_diffs: + for param_name, blob_vec in self.params.items(): + for blob in blob_vec: + blob.diff[...] = 0 + + +def _Net_backward_from_layer(self, start_name, start_diff, diffs=None, zero_higher=False): + """ + Backward pass starting from somewhere in the middle of the + network, starting with the provided diffs. + + Take + start_name: layer at which to begin the backward pass + start_diff: diff to set at start_name layer + diffs: list of diffs to return in addition to bottom diffs. + zero_higher: whether or not to zero out higher layers to reflect the true 0 derivative or leave them alone to save time. + + Give + outs: {blob name: diff ndarray} dict. + """ + + if start_diff.shape != self.blobs[start_name].diff.shape: + raise Exception('Expected start_diff of shape %s but got %s' % (self.blobs[start_name].diff.shape, start_diff.shape)) + + self.blobs[start_name].diff[...] 
= start_diff + + if zero_higher: + past_start = False + for blob_name, blob in self.blobs.items(): + if past_start: + blob.diff[...] = 0 + if blob_name == start_name: + past_start = True + + return self.backward(start=start_name, diffs=diffs) + + +def _Net_deconv_from_layer(self, start_name, start_diff, diffs=None, zero_higher=False): + """ + Deconv pass starting from somewhere in the middle of the + network, starting with the provided diffs. + + Take + start_name: layer at which to begin the deconv pass + start_diff: diff to set at start_name layer + diffs: list of diffs to return in addition to bottom diffs. + zero_higher: whether or not to zero out higher layers to reflect the true 0 derivative or leave them alone to save time. + + Give + outs: {blob name: diff ndarray} dict. + """ + + if start_diff.shape != self.blobs[start_name].diff.shape: + raise Exception('Expected start_diff of shape %s but got %s' % (self.blobs[start_name].diff.shape, start_diff.shape)) + + self.blobs[start_name].diff[...] = start_diff + + if zero_higher: + past_start = False + for blob_name, blob in self.blobs.items(): + if past_start: + blob.diff[...] = 0 + if blob_name == start_name: + past_start = True + + return self.deconv(start=start_name, diffs=diffs) + + +def _Net_deconv(self, diffs=None, start=None, end=None, **kwargs): + """ + Deconv pass: prepare diffs and run the net backward in deconv mode. Just like _Net_Backward but calls Deconv instead. + + Take + diffs: list of diffs to return in addition to bottom diffs. + kwargs: Keys are output blob names and values are diff ndarrays. + If None, top diffs are taken from forward loss. + start: optional name of layer at which to begin the backward pass + end: optional name of layer at which to finish the backward pass (inclusive) + + Give + outs: {blob name: diff ndarray} dict. 
+ """ + if diffs is None: + diffs = [] + + if start is not None: + start_ind = list(self._layer_names).index(start) + else: + start_ind = len(self.layers) - 1 + + if end is not None: + end_ind = list(self._layer_names).index(end) + outputs = set([end] + diffs) + else: + end_ind = 0 + outputs = set(self.inputs + diffs) + + if kwargs: + if set(kwargs.keys()) != set(self.outputs): + raise Exception('Top diff arguments do not match net outputs.') + # Set top diffs according to defined shapes and make arrays single and + # C-contiguous as Caffe expects. + for top, diff in kwargs.iteritems(): + if diff.ndim != 4: + raise Exception('{} diff is not 4-d'.format(top)) + if diff.shape[0] != self.blobs[top].num: + raise Exception('Diff is not batch sized') + self.blobs[top].diff[...] = diff + + self._deconv(start_ind, end_ind) + + # Unpack diffs to extract + return {out: self.blobs[out].diff for out in outputs} + + @property def _Net_inputs(self): if not hasattr(self, '_input_list'): @@ -333,8 +460,12 @@ def get_id_name(self): Net.blob_loss_weights = _Net_blob_loss_weights Net.layer_dict = _Net_layer_dict Net.params = _Net_params +Net.zero = _Net_zero +Net.backward_from_layer = _Net_backward_from_layer +Net.deconv_from_layer = _Net_deconv_from_layer Net.forward = _Net_forward Net.backward = _Net_backward +Net.deconv = _Net_deconv Net.forward_all = _Net_forward_all Net.forward_backward_all = _Net_forward_backward_all Net.set_input_arrays = _Net_set_input_arrays diff --git a/src/caffe/layers/lrn_layer.cpp b/src/caffe/layers/lrn_layer.cpp index 210525e20f3..0be7911a3be 100644 --- a/src/caffe/layers/lrn_layer.cpp +++ b/src/caffe/layers/lrn_layer.cpp @@ -14,6 +14,7 @@ void LRNLayer::LayerSetUp(const vector*>& bottom, alpha_ = this->layer_param_.lrn_param().alpha(); beta_ = this->layer_param_.lrn_param().beta(); k_ = this->layer_param_.lrn_param().k(); + deconv_ignore_ = this->layer_param_.lrn_param().deconv_ignore(); if (this->layer_param_.lrn_param().norm_region() == 
LRNParameter_NormRegion_WITHIN_CHANNEL) { // Set up split_layer_ to use inputs in the numerator and denominator. @@ -246,10 +247,36 @@ void LRNLayer::WithinChannelBackward( } } +template +void LRNLayer::Deconv_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (deconv_ignore_) { + // Deconv Option 1: pass through (ignore LRN layer): + Deconv_passthrough_cpu(top, propagate_down, bottom); + } else { + // Deconv Option 2: compute derivatives via backprop: + Backward_cpu(top, propagate_down, bottom); + } +} + +template +void LRNLayer::Deconv_passthrough_cpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (propagate_down[0]) { + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + const int count = bottom[0]->count(); + for (int i = 0; i < count; ++i) { + bottom_diff[i] = top_diff[i]; + } + } +} + #ifdef CPU_ONLY -STUB_GPU(LRNLayer); +STUB_GPU_WITH_DECONV(LRNLayer); STUB_GPU_FORWARD(LRNLayer, CrossChannelForward); STUB_GPU_BACKWARD(LRNLayer, CrossChannelBackward); +STUB_GPU_DECONV(LRNLayer, Deconv_passthrough); #endif INSTANTIATE_CLASS(LRNLayer); diff --git a/src/caffe/layers/lrn_layer.cu b/src/caffe/layers/lrn_layer.cu index 26e619c7569..a5ee2387df7 100644 --- a/src/caffe/layers/lrn_layer.cu +++ b/src/caffe/layers/lrn_layer.cu @@ -195,8 +195,50 @@ template void LRNLayer::CrossChannelBackward_gpu( const vector*>& top, const vector& propagate_down, const vector*>& bottom); +template +void LRNLayer::Deconv_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (deconv_ignore_) { + // Deconv Option 1: pass through (ignore LRN layer): + Deconv_passthrough_gpu(top, propagate_down, bottom); + } else { + // Deconv Option 2: compute derivatives via backprop: + Backward_gpu(top, propagate_down, bottom); + } +} + +template +__global__ void LRNDeconv_passthrough(const int n, const Dtype* in_diff, + Dtype* out_diff) { + 
CUDA_KERNEL_LOOP(index, n) { + out_diff[index] = in_diff[index]; + } +} + +template +void LRNLayer::Deconv_passthrough_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + // Option 2: pass through (ignore LRN layer) + if (propagate_down[0]) { + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + const int count = bottom[0]->count(); + // NOLINT_NEXT_LINE(whitespace/operators) + LRNDeconv_passthrough<<>>( + count, top_diff, bottom_diff); + CUDA_POST_KERNEL_CHECK; + } +} +template void LRNLayer::Deconv_passthrough_gpu( + const vector*>& top, const vector& propagate_down, + const vector*>& bottom); +template void LRNLayer::Deconv_passthrough_gpu( + const vector*>& top, const vector& propagate_down, + const vector*>& bottom); + -INSTANTIATE_LAYER_GPU_FUNCS(LRNLayer); +//INSTANTIATE_LAYER_GPU_FUNCS(LRNLayer); +INSTANTIATE_LAYER_GPU_FUNCS_WITH_DECONV(LRNLayer); } // namespace caffe diff --git a/src/caffe/layers/relu_layer.cpp b/src/caffe/layers/relu_layer.cpp index 92a729c81bd..2f0e52b33f0 100644 --- a/src/caffe/layers/relu_layer.cpp +++ b/src/caffe/layers/relu_layer.cpp @@ -35,9 +35,26 @@ void ReLULayer::Backward_cpu(const vector*>& top, } } +template +void ReLULayer::Deconv_cpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + if (propagate_down[0]) { + const Dtype* top_diff = top[0]->cpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); + const int count = bottom[0]->count(); + Dtype negative_slope = this->layer_param_.relu_param().negative_slope(); + if (negative_slope != Dtype(0)) + LOG(WARNING) << "negative_slope parameter = " << negative_slope << " but nonzero negative_slope params are not supported for Deconv through RELU."; + for (int i = 0; i < count; ++i) { + bottom_diff[i] = std::max(top_diff[i], Dtype(0)); + } + } +} + #ifdef CPU_ONLY -STUB_GPU(ReLULayer); +STUB_GPU_WITH_DECONV(ReLULayer); #endif INSTANTIATE_CLASS(ReLULayer); diff 
--git a/src/caffe/layers/relu_layer.cu b/src/caffe/layers/relu_layer.cu index 4bf15b3aad3..3905448727b 100644 --- a/src/caffe/layers/relu_layer.cu +++ b/src/caffe/layers/relu_layer.cu @@ -57,8 +57,36 @@ void ReLULayer::Backward_gpu(const vector*>& top, } } +template +__global__ void ReLUDeconv(const int n, const Dtype* in_diff, + Dtype* out_diff) { + CUDA_KERNEL_LOOP(index, n) { + out_diff[index] = in_diff[index] > 0 ? in_diff[index] : 0; + } +} + +template +void ReLULayer::Deconv_gpu(const vector*>& top, + const vector& propagate_down, + const vector*>& bottom) { + if (propagate_down[0]) { + const Dtype* bottom_data = bottom[0]->gpu_data(); + const Dtype* top_diff = top[0]->gpu_diff(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + const int count = bottom[0]->count(); + Dtype negative_slope = this->layer_param_.relu_param().negative_slope(); + if (negative_slope != Dtype(0)) + LOG(WARNING) << "negative_slope parameter = " << negative_slope << " but nonzero negative_slope params are not supported for Deconv through RELU."; + // NOLINT_NEXT_LINE(whitespace/operators) + ReLUDeconv<<>>( + count, top_diff, bottom_diff); + CUDA_POST_KERNEL_CHECK; + } +} + -INSTANTIATE_LAYER_GPU_FUNCS(ReLULayer); +//INSTANTIATE_LAYER_GPU_FUNCS(ReLULayer); +INSTANTIATE_LAYER_GPU_FUNCS_WITH_DECONV(ReLULayer); } // namespace caffe diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 353c2f95b9e..4759804033f 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -582,6 +582,19 @@ void Net::BackwardFromTo(int start, int end) { } } +template +void Net::DeconvFromTo(int start, int end) { + CHECK_GE(end, 0); + CHECK_LT(start, layers_.size()); + for (int i = start; i >= end; --i) { + if (layer_need_backward_[i]) { + layers_[i]->Deconv( + top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i]); + if (debug_info_) { DeconvDebugInfo(i); } + } + } +} + template void Net::ForwardDebugInfo(const int layer_id) { for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) { @@ 
-635,6 +648,29 @@ void Net::BackwardDebugInfo(const int layer_id) { } } +template +void Net::DeconvDebugInfo(const int layer_id) { + const vector*>& bottom_vec = bottom_vecs_[layer_id]; + for (int bottom_id = 0; bottom_id < bottom_vec.size(); ++bottom_id) { + if (!bottom_need_backward_[layer_id][bottom_id]) { continue; } + const Blob& blob = *bottom_vec[bottom_id]; + const string& blob_name = blob_names_[bottom_id_vecs_[layer_id][bottom_id]]; + const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count(); + LOG(INFO) << " [Deconv] " + << "Layer " << layer_names_[layer_id] << ", bottom blob " << blob_name + << " diff: " << diff_abs_val_mean; + } + for (int param_id = 0; param_id < layers_[layer_id]->blobs().size(); + ++param_id) { + if (!layers_[layer_id]->param_propagate_down(param_id)) { continue; } + const Blob& blob = *layers_[layer_id]->blobs()[param_id]; + const Dtype diff_abs_val_mean = blob.asum_diff() / blob.count(); + LOG(INFO) << " [Deconv] " + << "Layer " << layer_names_[layer_id] << ", param blob " << param_id + << " diff: " << diff_abs_val_mean; + } +} + template void Net::UpdateDebugInfo(const int param_id) { const Blob& blob = *params_[param_id]; @@ -722,6 +758,21 @@ void Net::Backward() { } } +template +void Net::DeconvFrom(int start) { + DeconvFromTo(start, 0); +} + +template +void Net::DeconvTo(int end) { + DeconvFromTo(layers_.size() - 1, end); +} + +template +void Net::Deconv() { + DeconvFromTo(layers_.size() - 1, 0); +} + template void Net::Reshape() { for (int i = 0; i < layers_.size(); ++i) { diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index c96966b589d..614f16c240e 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ -654,6 +654,7 @@ message DataParameter { // DEPRECATED. Each solver accesses a different subset of the database. optional uint32 rand_skip = 7 [default = 0]; optional DB backend = 8 [default = LEVELDB]; + // DEPRECATED. See TransformationParameter. 
For data pre-processing, we can do // simple scaling and subtracting the data mean, if provided. Note that the // mean subtraction is always carried out before scaling. @@ -870,6 +871,13 @@ message LRNParameter { CUDNN = 2; } optional Engine engine = 6 [default = DEFAULT]; + + // Whether or not to skip the LRN layer during a deconv pass. If + // this is true, activations in a deconv will pass through the LRN + // layer unaffected. If it is false, deconv activations will be + // affected by LRN layers the same as backprop diffs are (will pass + // through the derivative of the layer). + optional bool deconv_ignore = 7 [default = false]; } message MemoryDataParameter { From 7fec25bad4405fb97b9a48d7307d8a8bb8bacae9 Mon Sep 17 00:00:00 2001 From: arikpoz Date: Fri, 8 Sep 2017 08:14:15 +0300 Subject: [PATCH 264/264] - added deconv_type to support 'Guided Backprop' next to the existing 'Zeiler & Fergus' - fix bug related to layer name vs blob name confusion --- include/caffe/common.hpp | 6 +++-- include/caffe/layer.hpp | 24 ++++++++++------- include/caffe/layers/lrn_layer.hpp | 8 +++--- include/caffe/layers/relu_layer.hpp | 4 +-- include/caffe/net.hpp | 8 +++--- include/caffe/util/device_alternate.hpp | 4 +-- python/caffe/pycaffe.py | 36 ++++++++++++++++--------- src/caffe/layers/lrn_layer.cpp | 6 ++--- src/caffe/layers/lrn_layer.cu | 10 +++---- src/caffe/layers/relu_layer.cpp | 18 ++++++++++--- src/caffe/layers/relu_layer.cu | 33 ++++++++++++++++++----- src/caffe/net.cpp | 16 +++++------ 12 files changed, 113 insertions(+), 60 deletions(-) diff --git a/include/caffe/common.hpp b/include/caffe/common.hpp index f9e7194d1e6..23c6e59f87e 100644 --- a/include/caffe/common.hpp +++ b/include/caffe/common.hpp @@ -65,11 +65,13 @@ private:\ template void classname::Deconv_gpu( \ const std::vector*>& top, \ const std::vector& propagate_down, \ - const std::vector*>& bottom); \ + const std::vector*>& bottom, \ + int deconv_type); \ template void classname::Deconv_gpu( \ const 
std::vector*>& top, \ const std::vector& propagate_down, \ - const std::vector*>& bottom) + const std::vector*>& bottom, \ + int deconv_type) #define INSTANTIATE_LAYER_GPU_FUNCS(classname) \ INSTANTIATE_LAYER_GPU_FORWARD(classname); \ diff --git a/include/caffe/layer.hpp b/include/caffe/layer.hpp index 81b35657070..881d7311503 100644 --- a/include/caffe/layer.hpp +++ b/include/caffe/layer.hpp @@ -170,7 +170,8 @@ class Layer { */ inline void Deconv(const vector*>& top, const vector& propagate_down, - const vector*>& bottom); + const vector*>& bottom, + int deconv_type); /** * @brief Returns the vector of learnable parameter blobs. @@ -360,23 +361,27 @@ class Layer { } /** - * @brief Using the CPU device, compute the deconv (Zeiler et al, 2013) for the bottom blobs. + * @brief Using the CPU device, compute the deconv for the bottom blobs. + * deconv is calculated either using Zeiler et al, 2013 or via "guided backprop" */ virtual void Deconv_cpu(const vector*>& top, const vector& propagate_down, - const vector*>& bottom) { + const vector*>& bottom, + int deconv_type) { // LOG(WARNING) << "Explicit Deconv_cpu not implemented for " << type() << " yet; falling back to backward_cpu."; Backward_cpu(top, propagate_down, bottom); } /** - * @brief Using the GPU device, compute the deconv (Zeiler et al, 2013) for the bottom blobs. + * @brief Using the GPU device, compute the deconv for the bottom blobs. + * deconv is calculated either using Zeiler et al, 2013 or via "guided backprop" * Fall back to Deconv_cpu() if unavailable.
*/ virtual void Deconv_gpu(const vector*>& top, const vector& propagate_down, - const vector*>& bottom) { + const vector*>& bottom, + int deconv_type) { // LOG(WARNING) << "Explicit Deconv_gpu not implemented for " << type() << " yet; falling back to backward_gpu."; - Backward_gpu(top, propagate_down, bottom); + Deconv_cpu(top, propagate_down, bottom, deconv_type); } /** @@ -505,13 +510,14 @@ inline void Layer::Backward(const vector*>& top, template inline void Layer::Deconv(const vector*>& top, const vector& propagate_down, - const vector*>& bottom) { + const vector*>& bottom, + int deconv_type) { switch (Caffe::mode()) { case Caffe::CPU: - Deconv_cpu(top, propagate_down, bottom); + Deconv_cpu(top, propagate_down, bottom, deconv_type); break; case Caffe::GPU: - Deconv_gpu(top, propagate_down, bottom); + Deconv_gpu(top, propagate_down, bottom, deconv_type); break; default: LOG(FATAL) << "Unknown caffe mode."; diff --git a/include/caffe/layers/lrn_layer.hpp b/include/caffe/layers/lrn_layer.hpp index 767f297c7b2..840ca76724a 100644 --- a/include/caffe/layers/lrn_layer.hpp +++ b/include/caffe/layers/lrn_layer.hpp @@ -44,13 +44,13 @@ class LRNLayer : public Layer { const vector& propagate_down, const vector*>& bottom); virtual void Deconv_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); + const vector& propagate_down, const vector*>& bottom, int deconv_type); virtual void Deconv_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); + const vector& propagate_down, const vector*>& bottom, int deconv_type); virtual void Deconv_passthrough_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); + const vector& propagate_down, const vector*>& bottom, int deconv_type); virtual void Deconv_passthrough_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); + const vector& propagate_down, const vector*>& bottom, int deconv_type); virtual void 
CrossChannelForward_cpu(const vector*>& bottom, const vector*>& top); diff --git a/include/caffe/layers/relu_layer.hpp b/include/caffe/layers/relu_layer.hpp index b79dec17671..18351c2e661 100644 --- a/include/caffe/layers/relu_layer.hpp +++ b/include/caffe/layers/relu_layer.hpp @@ -80,9 +80,9 @@ class ReLULayer : public NeuronLayer { const vector& propagate_down, const vector*>& bottom); virtual void Deconv_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); + const vector& propagate_down, const vector*>& bottom, int deconv_type); virtual void Deconv_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom); + const vector& propagate_down, const vector*>& bottom, int deconv_type); }; } // namespace caffe diff --git a/include/caffe/net.hpp b/include/caffe/net.hpp index f4f4243a365..b2ed259dbbb 100644 --- a/include/caffe/net.hpp +++ b/include/caffe/net.hpp @@ -77,10 +77,10 @@ class Net { /** * The network deconv works similarly to backward and also takes no input and output. */ - void Deconv(); - void DeconvFromTo(int start, int end); - void DeconvFrom(int start); - void DeconvTo(int end); + void Deconv(int deconv_type); + void DeconvFromTo(int start, int end, int deconv_type); + void DeconvFrom(int start, int deconv_type); + void DeconvTo(int end, int deconv_type); /** * @brief Reshape all layers from bottom to top. 
diff --git a/include/caffe/util/device_alternate.hpp b/include/caffe/util/device_alternate.hpp index 496d4a073ae..a8c8adff332 100644 --- a/include/caffe/util/device_alternate.hpp +++ b/include/caffe/util/device_alternate.hpp @@ -29,7 +29,7 @@ void classname::Backward_gpu(const vector*>& top, \ template \ void classname::Deconv_gpu(const vector*>& top, \ const vector& propagate_down, \ - const vector*>& bottom) { NO_GPU; } \ + const vector*>& bottom, int deconv_type) { NO_GPU; } \ #define STUB_GPU_FORWARD(classname, funcname) \ template \ @@ -46,7 +46,7 @@ void classname::funcname##_##gpu(const vector*>& top, \ template \ void classname::funcname##_##gpu(const vector*>& top, \ const vector& propagate_down, \ - const vector*>& bottom) { NO_GPU; } \ + const vector*>& bottom, int deconv_type) { NO_GPU; } \ #else // Normal GPU + CPU Caffe. diff --git a/python/caffe/pycaffe.py b/python/caffe/pycaffe.py index 2f8deaf20db..f16f7ace5ea 100644 --- a/python/caffe/pycaffe.py +++ b/python/caffe/pycaffe.py @@ -102,23 +102,24 @@ def _Net_backward_from_layer(self, start_name, start_diff, diffs=None, zero_high outs: {blob name: diff ndarray} dict. """ - if start_diff.shape != self.blobs[start_name].diff.shape: - raise Exception('Expected start_diff of shape %s but got %s' % (self.blobs[start_name].diff.shape, start_diff.shape)) + start_top_name = self.top_names[start_name][0] + if start_diff.shape != self.blobs[start_top_name].diff.shape: + raise Exception('Expected start_diff of shape %s but got %s' % (self.blobs[start_top_name].diff.shape, start_diff.shape)) - self.blobs[start_name].diff[...] = start_diff + self.blobs[start_top_name].diff[...] = start_diff if zero_higher: past_start = False for blob_name, blob in self.blobs.items(): if past_start: blob.diff[...] 
= 0 - if blob_name == start_name: + if blob_name == start_top_name: past_start = True return self.backward(start=start_name, diffs=diffs) -def _Net_deconv_from_layer(self, start_name, start_diff, diffs=None, zero_higher=False): +def _Net_deconv_from_layer(self, start_name, start_diff, diffs=None, zero_higher=False, deconv_type='Zeiler & Fergus'): """ Deconv pass starting from somewhere in the middle of the network, starting with the provided diffs. @@ -128,28 +129,38 @@ def _Net_deconv_from_layer(self, start_name, start_diff, diffs=None, zero_higher start_diff: diff to set at start_name layer diffs: list of diffs to return in addition to bottom diffs. zero_higher: whether or not to zero out higher layers to reflect the true 0 derivative or leave them alone to save time. + deconv_type: either 'Zeiler & Fergus' or 'Guided Backprop' Give outs: {blob name: diff ndarray} dict. """ - if start_diff.shape != self.blobs[start_name].diff.shape: - raise Exception('Expected start_diff of shape %s but got %s' % (self.blobs[start_name].diff.shape, start_diff.shape)) + # convert deconv type string to int value + if deconv_type == 'Zeiler & Fergus': + deconv_type_int = 0 + elif deconv_type == 'Guided Backprop': + deconv_type_int = 1 + else: + raise Exception('Unsupported deconv type: %s' % (deconv_type)) + + start_top_name = self.top_names[start_name][0] + if start_diff.shape != self.blobs[start_top_name].diff.shape: + raise Exception('Expected start_diff of shape %s but got %s' % (self.blobs[start_top_name].diff.shape, start_diff.shape)) - self.blobs[start_name].diff[...] = start_diff + self.blobs[start_top_name].diff[...] = start_diff if zero_higher: past_start = False for blob_name, blob in self.blobs.items(): if past_start: blob.diff[...] 
= 0 - if blob_name == start_name: + if blob_name == start_top_name: past_start = True - return self.deconv(start=start_name, diffs=diffs) + return self.deconv(start=start_name, diffs=diffs, deconv_type_int=deconv_type_int) -def _Net_deconv(self, diffs=None, start=None, end=None, **kwargs): +def _Net_deconv(self, diffs=None, start=None, end=None, deconv_type_int=0, **kwargs): """ Deconv pass: prepare diffs and run the net backward in deconv mode. Just like _Net_Backward but calls Deconv instead. @@ -159,6 +170,7 @@ def _Net_deconv(self, diffs=None, start=None, end=None, **kwargs): If None, top diffs are taken from forward loss. start: optional name of layer at which to begin the backward pass end: optional name of layer at which to finish the backward pass (inclusive) + deconv_type_int: type of deconv to use, 0 for ZF, 1 for guided backprop Give outs: {blob name: diff ndarray} dict. @@ -190,7 +202,7 @@ def _Net_deconv(self, diffs=None, start=None, end=None, **kwargs): raise Exception('Diff is not batch sized') self.blobs[top].diff[...] 
= diff - self._deconv(start_ind, end_ind) + self._deconv(start_ind, end_ind, deconv_type_int) # Unpack diffs to extract return {out: self.blobs[out].diff for out in outputs} diff --git a/src/caffe/layers/lrn_layer.cpp b/src/caffe/layers/lrn_layer.cpp index 0be7911a3be..f7520f1408f 100644 --- a/src/caffe/layers/lrn_layer.cpp +++ b/src/caffe/layers/lrn_layer.cpp @@ -249,10 +249,10 @@ void LRNLayer::WithinChannelBackward( template void LRNLayer::Deconv_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { + const vector& propagate_down, const vector*>& bottom, int deconv_type) { if (deconv_ignore_) { // Deconv Option 1: pass through (ignore LRN layer): - Deconv_passthrough_cpu(top, propagate_down, bottom); + Deconv_passthrough_cpu(top, propagate_down, bottom, deconv_type); } else { // Deconv Option 2: compute derivatives via backprop: Backward_cpu(top, propagate_down, bottom); @@ -261,7 +261,7 @@ void LRNLayer::Deconv_cpu(const vector*>& top, template void LRNLayer::Deconv_passthrough_cpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { + const vector& propagate_down, const vector*>& bottom, int deconv_type) { if (propagate_down[0]) { const Dtype* top_diff = top[0]->cpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); diff --git a/src/caffe/layers/lrn_layer.cu b/src/caffe/layers/lrn_layer.cu index a5ee2387df7..614af2e23db 100644 --- a/src/caffe/layers/lrn_layer.cu +++ b/src/caffe/layers/lrn_layer.cu @@ -197,10 +197,10 @@ template void LRNLayer::CrossChannelBackward_gpu( template void LRNLayer::Deconv_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { + const vector& propagate_down, const vector*>& bottom, int deconv_type) { if (deconv_ignore_) { // Deconv Option 1: pass through (ignore LRN layer): - Deconv_passthrough_gpu(top, propagate_down, bottom); + Deconv_passthrough_gpu(top, propagate_down, bottom, deconv_type); } else { // Deconv Option 2: compute derivatives 
via backprop: Backward_gpu(top, propagate_down, bottom); @@ -217,7 +217,7 @@ __global__ void LRNDeconv_passthrough(const int n, const Dtype* in_diff, template void LRNLayer::Deconv_passthrough_gpu(const vector*>& top, - const vector& propagate_down, const vector*>& bottom) { + const vector& propagate_down, const vector*>& bottom, int deconv_type) { // Option 2: pass through (ignore LRN layer) if (propagate_down[0]) { const Dtype* top_diff = top[0]->gpu_diff(); @@ -231,10 +231,10 @@ void LRNLayer::Deconv_passthrough_gpu(const vector*>& top, } template void LRNLayer::Deconv_passthrough_gpu( const vector*>& top, const vector& propagate_down, - const vector*>& bottom); + const vector*>& bottom, int deconv_type); template void LRNLayer::Deconv_passthrough_gpu( const vector*>& top, const vector& propagate_down, - const vector*>& bottom); + const vector*>& bottom, int deconv_type); diff --git a/src/caffe/layers/relu_layer.cpp b/src/caffe/layers/relu_layer.cpp index 2f0e52b33f0..6858be0828d 100644 --- a/src/caffe/layers/relu_layer.cpp +++ b/src/caffe/layers/relu_layer.cpp @@ -38,16 +38,28 @@ void ReLULayer::Backward_cpu(const vector*>& top, template void ReLULayer::Deconv_cpu(const vector*>& top, const vector& propagate_down, - const vector*>& bottom) { + const vector*>& bottom, int deconv_type) { + if (propagate_down[0]) { + const Dtype* bottom_data = bottom[0]->cpu_data(); const Dtype* top_diff = top[0]->cpu_diff(); Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); const int count = bottom[0]->count(); Dtype negative_slope = this->layer_param_.relu_param().negative_slope(); if (negative_slope != Dtype(0)) LOG(WARNING) << "negative_slope parameter = " << negative_slope << " but nonzero negative_slope params are not supported for Deconv through RELU."; - for (int i = 0; i < count; ++i) { - bottom_diff[i] = std::max(top_diff[i], Dtype(0)); + + // Zeiler & Fergus deconv + if (deconv_type == 0) { + for (int i = 0; i < count; ++i) { + bottom_diff[i] = std::max(top_diff[i], 
Dtype(0)); + } + } + // "guided backprop" deconv + else if (deconv_type == 1) { + for (int i = 0; i < count; ++i) { + bottom_diff[i] = top_diff[i] * ((bottom_data[i] > 0) + negative_slope * (bottom_data[i] <= 0)) * (top_diff[i] > 0); + } } } } diff --git a/src/caffe/layers/relu_layer.cu b/src/caffe/layers/relu_layer.cu index 3905448727b..61d1251e665 100644 --- a/src/caffe/layers/relu_layer.cu +++ b/src/caffe/layers/relu_layer.cu @@ -58,17 +58,27 @@ void ReLULayer::Backward_gpu(const vector*>& top, } template -__global__ void ReLUDeconv(const int n, const Dtype* in_diff, - Dtype* out_diff) { +__global__ void ReLUDeconv_ZF(const int n, const Dtype* in_diff, + const Dtype* in_data, Dtype* out_diff, Dtype negative_slope) { CUDA_KERNEL_LOOP(index, n) { + // Zeiler & Fergus deconv out_diff[index] = in_diff[index] > 0 ? in_diff[index] : 0; } } +template +__global__ void ReLUDeconv_GB(const int n, const Dtype* in_diff, + const Dtype* in_data, Dtype* out_diff, Dtype negative_slope) { + CUDA_KERNEL_LOOP(index, n) { + // "guided backprop" deconv + out_diff[index] = in_diff[index] * ((in_data[index] > 0) + (in_data[index] <= 0) * negative_slope) * (in_diff[index] > 0); + } +} + template void ReLULayer::Deconv_gpu(const vector*>& top, const vector& propagate_down, - const vector*>& bottom) { + const vector*>& bottom, int deconv_type) { if (propagate_down[0]) { const Dtype* bottom_data = bottom[0]->gpu_data(); const Dtype* top_diff = top[0]->gpu_diff(); @@ -77,9 +87,20 @@ void ReLULayer::Deconv_gpu(const vector*>& top, Dtype negative_slope = this->layer_param_.relu_param().negative_slope(); if (negative_slope != Dtype(0)) LOG(WARNING) << "negative_slope parameter = " << negative_slope << " but nonzero negative_slope params are not supported for Deconv through RELU."; - // NOLINT_NEXT_LINE(whitespace/operators) - ReLUDeconv<<>>( - count, top_diff, bottom_diff); + + // Zeiler & Fergus deconv + if (deconv_type == 0) { + // NOLINT_NEXT_LINE(whitespace/operators) + ReLUDeconv_ZF<<>>( 
+ count, top_diff, bottom_data, bottom_diff, negative_slope); + } + // "guided backprop" deconv + else if (deconv_type == 1) { + // NOLINT_NEXT_LINE(whitespace/operators) + ReLUDeconv_GB<<>>( + count, top_diff, bottom_data, bottom_diff, negative_slope); + } + CUDA_POST_KERNEL_CHECK; } } diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 4759804033f..d8ccbe44139 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -583,13 +583,13 @@ void Net::BackwardFromTo(int start, int end) { } template -void Net::DeconvFromTo(int start, int end) { +void Net::DeconvFromTo(int start, int end, int deconv_type) { CHECK_GE(end, 0); CHECK_LT(start, layers_.size()); for (int i = start; i >= end; --i) { if (layer_need_backward_[i]) { layers_[i]->Deconv( - top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i]); + top_vecs_[i], bottom_need_backward_[i], bottom_vecs_[i], deconv_type); if (debug_info_) { DeconvDebugInfo(i); } } } @@ -759,18 +759,18 @@ void Net::Backward() { } template -void Net::DeconvFrom(int start) { - DeconvFromTo(start, 0); +void Net::DeconvFrom(int start, int deconv_type) { + DeconvFromTo(start, 0, deconv_type); } template -void Net::DeconvTo(int end) { - DeconvFromTo(layers_.size() - 1, end); +void Net::DeconvTo(int end, int deconv_type) { + DeconvFromTo(layers_.size() - 1, end, deconv_type); } template -void Net::Deconv() { - DeconvFromTo(layers_.size() - 1, 0); +void Net::Deconv(int deconv_type) { + DeconvFromTo(layers_.size() - 1, 0, deconv_type); } template