diff --git a/dynet/exec.cc b/dynet/exec.cc
index 8594dd70d..6240a61a1 100644
--- a/dynet/exec.cc
+++ b/dynet/exec.cc
@@ -566,8 +566,11 @@ const Tensor& BatchedExecutionEngine::incremental_forward_no_update(
         *(active_un_end++) = j;
       }
     }
-    for (size_t j = 0; j < (size_t)sigmap.size(); ++j)
-      prof2avg[j] /= prof2cnt[j];
+    for (size_t j = 0; j < (size_t)sigmap.size(); ++j) // -prh sigmap.size() == 40
+    {
+      if(std::abs(prof2cnt[j]) > 1e-8)
+        prof2avg[j] /= prof2cnt[j];
+    }
 
     // 2) Travel through and do active nodes
     while (node_id != (VariableIndex)uptop1) {
@@ -1060,7 +1063,7 @@ void BatchedExecutionEngine::backward(VariableIndex from_where, bool full) {
         // No concatenation whatsoever
         if (my_batch.concat[ai] == 0) {
           if (needs_derivative[node2batch[arg]]) {
-            node->backward(xs, my_batch.nfx, batched_ndEdfs[i], ai, batched_ndEdfs[node2batch[arg]]);
+            node->backward(xs, my_batch.nfx, batched_ndEdfs[i], ai, ndEdfs[arg]);
             // cerr << "batched backward[" << i << "](" << ai << ")->" << node2batch[arg] << " == " << print_vec(as_vector(batched_ndEdfs[node2batch[arg]])) << endl;
           }
         // Needs concatenation
diff --git a/dynet/matrix-multiply.h b/dynet/matrix-multiply.h
index 08794fd03..4d7858896 100644
--- a/dynet/matrix-multiply.h
+++ b/dynet/matrix-multiply.h
@@ -162,6 +162,11 @@ inline void MatrixTranspMultiplyAcc(const dynet::Device_CPU & dev, const dynet::
 inline void MatrixMultiplyTranspAcc(const dynet::Device_GPU & dev, const dynet::Tensor& l, const dynet::Tensor& r, dynet::Tensor& y) {
   int max_b = std::max(l.d.bd, r.d.bd);
   if(y.d.bd == 1 && (l.d.bd == r.d.bd)) {
+    DYNET_ARG_CHECK(l.d.rows() == y.d.rows(), "MatrixMultiplyTranspAcc: l.d.rows() != y.d.rows()");
+    DYNET_ARG_CHECK(r.d.rows() == y.d.cols(), "MatrixMultiplyTranspAcc: r.d.rows() != y.d.cols()");
+    DYNET_ARG_CHECK(l.d.cols() == r.d.cols(), "MatrixMultiplyTranspAcc: l.d.cols() != r.d.cols()");
+    DYNET_ARG_CHECK(l.d.batch_elems() == r.d.batch_elems(), "MatrixMultiplyTranspAcc: l.d.batch_elems() != r.d.batch_elems()");
+
     CUBLAS_CHECK(cublasSgemm(dev.cublas_handle, CUBLAS_OP_N, CUBLAS_OP_T,
           y.d.rows(), y.d.cols(), l.d.cols() * l.d.batch_elems(),
           dev.kSCALAR_ONE,
@@ -183,6 +188,10 @@ inline void MatrixMultiplyTranspAcc(const dynet::Device_GPU & dev, const dynet::
 inline void MatrixMultiplyTranspAcc(const dynet::Device_CPU & dev, const dynet::Tensor& l, const dynet::Tensor& r, dynet::Tensor& y) {
   int max_b = std::max(l.d.bd, r.d.bd);
   if(y.d.bd == 1 && (l.d.bd == r.d.bd)) {
+    DYNET_ARG_CHECK(l.d.rows() == y.d.rows(), "MatrixMultiplyTranspAcc [CPU]: l.d.rows() != y.d.rows()");
+    DYNET_ARG_CHECK(r.d.rows() == y.d.cols(), "MatrixMultiplyTranspAcc [CPU]: r.d.rows() != y.d.cols()");
+    DYNET_ARG_CHECK(l.d.cols() == r.d.cols(), "MatrixMultiplyTranspAcc [CPU]: l.d.cols() != r.d.cols()");
+    DYNET_ARG_CHECK(l.d.batch_elems() == r.d.batch_elems(), "MatrixMultiplyTranspAcc [CPU]: l.d.batch_elems() != r.d.batch_elems()");
     mat(y).noalias() += colbatch_matrix(l) * colbatch_matrix(r).transpose();
   } else {
 #ifdef __INTEL_MKL__
diff --git a/dynet/nodes-matrixmultiply.cc b/dynet/nodes-matrixmultiply.cc
index 8ee92f2a7..ccf318c6f 100644
--- a/dynet/nodes-matrixmultiply.cc
+++ b/dynet/nodes-matrixmultiply.cc
@@ -51,6 +51,7 @@ template
 void MatrixMultiply::forward_dev_impl(const MyDevice & dev, const vector<const Tensor*>& xs, Tensor& fx) const {
   DYNET_ASSERT(xs.size() == 2, "Failed dimension check in MatrixMultiply::forward");
   DYNET_ARG_CHECK(fx.d.bd == max(xs[0]->d.bd, xs[1]->d.bd), "Failed dimension check in MatrixMultiply::forward");
+  DYNET_ARG_CHECK(fx.d.batch_size() == dim_forward({xs[0]->d, xs[1]->d}).batch_size(), "Failed result dimension check in MatrixMultiply::forward");
   // fx = mat(fx0) + xs[0] * xs[1]
   dynet::MatrixMultiply(dev, *xs[0], *xs[1], fx, dev.kSCALAR_ZERO);
 }
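
// Editor's note: a minimal standalone sketch (not part of the patch) of the guarded-average
// pattern used in the exec.cc hunk above. The variable names (sums, counts, avgs) are
// hypothetical and are not DyNet APIs; the point is only that dividing by a zero count would
// produce NaN/Inf, so the division is skipped when the count is numerically zero, using the
// same 1e-8 tolerance as the patch.
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  std::vector<float> sums   = {6.f, 0.f, 9.f};   // per-signature totals
  std::vector<float> counts = {3.f, 0.f, 2.f};   // per-signature counts (one is zero)
  std::vector<float> avgs(sums);                 // averages computed in place, like prof2avg
  for (size_t j = 0; j < avgs.size(); ++j) {
    if (std::abs(counts[j]) > 1e-8)              // skip signatures that were never seen
      avgs[j] /= counts[j];                      // safe: count is non-zero
  }
  for (float a : avgs) std::printf("%g\n", a);   // prints 2, 0, 4.5
  return 0;
}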