Skip to content

Commit

Permalink
fix valgrind issue
Browse files Browse the repository at this point in the history
  • Loading branch information
boennecd committed Nov 25, 2020
1 parent 7c185c9 commit 138e38b
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 58 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: psqn
Type: Package
Title: Partially Separable Quasi-Newton
Version: 0.1.3
Version: 0.1.4
Authors@R: c(person("Benjamin", "Christoffersen",
email = "boennecd@gmail.com",
role = c("cre", "aut")))
Expand Down
21 changes: 8 additions & 13 deletions cran-comments.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,19 @@
R version 3.6.3
* Ubuntu 18.04 LTS with gcc 8.3.0
R version 3.6.3 with LTO checks
* Ubuntu 18.04 LTS with gcc 8.3.0
R version 3.6.3 with valgrind
* Ubuntu 18.04 LTS with clang-6.0.0
R version 3.6.3 with ASAN and UBSAN
* Ubuntu 16.04 LTS (on travis-ci)
R version 4.0.0
* win-builder (devel and release)
* `rhub::check_for_cran()`
* `rhub::check_on_solaris()`
* `rhub::check(platform = "macos-highsierra-release")`
* `rhub::check_with_valgrind()`

## R CMD check results
There were no WARNINGs or ERRORs.

There is a NOTE about the package size in some cases.

The only error that was left is the issue with LTO. However, notice that
this is because of an update of testthat on 2020-10-31. In particular, they
changed this line:
https://github.com/r-lib/testthat/blob/b684f1ee7fd806ff55a9d901174c4cc77c38b7d0/inst/include/testthat/testthat.h#L165

I submitted my package on 2020-11-04 and did not think that there would be
breaking changes in the testthat package.

For the record, the LTO did __not__ remain. I submitted version 0.1.1 prior
to the testthat 3.0.0 release with the breaking changes. I suspect that a
lot of packages will now fail the LTO check. See
https://github.com/r-lib/testthat/issues/1230
68 changes: 44 additions & 24 deletions src/r-api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,22 +63,28 @@ class r_worker {
g_dim(([&]() -> size_t {
NumericVector dum(0);
scomp_grad[0] = false;
SEXP res = f(f_idx, dum, scomp_grad);
if(!Rf_isInteger(res) or !Rf_isVector(res) or Rf_xlength(res) != 2L)
SEXP res = PROTECT(f(f_idx, dum, scomp_grad));
if(!Rf_isInteger(res) or !Rf_isVector(res) or Rf_xlength(res) != 2L){
UNPROTECT(1);
throw std::invalid_argument(
"fn returns invalid lengths with zero length par");

return *INTEGER(res);
}
int const out = *INTEGER(res);
UNPROTECT(1);
return out;
})()),
p_dim(([&]() -> size_t {
NumericVector dum(0);
scomp_grad[0] = false;
SEXP res = f(f_idx, dum, scomp_grad);
if(!Rf_isInteger(res) or !Rf_isVector(res) or Rf_xlength(res) != 2L)
SEXP res = PROTECT(f(f_idx, dum, scomp_grad));
if(!Rf_isInteger(res) or !Rf_isVector(res) or Rf_xlength(res) != 2L){
UNPROTECT(1);
throw std::invalid_argument(
"fn returns invalid lengths with zero length par");

return INTEGER(res)[1L];
}
int const out = INTEGER(res)[1L];
UNPROTECT(1);
return out;
})())
{ };

Expand All @@ -94,31 +100,38 @@ class r_worker {
for(size_t j = 0; j < n_ele; ++j, ++point, ++p)
*p = *point;
scomp_grad[0] = false;
SEXP res = f(f_idx, par, scomp_grad);
if(!Rf_isReal(res) or !Rf_isVector(res) or Rf_xlength(res) != 1L)
SEXP res = PROTECT(f(f_idx, par, scomp_grad));
if(!Rf_isReal(res) or !Rf_isVector(res) or Rf_xlength(res) != 1L){
UNPROTECT(1);
throw std::invalid_argument(
"fn returns invalid output with comp_grad = FALSE");

return *REAL(res);
}
double const out = *REAL(res);
UNPROTECT(1);
return out;
}

double grad
(double const * __restrict__ point, double * __restrict__ gr) const {
for(size_t j = 0; j < n_ele; ++j, ++point)
par[j] = *point;
scomp_grad[0] = true;
SEXP res = f(f_idx, par, scomp_grad);
SEXP res = PROTECT(f(f_idx, par, scomp_grad));
CharacterVector what("grad");
SEXP gr_val = Rf_getAttrib(res, what);
SEXP gr_val = PROTECT(Rf_getAttrib(res, what));

if(!Rf_isReal(res) or !Rf_isVector(res) or Rf_xlength(res) != 1L or
Rf_isNull(gr_val) or !Rf_isReal(gr_val) or
static_cast<size_t>(Rf_xlength(gr_val)) != n_ele)
static_cast<size_t>(Rf_xlength(gr_val)) != n_ele){
UNPROTECT(2);
throw std::invalid_argument(
"fn returns invalid output with comp_grad = TRUE");
}

lp::copy(gr, REAL(gr_val), n_ele);
return *REAL(res);
double const out = *REAL(res);
UNPROTECT(2);
return out;
};

virtual bool thread_safe() const {
Expand Down Expand Up @@ -329,29 +342,36 @@ class r_worker_bfgs : public PSQN::problem {

double func(double const *val){
lp::copy(&par[0], val, n_ele);
SEXP res = f(par);
if(!Rf_isReal(res) or !Rf_isVector(res) or Rf_xlength(res) != 1L)
SEXP res = PROTECT(f(par));
if(!Rf_isReal(res) or !Rf_isVector(res) or Rf_xlength(res) != 1L){
UNPROTECT(1);
throw std::invalid_argument("fn returns invalid output");

return *REAL(res);
}
double const out = *REAL(res);
UNPROTECT(1);
return out;
}

double grad(double const * __restrict__ val,
double * __restrict__ gr){
lp::copy(&par[0], val, n_ele);

SEXP res = g(par);
SEXP res = PROTECT(g(par));
CharacterVector what("value");
SEXP func_val = Rf_getAttrib(res, what);
SEXP func_val = PROTECT(Rf_getAttrib(res, what));

if(!Rf_isReal(res) or !Rf_isVector(res) or
static_cast<size_t>(Rf_xlength(res)) != n_ele or
Rf_isNull(func_val) or !Rf_isReal(func_val) or
Rf_xlength(func_val) != 1L)
Rf_xlength(func_val) != 1L){
UNPROTECT(2);
throw std::invalid_argument("gr returns invalid output");
}

double const out = *REAL(func_val);
lp::copy(gr, REAL(res), n_ele);
return *REAL(func_val);
UNPROTECT(2);
return out;
}
};

Expand Down
40 changes: 20 additions & 20 deletions vignettes/psqn.html
Original file line number Diff line number Diff line change
Expand Up @@ -1076,22 +1076,22 @@ <h3>C++ Implementation</h3>
<a class="sourceLine" id="cb18-24" data-line-number="24"><span class="co">#&gt; # A tibble: 16 x 6</span></a>
<a class="sourceLine" id="cb18-25" data-line-number="25"><span class="co">#&gt; expression min median `itr/sec` mem_alloc `gc/sec`</span></a>
<a class="sourceLine" id="cb18-26" data-line-number="26"><span class="co">#&gt; &lt;bch:expr&gt; &lt;bch:tm&gt; &lt;bch:tm&gt; &lt;dbl&gt; &lt;bch:byt&gt; &lt;dbl&gt;</span></a>
<a class="sourceLine" id="cb18-27" data-line-number="27"><span class="co">#&gt; 1 optim BFGS (2 threads) 1.16s 1.16s 0.857 42.41MB 0.686</span></a>
<a class="sourceLine" id="cb18-28" data-line-number="28"><span class="co">#&gt; 2 lbfgs (1 thread) 262.61ms 264.8ms 3.76 10.07MB 0.989</span></a>
<a class="sourceLine" id="cb18-29" data-line-number="29"><span class="co">#&gt; 3 lbfgs(2 threads) 142.44ms 145.29ms 6.86 9.99MB 1.76 </span></a>
<a class="sourceLine" id="cb18-30" data-line-number="30"><span class="co">#&gt; 4 lbfgs(4 threads) 96.13ms 98.83ms 10.1 11.93MB 1.39 </span></a>
<a class="sourceLine" id="cb18-31" data-line-number="31"><span class="co">#&gt; 5 lbfgsb3c (1 thread) 312.67ms 318.04ms 3.09 22.7MB 0.965</span></a>
<a class="sourceLine" id="cb18-32" data-line-number="32"><span class="co">#&gt; 6 lbfgsb3c(2 threads) 188.63ms 190.42ms 5.24 25.25MB 1.55 </span></a>
<a class="sourceLine" id="cb18-33" data-line-number="33"><span class="co">#&gt; 7 lbfgsb3c(4 threads) 83.79ms 86.84ms 11.4 18.98MB 2.59 </span></a>
<a class="sourceLine" id="cb18-34" data-line-number="34"><span class="co">#&gt; 8 psqn (R; 1 thread) 183.95ms 191.08ms 5.23 6.78MB 12.6 </span></a>
<a class="sourceLine" id="cb18-35" data-line-number="35"><span class="co">#&gt; 9 psqn(R; 2 threads) 183.88ms 200.44ms 4.93 6.78MB 12.0 </span></a>
<a class="sourceLine" id="cb18-36" data-line-number="36"><span class="co">#&gt; 10 psqn (1 thread, SR1) 38.44ms 38.87ms 25.7 27.58KB 0 </span></a>
<a class="sourceLine" id="cb18-37" data-line-number="37"><span class="co">#&gt; 11 psqn(2 threads, SR1) 19.11ms 19.63ms 50.4 27.58KB 0 </span></a>
<a class="sourceLine" id="cb18-38" data-line-number="38"><span class="co">#&gt; 12 psqn (1 thread, opt pri.) 24.9ms 25.27ms 39.5 60.05KB 0 </span></a>
<a class="sourceLine" id="cb18-39" data-line-number="39"><span class="co">#&gt; 13 psqn (2 threads, opt pri.) 13.11ms 13.54ms 73.0 55.16KB 0 </span></a>
<a class="sourceLine" id="cb18-40" data-line-number="40"><span class="co">#&gt; 14 psqn (1 thread) 18.94ms 19.15ms 52.0 27.58KB 0 </span></a>
<a class="sourceLine" id="cb18-41" data-line-number="41"><span class="co">#&gt; 15 psqn(2 threads) 10.09ms 10.41ms 95.3 27.58KB 0.200</span></a>
<a class="sourceLine" id="cb18-42" data-line-number="42"><span class="co">#&gt; 16 psqn(4 threads) 5.77ms 6ms 165. 27.58KB 0</span></a></code></pre></div>
<a class="sourceLine" id="cb18-27" data-line-number="27"><span class="co">#&gt; 1 optim BFGS (2 threads) 1.13s 1.13s 0.882 42.41MB 0.705</span></a>
<a class="sourceLine" id="cb18-28" data-line-number="28"><span class="co">#&gt; 2 lbfgs (1 thread) 251.66ms 252.93ms 3.94 10.07MB 0.985</span></a>
<a class="sourceLine" id="cb18-29" data-line-number="29"><span class="co">#&gt; 3 lbfgs(2 threads) 136.38ms 136.89ms 7.22 9.99MB 0.976</span></a>
<a class="sourceLine" id="cb18-30" data-line-number="30"><span class="co">#&gt; 4 lbfgs(4 threads) 92.23ms 94.17ms 10.5 11.93MB 1.58 </span></a>
<a class="sourceLine" id="cb18-31" data-line-number="31"><span class="co">#&gt; 5 lbfgsb3c (1 thread) 298.11ms 306.41ms 3.19 22.71MB 0.997</span></a>
<a class="sourceLine" id="cb18-32" data-line-number="32"><span class="co">#&gt; 6 lbfgsb3c(2 threads) 180.32ms 180.9ms 5.14 25.25MB 1.58 </span></a>
<a class="sourceLine" id="cb18-33" data-line-number="33"><span class="co">#&gt; 7 lbfgsb3c(4 threads) 78.02ms 80.27ms 12.3 18.98MB 3.18 </span></a>
<a class="sourceLine" id="cb18-34" data-line-number="34"><span class="co">#&gt; 8 psqn (R; 1 thread) 168.67ms 177.44ms 5.49 6.78MB 12.0 </span></a>
<a class="sourceLine" id="cb18-35" data-line-number="35"><span class="co">#&gt; 9 psqn(R; 2 threads) 179.28ms 186.24ms 5.29 6.78MB 10.2 </span></a>
<a class="sourceLine" id="cb18-36" data-line-number="36"><span class="co">#&gt; 10 psqn (1 thread, SR1) 37.45ms 37.49ms 26.7 27.58KB 0 </span></a>
<a class="sourceLine" id="cb18-37" data-line-number="37"><span class="co">#&gt; 11 psqn(2 threads, SR1) 19.04ms 19.35ms 51.0 27.58KB 0 </span></a>
<a class="sourceLine" id="cb18-38" data-line-number="38"><span class="co">#&gt; 12 psqn (1 thread, opt pri.) 24.12ms 24.77ms 40.2 60.23KB 0 </span></a>
<a class="sourceLine" id="cb18-39" data-line-number="39"><span class="co">#&gt; 13 psqn (2 threads, opt pri.) 13.27ms 13.46ms 73.9 55.16KB 0 </span></a>
<a class="sourceLine" id="cb18-40" data-line-number="40"><span class="co">#&gt; 14 psqn (1 thread) 18.29ms 18.36ms 53.7 27.58KB 0.199</span></a>
<a class="sourceLine" id="cb18-41" data-line-number="41"><span class="co">#&gt; 15 psqn(2 threads) 10.13ms 10.31ms 96.8 27.58KB 0 </span></a>
<a class="sourceLine" id="cb18-42" data-line-number="42"><span class="co">#&gt; 16 psqn(4 threads) 6ms 6.19ms 161. 27.58KB 0</span></a></code></pre></div>
<p>We see a large reduction. To be fair, we can use the C interface for the limited-memory BFGS methods to avoid re-allocating the gradient at every iteration. This will reduce their computation time. The R version of the quasi-Newton method is slower mainly as the R version to evaluate the log of the integrand and its derivative is slower than the version used by all the other methods. We can illustrate this by comparing with the computation time with the <code>eval_integrand</code>:</p>
<div class="sourceCode" id="cb19"><pre class="sourceCode r"><code class="sourceCode r"><a class="sourceLine" id="cb19-1" data-line-number="1">bench<span class="op">::</span><span class="kw">mark</span>(</a>
<a class="sourceLine" id="cb19-2" data-line-number="2"> <span class="st">`</span><span class="dt"> R</span><span class="st">`</span> =<span class="st"> </span><span class="kw">eval_integrand</span>(true_params), </a>
Expand All @@ -1100,8 +1100,8 @@ <h3>C++ Implementation</h3>
<a class="sourceLine" id="cb19-5" data-line-number="5"><span class="co">#&gt; # A tibble: 2 x 6</span></a>
<a class="sourceLine" id="cb19-6" data-line-number="6"><span class="co">#&gt; expression min median `itr/sec` mem_alloc `gc/sec`</span></a>
<a class="sourceLine" id="cb19-7" data-line-number="7"><span class="co">#&gt; &lt;bch:expr&gt; &lt;bch:tm&gt; &lt;bch:tm&gt; &lt;dbl&gt; &lt;bch:byt&gt; &lt;dbl&gt;</span></a>
<a class="sourceLine" id="cb19-8" data-line-number="8"><span class="co">#&gt; 1 R 3.08ms 3.36ms 290. 139.17KB 15.3</span></a>
<a class="sourceLine" id="cb19-9" data-line-number="9"><span class="co">#&gt; 2 C++ 589.7µs 594.19µs 1658. 2.49KB 0</span></a></code></pre></div>
<a class="sourceLine" id="cb19-8" data-line-number="8"><span class="co">#&gt; 1 R 2.94ms 3.26ms 280. 139.17KB 11.0</span></a>
<a class="sourceLine" id="cb19-9" data-line-number="9"><span class="co">#&gt; 2 C++ 573.83µs 576.78µs 1705. 2.49KB 0</span></a></code></pre></div>
<p>There is a big difference. Moreover, there is an overhead with repeatedly going back and forward between R and C++. A fair comparison would use an R implementation for all methods.</p>
</div>
<div id="polynomial-example" class="section level3">
Expand Down Expand Up @@ -1486,8 +1486,8 @@ <h3>BFGS Method</h3>
<a class="sourceLine" id="cb24-35" data-line-number="35"><span class="co">#&gt; # A tibble: 2 x 6</span></a>
<a class="sourceLine" id="cb24-36" data-line-number="36"><span class="co">#&gt; expression min median `itr/sec` mem_alloc `gc/sec`</span></a>
<a class="sourceLine" id="cb24-37" data-line-number="37"><span class="co">#&gt; &lt;bch:expr&gt; &lt;bch:tm&gt; &lt;bch:tm&gt; &lt;dbl&gt; &lt;bch:byt&gt; &lt;dbl&gt;</span></a>
<a class="sourceLine" id="cb24-38" data-line-number="38"><span class="co">#&gt; 1 optim 115.8µs 129.9µs 7629. 384B 13.2</span></a>
<a class="sourceLine" id="cb24-39" data-line-number="39"><span class="co">#&gt; 2 psqn_bfgs 88.1µs 96.2µs 10048. 2.49KB 10.4</span></a></code></pre></div>
<a class="sourceLine" id="cb24-38" data-line-number="38"><span class="co">#&gt; 1 optim 111.9µs 126.8µs 7834. 384B 14.5</span></a>
<a class="sourceLine" id="cb24-39" data-line-number="39"><span class="co">#&gt; 2 psqn_bfgs 86.3µs 94.3µs 10432. 2.49KB 12.6</span></a></code></pre></div>
</div>
<div id="bfgs-and-partially-separable-quasi-newton" class="section level3">
<h3>BFGS and Partially Separable Quasi-Newton</h3>
Expand Down

0 comments on commit 138e38b

Please sign in to comment.