Automatic Differentiation
 
Loading...
Searching...
No Matches
map_rect_concurrent.hpp
Go to the documentation of this file.
1#ifndef STAN_MATH_REV_FUNCTOR_MAP_RECT_CONCURRENT_HPP
2#define STAN_MATH_REV_FUNCTOR_MAP_RECT_CONCURRENT_HPP
3
10
11#include <tbb/parallel_for.h>
12#include <tbb/blocked_range.h>
13
14#include <algorithm>
15#include <vector>
16
17namespace stan {
18namespace math {
19namespace internal {
20
/**
 * Evaluate every job on plain double values, optionally in parallel, and
 * hand the gathered results to the combine functor.
 *
 * For each job i the ReduceF functor is called with the double values of
 * the shared parameters and of job_params[i], together with x_r[i], x_i[i]
 * and msgs. Each job result is a matrix; the results are concatenated
 * column-wise into a single matrix which, together with the per-job column
 * counts, is passed to a CombineF instance constructed from the original
 * (not value_of'd) shared_params and job_params.
 *
 * When STAN_THREADS is defined the jobs are dispatched through
 * tbb::parallel_for inside tbb::this_task_arena::isolate; otherwise they
 * are executed in a single sequential pass.
 *
 * NOTE(review): ReduceF and CombineF are type aliases whose definitions,
 * like the line carrying the function name, are not visible in this
 * listing; their exact contracts should be confirmed against the full
 * header.
 * NOTE(review): job_output[0] is read when sizing the result without a
 * visible check that job_params is non-empty -- confirm callers guarantee
 * at least one job.
 * NOTE(review): std::accumulate is used below but <numeric> is not among
 * the visible includes; presumably it arrives transitively -- confirm.
 *
 * @tparam call_id not referenced in the visible body
 * @tparam F user functor type; presumably consumed by the ReduceF /
 *   CombineF aliases -- TODO confirm
 * @tparam T_shared_param type of the shared parameter vector (constrained
 *   by require_eigen_col_vector_t)
 * @tparam T_job_param scalar type of the job-specific parameter vectors
 * @param shared_params parameters passed to every job
 * @param job_params one parameter vector per job; its size defines the
 *   number of jobs
 * @param x_r per-job real data, indexed by job (assumed to hold at least
 *   job_params.size() entries -- no check is visible here)
 * @param x_i per-job integer data, indexed by job (same assumption)
 * @param msgs stream pointer forwarded unchanged to every ReduceF call
 * @return whatever CombineF::operator() yields for the gathered outputs
 */
21template <int call_id, typename F, typename T_shared_param,
22 typename T_job_param, require_eigen_col_vector_t<T_shared_param>*>
23inline Eigen::Matrix<return_type_t<T_shared_param, T_job_param>, Eigen::Dynamic,
24 1>
26 const T_shared_param& shared_params,
27 const std::vector<Eigen::Matrix<T_job_param, Eigen::Dynamic, 1>>&
28 job_params,
29 const std::vector<std::vector<double>>& x_r,
30 const std::vector<std::vector<int>>& x_i, std::ostream* msgs) {
31 using ReduceF
34
35 const int num_jobs = job_params.size();
// shared parameters are converted to doubles once, up front, and reused
// by every job
36 const vector_d shared_params_dbl = value_of(shared_params);
// one output matrix and one column count per job, pre-sized so that each
// job only ever writes to its own slot i
37 std::vector<matrix_d> job_output(num_jobs);
38 std::vector<int> world_f_out(num_jobs, 0);
39
// processes the half-open job range [start, end)
40 auto execute_chunk = [&](std::size_t start, std::size_t end) -> void {
41 for (std::size_t i = start; i != end; ++i) {
42 job_output[i] = ReduceF()(shared_params_dbl, value_of(job_params[i]),
43 x_r[i], x_i[i], msgs);
// remember how many columns this job produced; needed for the gather
// step below and passed on to the combine functor
44 world_f_out[i] = job_output[i].cols();
45 }
46 };
47
48#ifdef STAN_THREADS
49 // we must use task isolation as described here:
50 // https://software.intel.com/content/www/us/en/develop/documentation/tbb-documentation/top/intel-threading-building-blocks-developer-guide/task-isolation.html
51 // this is to ensure that the thread local AD tape resource is
52 // not being modified from a different task which may happen
53 // whenever this function is being used itself in a parallel
54 // context (like running multiple chains for Stan)
55 tbb::this_task_arena::isolate([&] {
56 tbb::parallel_for(tbb::blocked_range<std::size_t>(0, num_jobs),
57 [&](const tbb::blocked_range<size_t>& r) {
58 execute_chunk(r.begin(), r.end());
59 });
60 });
61#else
// without threading support all jobs run in one sequential pass
62 execute_chunk(0, num_jobs);
63#endif
64
65 // collect results
// total number of output columns across all jobs
66 const int num_world_output
67 = std::accumulate(world_f_out.begin(), world_f_out.end(), 0);
// the row count is taken from the first job's output; the block
// assignment below uses that same row count for every job
68 matrix_d world_output(job_output[0].rows(), num_world_output);
69
// copy each job's columns side by side, in job order
70 int offset = 0;
71 for (const auto& job : job_output) {
72 const int num_job_outputs = job.cols();
73
74 world_output.block(0, offset, world_output.rows(), num_job_outputs) = job;
75
76 offset += num_job_outputs;
77 }
// the combine functor receives the original parameter objects plus the
// gathered double outputs and the per-job column counts
78 CombineF combine(shared_params, job_params);
79 return combine(world_output, world_f_out);
80}
81
82} // namespace internal
83} // namespace math
84} // namespace stan
85
86#endif
int64_t rows(const T_x &x)
Returns the number of rows in the specified kernel generator expression.
Definition rows.hpp:22
Eigen::Matrix< return_type_t< T_shared_param, T_job_param >, Eigen::Dynamic, 1 > map_rect_concurrent(const T_shared_param &shared_params, const std::vector< Eigen::Matrix< T_job_param, Eigen::Dynamic, 1 > > &job_params, const std::vector< std::vector< double > > &x_r, const std::vector< std::vector< int > > &x_i, std::ostream *msgs=nullptr)
Eigen::Matrix< double, Eigen::Dynamic, 1 > vector_d
Type for (column) vector of double values.
Definition typedefs.hpp:24
T value_of(const fvar< T > &v)
Return the value of the specified variable.
Definition value_of.hpp:18
Eigen::Matrix< double, Eigen::Dynamic, Eigen::Dynamic > matrix_d
Type for matrix of double values.
Definition typedefs.hpp:19
The lgamma implementation in stan-math is based on either the reentrant safe lgamma_r implementation ...