Skip to content

Commit bace063

Browse files
authored
Merge pull request #30 from asgr/copilot/improve-rcpp-function-speed
Fix OpenMP data races and add speed improvements in Rcpp src/ functions
2 parents 52c6c06 + 80357fb commit bace063

6 files changed

Lines changed: 78 additions & 52 deletions

File tree

src/akima.cpp

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -306,11 +306,24 @@ void interpolateLinearGrid(NumericVector xseq, NumericVector yseq, NumericMatrix
306306
int ncol=tempmat_sky.ncol();
307307
int nrow=tempmat_sky.nrow();
308308

309+
// Precompute y-bracket indices for all output columns (independent of row i)
310+
std::vector<int> top_indices(myynpts, -1), bottom_indices(myynpts, -1);
311+
for (int j = 1; j <= myynpts; j++) {
312+
double y = -0.5+j;
313+
for (int jj = 1; jj < ncol; jj++) {
314+
if (myy[jj-1] <= y && myy[jj] >= y) {
315+
top_indices[j-1] = jj-1;
316+
bottom_indices[j-1] = jj;
317+
break;
318+
}
319+
}
320+
}
321+
309322
// For each vertical row
310323
for (int i = 1; i <= myxnpts; i++) {
311324
// For a spline to interpolate vertically along the elements of the row
312325
double x = -0.5+i;
313-
// find the left and right index ibnto xseq
326+
// find the left and right index into xseq
314327
int left_index = -1;
315328
int right_index = -1;
316329
for (int ii = 1; ii < nrow; ii++) {
@@ -321,34 +334,28 @@ void interpolateLinearGrid(NumericVector xseq, NumericVector yseq, NumericMatrix
321334
}
322335
}
323336

324-
//Rcpp::Rcout << "x="<<x<<" xindex="<<left_index<<" "<<right_index<<"\n";
325-
//Rcpp::Rcout << "x="<<x<<" xleft="<<myx[left_index]<<" "<<myx[right_index]<<"\n";
326-
int top_index = -1;
327-
int bottom_index = -1;
337+
if (left_index < 0) continue;
338+
const double xlambda = (x-myx[left_index])/(myx[right_index]-myx[left_index]);
339+
328340
for (int j = 1; j <= myynpts; j++) {
341+
int top_index = top_indices[j-1];
342+
int bottom_index = bottom_indices[j-1];
343+
if (top_index < 0) continue;
344+
345+
// p1...p2
346+
// . .
347+
// . .
348+
// p3...p4
349+
double p1 = tempmat_sky(left_index,top_index);
350+
double p2 = tempmat_sky(right_index,top_index);
351+
double p3 = tempmat_sky(left_index,bottom_index);
352+
double p4 = tempmat_sky(right_index,bottom_index);
353+
329354
double y = -0.5+j;
330-
for (int jj = 1; jj < ncol; jj++) {
331-
if (myy[jj-1] <= y && myy[jj] >= y) {
332-
top_index = jj-1;
333-
bottom_index = jj;
334-
// p1...p2
335-
// . .
336-
// . .
337-
// p3...p4
338-
double p1 = tempmat_sky(left_index,top_index);
339-
double p2 = tempmat_sky(right_index,top_index);
340-
double p3 = tempmat_sky(left_index,bottom_index);
341-
double p4 = tempmat_sky(right_index,bottom_index);
342-
343-
double xlambda = (x-myx[left_index])/(myx[right_index]-myx[left_index]);
344-
double ylambda = (y-myy[top_index])/(myy[bottom_index]-myy[top_index]);
345-
double ztop = p1 * (1.0-xlambda) + p2 * xlambda;
346-
double zbottom = p3 * (1.0-xlambda) + p4 * xlambda;
347-
output(i-1,j-1) = ztop * (1.0-ylambda) +zbottom * ylambda;
348-
//Rcpp::Rcout << "y="<<y<<" yindex="<<top_index<<" "<<bottom_index<<" "<<p1<<" "<<p2<<" "<<p3<<" "<<p4<<" result="<<output(i-1,j-1)<<"\n";
349-
break;
350-
}
351-
}
355+
double ylambda = (y-myy[top_index])/(myy[bottom_index]-myy[top_index]);
356+
double ztop = p1 * (1.0-xlambda) + p2 * xlambda;
357+
double zbottom = p3 * (1.0-xlambda) + p4 * xlambda;
358+
output(i-1,j-1) = ztop * (1.0-ylambda) +zbottom * ylambda;
352359
}
353360
}
354361
}

src/aper_cover.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,13 @@ NumericMatrix profoundAperWeight(NumericVector cx,
194194
const double PC_temp = pixelCoverAper(delta_x, delta_y, delta_2,
195195
rad_2, rad_min_2, rad_max_2, depth);
196196
if(rad_re[k] == 0){
197-
weight(i,j) += wt_use[k] * PC_temp;
197+
double cover = wt_use[k] * PC_temp;
198+
#pragma omp atomic
199+
weight(i,j) += cover;
198200
}else{
199-
weight(i,j) += wt_use[k] * PC_temp * exp(-bn[k]*pow(sqrt(delta_2) / rad_re[k], 1/nser[k]));
201+
double cover = wt_use[k] * PC_temp * exp(-bn[k]*pow(sqrt(delta_2) / rad_re[k], 1/nser[k]));
202+
#pragma omp atomic
203+
weight(i,j) += cover;
200204
}
201205
}
202206
}

src/ellip_cover.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -229,18 +229,25 @@ NumericMatrix profoundEllipWeight(NumericVector cx,
229229
const double delta_2 = (delta_x * delta_x) + (delta_y * delta_y);
230230
if(rad_re[k] == 0){
231231
if(delta_2 < semi_min_min * semi_min_min){
232+
#pragma omp atomic
232233
weight(i,j) += wt_use[k];
233234
}else if(delta_2 < rad_plus * rad_plus){
234-
weight(i,j) += wt_use[k] * pixelCoverEllip(delta_x, delta_y, x_term, y_term, xy_term, depth);
235+
double cover = wt_use[k] * pixelCoverEllip(delta_x, delta_y, x_term, y_term, xy_term, depth);
236+
#pragma omp atomic
237+
weight(i,j) += cover;
235238
}
236239
}else{
237240
const double mod_x = (delta_x * cos_ang + delta_y * sin_ang) / axrat_loc;
238241
const double mod_y = (-delta_x * sin_ang + delta_y * cos_ang);
239242
const double mod_delta_2 = (mod_x * mod_x) + (mod_y * mod_y);
240243
if(delta_2 < semi_min_min * semi_min_min){
241-
weight(i,j) += wt_use[k] * exp(-bn[k]*pow(sqrt(mod_delta_2) / rad_re[k], 1/nser[k]));
244+
double cover = wt_use[k] * exp(-bn[k]*pow(sqrt(mod_delta_2) / rad_re[k], 1/nser[k]));
245+
#pragma omp atomic
246+
weight(i,j) += cover;
242247
}else if(delta_2 < rad_plus * rad_plus){
243-
weight(i,j) += wt_use[k] * pixelCoverEllip(delta_x, delta_y, x_term, y_term, xy_term, depth) * exp(-bn[k]*pow(sqrt(mod_delta_2) / rad_re[k], 1/nser[k]));
248+
double cover = wt_use[k] * pixelCoverEllip(delta_x, delta_y, x_term, y_term, xy_term, depth) * exp(-bn[k]*pow(sqrt(mod_delta_2) / rad_re[k], 1/nser[k]));
249+
#pragma omp atomic
250+
weight(i,j) += cover;
244251
}
245252
}
246253
}

src/skygrid.cpp

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -233,28 +233,25 @@ Rcpp::NumericVector Cadacs_FindSkyCellValues(
233233

234234
// Rcpp::Rcout << skyN << "\n";
235235

236-
Rcpp::NumericVector vec(skyN);
237-
int k=0;
236+
std::vector<double> vec;
237+
vec.reserve(skyN);
238238
for (int j = sscol; j <= eecol; j++) {
239239
int ii=(j-1)*nrow+(ssrow-1);
240240
for (int i = ssrow; i <= eerow; i++,ii++) {
241-
//Rcpp::Rcout << i << "\n";
242-
//Rcpp::Rcout << ii << "\n";
243241
if ((iiobjects!=NULL)) {
244242
if (iiobjects[ii]==0 && (iimask==NULL || iimask[ii]==0)) {
245-
vec[k++] = iiimage[ii];
243+
vec.push_back(iiimage[ii]);
246244
}
247245
} else if (iimask!=NULL) {
248246
if (iimask[ii]==0) {
249-
vec[k++] = iiimage[ii];
247+
vec.push_back(iiimage[ii]);
250248
}
251249
} else {
252-
vec[k++] = iiimage[ii];
250+
vec.push_back(iiimage[ii]);
253251
}
254252
}
255253
}
256-
// Rcpp::Rcout << vec[k-1] << k << " FINE!\n";
257-
return vec;
254+
return Rcpp::NumericVector(vec.begin(), vec.end());
258255
}
259256

260257
//==================================================================================

src/sum_segim.cpp

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,18 +14,36 @@ NumericVector profoundSegimFlux(NumericMatrix image, NumericMatrix segim, int nt
1414
NumericVector fluxes(max_seg, 0.0);
1515

1616
#ifdef _OPENMP
17-
// Parallelize the main loop
18-
#pragma omp parallel for schedule(static) num_threads(nthreads)
19-
#endif
17+
#pragma omp parallel num_threads(nthreads)
18+
{
19+
// Thread-local copy to avoid data races on shared fluxes
20+
NumericVector local_fluxes(max_seg, 0.0);
21+
#pragma omp for schedule(static)
22+
for (int i = 0; i < nrow; ++i) {
23+
for (int j = 0; j < ncol; ++j) {
24+
if(segim(i, j) > 0){
25+
if(!NumericMatrix::is_na(image(i, j))){
26+
local_fluxes[segim(i, j) - 1] += image(i, j);
27+
}
28+
}
29+
}
30+
}
31+
#pragma omp critical
32+
for (int k = 0; k < max_seg; ++k) {
33+
fluxes[k] += local_fluxes[k];
34+
}
35+
}
36+
#else
2037
for (int i = 0; i < nrow; ++i) {
2138
for (int j = 0; j < ncol; ++j) {
2239
if(segim(i, j) > 0){
2340
if(!NumericMatrix::is_na(image(i, j))){
24-
fluxes(segim(i, j) - 1) += image(i, j);
41+
fluxes[segim(i, j) - 1] += image(i, j);
2542
}
2643
}
2744
}
2845
}
46+
#endif
2947

3048
return fluxes;
3149
}

src/this_in_that.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,6 @@ LogicalVector vec_this_in_vec_that(IntegerVector vec_this, IntegerVector vec_tha
1212
LogicalVector ref_ID(max(vec_that) + 1, false);
1313
LogicalVector result(vec_this.size(), invert);
1414

15-
#ifdef _OPENMP
16-
// Parallelize the main loop
17-
#pragma omp parallel for schedule(static) num_threads(nthreads)
18-
#endif
1915
for (int i = 0; i < vec_that.size(); ++i) {
2016
if (!IntegerVector::is_na(vec_that[i])) {
2117
ref_ID[vec_that[i]] = true;
@@ -53,9 +49,6 @@ LogicalMatrix mat_this_in_vec_that(IntegerMatrix mat_this, IntegerVector vec_tha
5349
LogicalMatrix result(mat_this.nrow(), mat_this.ncol());
5450

5551
// Populate the reference vector
56-
#ifdef _OPENMP
57-
#pragma omp parallel for schedule(static) num_threads(nthreads)
58-
#endif
5952
for (int i = 0; i < vec_that.size(); ++i) {
6053
if (!IntegerVector::is_na(vec_that[i])) {
6154
ref_ID[vec_that[i]] = true;

0 commit comments

Comments
 (0)