Merge pull request #30 from asgr/copilot/improve-rcpp-function-speed

asgr · web-flow · commit bace0631ebf5 · 2026-02-22T00:08:37.000+08:00
Fix OpenMP data races and add speed improvements in Rcpp src/ functions
diff --git a/src/akima.cpp b/src/akima.cpp
@@ -306,11 +306,24 @@ void interpolateLinearGrid(NumericVector xseq, NumericVector yseq, NumericMatrix
   int ncol=tempmat_sky.ncol();
   int nrow=tempmat_sky.nrow();
 
+  // Precompute y-bracket indices for all output columns (independent of row i)
+  std::vector<int> top_indices(myynpts, -1), bottom_indices(myynpts, -1);
+  for (int j = 1; j <= myynpts; j++) {
+    double y = -0.5+j;
+    for (int jj = 1; jj < ncol; jj++) {
+      if (myy[jj-1] <= y && myy[jj] >= y) {
+        top_indices[j-1] = jj-1;
+        bottom_indices[j-1] = jj;
+        break;
+      }
+    }
+  }
+
   // For each vertical row
   for (int i = 1; i <= myxnpts; i++) {
     // For a spline to interpolate vertically along the elements of the row
     double x = -0.5+i;
-    // find the left and right index ibnto xseq
+    // find the left and right index into xseq
     int left_index = -1;
     int right_index = -1;
     for (int ii = 1; ii < nrow; ii++) {
@@ -321,34 +334,28 @@ void interpolateLinearGrid(NumericVector xseq, NumericVector yseq, NumericMatrix
       }
     }
 
-    //Rcpp::Rcout << "x="<<x<<" xindex="<<left_index<<" "<<right_index<<"\n";
-    //Rcpp::Rcout << "x="<<x<<" xleft="<<myx[left_index]<<" "<<myx[right_index]<<"\n";
-    int top_index = -1;
-    int bottom_index = -1;
+    if (left_index < 0) continue;
+    const double xlambda = (x-myx[left_index])/(myx[right_index]-myx[left_index]);
+
     for (int j = 1; j <= myynpts; j++) {
+      int top_index = top_indices[j-1];
+      int bottom_index = bottom_indices[j-1];
+      if (top_index < 0) continue;
+
+      // p1...p2
+      // .     .
+      // .     .
+      // p3...p4
+      double p1 = tempmat_sky(left_index,top_index);
+      double p2 = tempmat_sky(right_index,top_index);
+      double p3 = tempmat_sky(left_index,bottom_index);
+      double p4 = tempmat_sky(right_index,bottom_index);
+
       double y = -0.5+j;
-      for (int jj = 1; jj < ncol; jj++) {
-        if (myy[jj-1] <= y && myy[jj] >= y) {
-          top_index = jj-1;
-          bottom_index = jj;
-          // p1...p2
-          // .     .
-          // .     .
-          // p3...p4
-          double p1 = tempmat_sky(left_index,top_index);
-          double p2 = tempmat_sky(right_index,top_index);
-          double p3 = tempmat_sky(left_index,bottom_index);
-          double p4 = tempmat_sky(right_index,bottom_index);
-
-          double xlambda = (x-myx[left_index])/(myx[right_index]-myx[left_index]);
-          double ylambda = (y-myy[top_index])/(myy[bottom_index]-myy[top_index]);
-          double ztop = p1 * (1.0-xlambda) + p2 * xlambda;
-          double zbottom = p3 * (1.0-xlambda) + p4 * xlambda;
-          output(i-1,j-1) = ztop * (1.0-ylambda) +zbottom * ylambda;
-          //Rcpp::Rcout << "y="<<y<<" yindex="<<top_index<<" "<<bottom_index<<" "<<p1<<" "<<p2<<" "<<p3<<" "<<p4<<" result="<<output(i-1,j-1)<<"\n";
-          break;
-        }
-      }
+      double ylambda = (y-myy[top_index])/(myy[bottom_index]-myy[top_index]);
+      double ztop = p1 * (1.0-xlambda) + p2 * xlambda;
+      double zbottom = p3 * (1.0-xlambda) + p4 * xlambda;
+      output(i-1,j-1) = ztop * (1.0-ylambda) +zbottom * ylambda;
     }
   }
 }
diff --git a/src/aper_cover.cpp b/src/aper_cover.cpp
@@ -194,9 +194,13 @@ NumericMatrix profoundAperWeight(NumericVector cx,
             const double PC_temp = pixelCoverAper(delta_x, delta_y, delta_2,
                                                   rad_2, rad_min_2, rad_max_2, depth);
             if(rad_re[k] == 0){
-              weight(i,j) += wt_use[k] * PC_temp;
+              double cover = wt_use[k] * PC_temp;
+              #pragma omp atomic
+              weight(i,j) += cover;
             }else{
-              weight(i,j) += wt_use[k] * PC_temp * exp(-bn[k]*pow(sqrt(delta_2) / rad_re[k], 1/nser[k]));
+              double cover = wt_use[k] * PC_temp * exp(-bn[k]*pow(sqrt(delta_2) / rad_re[k], 1/nser[k]));
+              #pragma omp atomic
+              weight(i,j) += cover;
             }
           }
         }
diff --git a/src/ellip_cover.cpp b/src/ellip_cover.cpp
@@ -229,18 +229,25 @@ NumericMatrix profoundEllipWeight(NumericVector cx,
             const double delta_2 = (delta_x * delta_x) + (delta_y * delta_y);
             if(rad_re[k] == 0){
               if(delta_2 < semi_min_min * semi_min_min){
+                #pragma omp atomic
                 weight(i,j) += wt_use[k];
               }else if(delta_2 < rad_plus * rad_plus){
-                weight(i,j) += wt_use[k] * pixelCoverEllip(delta_x, delta_y, x_term, y_term, xy_term, depth);
+                double cover = wt_use[k] * pixelCoverEllip(delta_x, delta_y, x_term, y_term, xy_term, depth);
+                #pragma omp atomic
+                weight(i,j) += cover;
               }
             }else{
               const double mod_x = (delta_x * cos_ang + delta_y * sin_ang) / axrat_loc;
               const double mod_y = (-delta_x * sin_ang + delta_y * cos_ang);
               const double mod_delta_2 = (mod_x * mod_x) + (mod_y * mod_y);
               if(delta_2 < semi_min_min * semi_min_min){
-                weight(i,j) += wt_use[k] * exp(-bn[k]*pow(sqrt(mod_delta_2) / rad_re[k], 1/nser[k]));
+                double cover = wt_use[k] * exp(-bn[k]*pow(sqrt(mod_delta_2) / rad_re[k], 1/nser[k]));
+                #pragma omp atomic
+                weight(i,j) += cover;
               }else if(delta_2 < rad_plus * rad_plus){
-                weight(i,j) += wt_use[k] * pixelCoverEllip(delta_x, delta_y, x_term, y_term, xy_term, depth) * exp(-bn[k]*pow(sqrt(mod_delta_2) / rad_re[k], 1/nser[k]));
+                double cover = wt_use[k] * pixelCoverEllip(delta_x, delta_y, x_term, y_term, xy_term, depth) * exp(-bn[k]*pow(sqrt(mod_delta_2) / rad_re[k], 1/nser[k]));
+                #pragma omp atomic
+                weight(i,j) += cover;
               }
             }
           }
diff --git a/src/skygrid.cpp b/src/skygrid.cpp
@@ -233,28 +233,25 @@ Rcpp::NumericVector Cadacs_FindSkyCellValues(
   
   // Rcpp::Rcout << skyN << "\n";
   
-  Rcpp::NumericVector vec(skyN);
-  int k=0;
+  std::vector<double> vec;
+  vec.reserve(skyN);
   for (int j = sscol; j <= eecol; j++) {
     int ii=(j-1)*nrow+(ssrow-1);
     for (int i = ssrow; i <= eerow; i++,ii++) {
-      //Rcpp::Rcout << i << "\n";
-      //Rcpp::Rcout << ii << "\n";
       if ((iiobjects!=NULL)) {
         if (iiobjects[ii]==0 && (iimask==NULL || iimask[ii]==0)) {
-          vec[k++] = iiimage[ii];
+          vec.push_back(iiimage[ii]);
         }
       } else if (iimask!=NULL) {
         if (iimask[ii]==0) {
-          vec[k++] = iiimage[ii];
+          vec.push_back(iiimage[ii]);
         }
       } else {
-        vec[k++] = iiimage[ii];
+        vec.push_back(iiimage[ii]);
       }
     }
   }
-  // Rcpp::Rcout << vec[k-1] << k << " FINE!\n";
-  return vec;
+  return Rcpp::NumericVector(vec.begin(), vec.end());
 }
 
 //==================================================================================
diff --git a/src/sum_segim.cpp b/src/sum_segim.cpp
@@ -14,18 +14,36 @@ NumericVector profoundSegimFlux(NumericMatrix image, NumericMatrix segim, int nt
   NumericVector fluxes(max_seg, 0.0);
   
   #ifdef _OPENMP
-    // Parallelize the main loop
-  #pragma omp parallel for schedule(static) num_threads(nthreads)
-  #endif
+  #pragma omp parallel num_threads(nthreads)
+  {
+    // Thread-local copy to avoid data races on shared fluxes
+    NumericVector local_fluxes(max_seg, 0.0);
+    #pragma omp for schedule(static)
+    for (int i = 0; i < nrow; ++i) {
+      for (int j = 0; j < ncol; ++j) {
+        if(segim(i, j) > 0){
+          if(!NumericMatrix::is_na(image(i, j))){
+            local_fluxes[segim(i, j) - 1] += image(i, j);
+          }
+        }
+      }
+    }
+    #pragma omp critical
+    for (int k = 0; k < max_seg; ++k) {
+      fluxes[k] += local_fluxes[k];
+    }
+  }
+  #else
   for (int i = 0; i < nrow; ++i) {
     for (int j = 0; j < ncol; ++j) {
       if(segim(i, j) > 0){
         if(!NumericMatrix::is_na(image(i, j))){
-          fluxes(segim(i, j) - 1) += image(i, j); 
+          fluxes[segim(i, j) - 1] += image(i, j);
         }
       }
     }
   }
+  #endif
   
   return fluxes;
 }
diff --git a/src/this_in_that.cpp b/src/this_in_that.cpp
@@ -12,10 +12,6 @@ LogicalVector vec_this_in_vec_that(IntegerVector vec_this, IntegerVector vec_tha
   LogicalVector ref_ID(max(vec_that) + 1, false);
   LogicalVector result(vec_this.size(), invert);
   
-  #ifdef _OPENMP
-    // Parallelize the main loop
-  #pragma omp parallel for schedule(static) num_threads(nthreads)
-  #endif
   for (int i = 0; i < vec_that.size(); ++i) {
     if (!IntegerVector::is_na(vec_that[i])) {
       ref_ID[vec_that[i]] = true;
@@ -53,9 +49,6 @@ LogicalMatrix mat_this_in_vec_that(IntegerMatrix mat_this, IntegerVector vec_tha
   LogicalMatrix result(mat_this.nrow(), mat_this.ncol());
   
   // Populate the reference vector
-  #ifdef _OPENMP
-  #pragma omp parallel for schedule(static) num_threads(nthreads)
-  #endif
   for (int i = 0; i < vec_that.size(); ++i) {
     if (!IntegerVector::is_na(vec_that[i])) {
       ref_ID[vec_that[i]] = true;

Original file line number	Diff line number	Diff line change
`@@ -194,9 +194,13 @@ NumericMatrix profoundAperWeight(NumericVector cx,`
`194`	`194`	`const double PC_temp = pixelCoverAper(delta_x, delta_y, delta_2,`
`195`	`195`	`rad_2, rad_min_2, rad_max_2, depth);`
`196`	`196`	`if(rad_re[k] == 0){`
`197`		`- weight(i,j) += wt_use[k] * PC_temp;`
	`197`	`+ double cover = wt_use[k] * PC_temp;`
	`198`	`+ #pragma omp atomic`
	`199`	`+ weight(i,j) += cover;`
`198`	`200`	`}else{`
`199`		`- weight(i,j) += wt_use[k] * PC_temp * exp(-bn[k]*pow(sqrt(delta_2) / rad_re[k], 1/nser[k]));`
	`201`	`+ double cover = wt_use[k] * PC_temp * exp(-bn[k]*pow(sqrt(delta_2) / rad_re[k], 1/nser[k]));`
	`202`	`+ #pragma omp atomic`
	`203`	`+ weight(i,j) += cover;`
`200`	`204`	`}`
`201`	`205`	`}`
`202`	`206`	`}`
Original file line number	Diff line number	Diff line change
`@@ -233,28 +233,25 @@ Rcpp::NumericVector Cadacs_FindSkyCellValues(`
`233`	`233`
`234`	`234`	`// Rcpp::Rcout << skyN << "\n";`
`235`	`235`
`236`		`- Rcpp::NumericVector vec(skyN);`
`237`		`- int k=0;`
	`236`	`+ std::vector<double> vec;`
	`237`	`+ vec.reserve(skyN);`
`238`	`238`	`for (int j = sscol; j <= eecol; j++) {`
`239`	`239`	`int ii=(j-1)*nrow+(ssrow-1);`
`240`	`240`	`for (int i = ssrow; i <= eerow; i++,ii++) {`
`241`		`- //Rcpp::Rcout << i << "\n";`
`242`		`- //Rcpp::Rcout << ii << "\n";`
`243`	`241`	`if ((iiobjects!=NULL)) {`
`244`	`242`	`if (iiobjects[ii]==0 && (iimask==NULL || iimask[ii]==0)) {`
`245`		`- vec[k++] = iiimage[ii];`
	`243`	`+ vec.push_back(iiimage[ii]);`
`246`	`244`	`}`
`247`	`245`	`} else if (iimask!=NULL) {`
`248`	`246`	`if (iimask[ii]==0) {`
`249`		`- vec[k++] = iiimage[ii];`
	`247`	`+ vec.push_back(iiimage[ii]);`
`250`	`248`	`}`
`251`	`249`	`} else {`
`252`		`- vec[k++] = iiimage[ii];`
	`250`	`+ vec.push_back(iiimage[ii]);`
`253`	`251`	`}`
`254`	`252`	`}`
`255`	`253`	`}`
`256`		`- // Rcpp::Rcout << vec[k-1] << k << " FINE!\n";`
`257`		`- return vec;`
	`254`	`+ return Rcpp::NumericVector(vec.begin(), vec.end());`
`258`	`255`	`}`
`259`	`256`
`260`	`257`	`//==================================================================================`