39 changes: 14 additions & 25 deletions include/core/dispatch_utils.h
@@ -35,9 +35,10 @@ namespace detail { // Performance utilities
  * scalar) Strategy::PARALLEL → parallel_func → ParallelUtils::parallelFor
  * Strategy::WORK_STEALING → work_stealing_func → WorkStealingPool::parallelFor
  *
- * The GpuAcceleratedFunc template parameter and gpu_accelerated_func argument are
- * retained for ABI compatibility and will be removed in Phase 6 when all distribution
- * batch implementations are updated.
+ * The GPU_ACCELERATED strategy slot has been removed. The GpuAcceleratedFunc
+ * template parameter and corresponding lambda are no longer accepted by these
+ * templates. See issue #23 for the rationale and prerequisites for any future
+ * GPU backend.
  */
 class DispatchUtils {
  public:
@@ -49,7 +50,6 @@ class DispatchUtils {
  * @tparam BatchFunc Function type for SIMD batch operations
  * @tparam ParallelFunc Function type for parallel operations
  * @tparam WorkStealingFunc Function type for work-stealing operations
- * @tparam GpuAcceleratedFunc Function type for GPU-accelerated operations
  *
  * @param dist Reference to the distribution instance
  * @param values Input values span
@@ -61,16 +61,14 @@ class DispatchUtils {
  * @param batch_func Function to call for SIMD batch operations
  * @param parallel_func Function to call for parallel operations
  * @param work_stealing_func Function to call for work-stealing operations
- * @param gpu_accelerated_func Function to call for GPU-accelerated operations
  */
 template <typename Distribution, typename ScalarFunc, typename BatchFunc, typename ParallelFunc,
-          typename WorkStealingFunc, typename GpuAcceleratedFunc>
+          typename WorkStealingFunc>
 static void autoDispatch(const Distribution& dist, std::span<const double> values,
                          std::span<double> results, const PerformanceHint& hint,
                          DistributionType dist_type, OperationType op_type,
                          ScalarFunc&& scalar_func, BatchFunc&& batch_func,
-                         ParallelFunc&& parallel_func, WorkStealingFunc&& work_stealing_func,
-                         GpuAcceleratedFunc&& gpu_accelerated_func) {
+                         ParallelFunc&& parallel_func, WorkStealingFunc&& work_stealing_func) {
   // Validate input
   if (values.size() != results.size()) {
     throw std::invalid_argument("Input and output spans must have the same size");
@@ -103,8 +101,7 @@ class DispatchUtils {
   executeStrategy(strategy, dist, values, results, count,
                   std::forward<ScalarFunc>(scalar_func), std::forward<BatchFunc>(batch_func),
                   std::forward<ParallelFunc>(parallel_func),
-                  std::forward<WorkStealingFunc>(work_stealing_func),
-                  std::forward<GpuAcceleratedFunc>(gpu_accelerated_func));
+                  std::forward<WorkStealingFunc>(work_stealing_func));
 }

 /**
@@ -140,7 +137,6 @@ class DispatchUtils {
  * @tparam BatchFunc Function type for SIMD batch operations
  * @tparam ParallelFunc Function type for parallel operations
  * @tparam WorkStealingFunc Function type for work-stealing operations
- * @tparam GpuAcceleratedFunc Function type for GPU-accelerated operations
  *
  * @param dist Reference to the distribution instance
  * @param values Input values span
@@ -150,16 +146,14 @@ class DispatchUtils {
  * @param batch_func Function to call for SIMD batch operations
  * @param parallel_func Function to call for parallel operations
  * @param work_stealing_func Function to call for work-stealing operations
- * @param gpu_accelerated_func Function to call for GPU-accelerated operations
  */
 template <typename Distribution, typename ScalarFunc, typename BatchFunc, typename ParallelFunc,
-          typename WorkStealingFunc, typename GpuAcceleratedFunc>
+          typename WorkStealingFunc>
 static void executeWithStrategy(const Distribution& dist, std::span<const double> values,
                                 std::span<double> results, Strategy strategy,
                                 ScalarFunc&& scalar_func, BatchFunc&& batch_func,
                                 ParallelFunc&& parallel_func,
-                                WorkStealingFunc&& work_stealing_func,
-                                GpuAcceleratedFunc&& gpu_accelerated_func) {
+                                WorkStealingFunc&& work_stealing_func) {
   // Validate input
   if (values.size() != results.size()) {
     throw std::invalid_argument("Input and output spans must have the same size");
@@ -173,8 +167,7 @@ class DispatchUtils {
   executeStrategy(strategy, dist, values, results, count,
                   std::forward<ScalarFunc>(scalar_func), std::forward<BatchFunc>(batch_func),
                   std::forward<ParallelFunc>(parallel_func),
-                  std::forward<WorkStealingFunc>(work_stealing_func),
-                  std::forward<GpuAcceleratedFunc>(gpu_accelerated_func));
+                  std::forward<WorkStealingFunc>(work_stealing_func));
 }

 private:
@@ -203,12 +196,12 @@ class DispatchUtils {
  * @brief Executes the selected strategy with appropriate function calls
  */
 template <typename Distribution, typename ScalarFunc, typename BatchFunc, typename ParallelFunc,
-          typename WorkStealingFunc, typename GpuAcceleratedFunc>
+          typename WorkStealingFunc>
 static void executeStrategy(Strategy strategy, const Distribution& dist,
                             std::span<const double> values, std::span<double> results,
                             size_t count, ScalarFunc&& scalar_func, BatchFunc&& batch_func,
-                            ParallelFunc&& parallel_func, WorkStealingFunc&& work_stealing_func,
-                            GpuAcceleratedFunc&& gpu_accelerated_func) {
+                            ParallelFunc&& parallel_func,
+                            WorkStealingFunc&& work_stealing_func) {
   switch (strategy) {
     case Strategy::SCALAR:
       // Use simple loop for tiny batches (< 8 elements)
@@ -240,9 +233,6 @@ class DispatchUtils {
       break;
     }
   }
-  // gpu_accelerated_func is intentionally unused — GPU_ACCELERATED was removed
-  // from the Strategy enum. Retained for ABI compatibility until Phase 6.
-  (void)gpu_accelerated_func;
 }

 /**
@@ -319,8 +309,7 @@ class DispatchUtils {
   }
 }

-// executeBatchGpuAccelerated removed — GPU_ACCELERATED strategy removed from enum.
-// Callers should use executeBatchWorkStealing directly.
+// GPU_ACCELERATED strategy slot removed. See issue #23 for future GPU backend prerequisites.

 /**
  * @brief Common cache validation and parameter extraction pattern
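Reviewer note: with the GPU slot gone, executeStrategy is a plain four-way switch over exactly the callables that remain. The sketch below isolates that control flow under stated assumptions; it is a simplified stand-in, not the library's exact API (the real template also receives the distribution, the element count, and the hint/type tags visible in the diff, and perfect-forwards each callable). The enumerator name SIMD_BATCH is assumed; only SCALAR, PARALLEL, and WORK_STEALING appear in this diff.

```cpp
#include <span>
#include <stdexcept>

// Strategy enum after this PR: the GPU_ACCELERATED slot is gone.
// SIMD_BATCH is an assumed name for the batch enumerator.
enum class Strategy { SCALAR, SIMD_BATCH, PARALLEL, WORK_STEALING };

// Simplified stand-in for DispatchUtils::executeStrategy with illustrative
// callable signatures.
template <typename ScalarFunc, typename BatchFunc, typename ParallelFunc,
          typename WorkStealingFunc>
void executeStrategySketch(Strategy strategy, std::span<const double> values,
                           std::span<double> results, ScalarFunc&& scalar_func,
                           BatchFunc&& batch_func, ParallelFunc&& parallel_func,
                           WorkStealingFunc&& work_stealing_func) {
    if (values.size() != results.size()) {
        throw std::invalid_argument("Input and output spans must have the same size");
    }
    switch (strategy) {
        case Strategy::SCALAR:
            scalar_func(values, results);  // simple loop, best for tiny batches
            break;
        case Strategy::SIMD_BATCH:
            batch_func(values, results);  // vectorized kernel
            break;
        case Strategy::PARALLEL:
            parallel_func(values, results);  // routes to ParallelUtils::parallelFor
            break;
        case Strategy::WORK_STEALING:
            work_stealing_func(values, results);  // routes to WorkStealingPool::parallelFor
            break;
    }
}
```

Because every remaining enumerator is handled, the callable list now matches the Strategy enum one-to-one, which is what allows dropping the `(void)gpu_accelerated_func` suppression removed above.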
79 changes: 0 additions & 79 deletions src/beta.cpp
@@ -570,35 +570,6 @@ void BetaDistribution::getProbability(std::span<const double> values, std::span<
       dist.getProbabilityBatchUnsafeImpl(vals.data(), res.data(), count, lnc, am1, bm1);
     }
   },
-  [](const BetaDistribution& dist, std::span<const double> vals, std::span<double> res,
-     WorkStealingPool& pool) {
-    if (vals.size() != res.size())
-      throw std::invalid_argument("Span size mismatch");
-    const std::size_t count = vals.size();
-    if (count == 0)
-      return;
-    std::shared_lock<std::shared_mutex> lock(dist.cache_mutex_);
-    if (!dist.cache_valid_) {
-      lock.unlock();
-      std::unique_lock<std::shared_mutex> ulock(dist.cache_mutex_);
-      if (!dist.cache_valid_)
-        const_cast<BetaDistribution&>(dist).updateCacheUnsafe();
-      ulock.unlock();
-      lock.lock();
-    }
-    const double lnc = dist.logNormConst_;
-    const double am1 = dist.alphaMinus1_;
-    const double bm1 = dist.betaMinus1_;
-    lock.unlock();
-    constexpr std::size_t CHUNK = 1024;
-    const std::size_t num_chunks = (count + CHUNK - 1) / CHUNK;
-    pool.parallelFor(std::size_t{0}, num_chunks, [&](std::size_t ci) {
-      const std::size_t start = ci * CHUNK;
-      const std::size_t len = std::min(CHUNK, count - start);
-      dist.getProbabilityBatchUnsafeImpl(vals.data() + start, res.data() + start, len,
-                                         lnc, am1, bm1);
-    });
-  },
  [](const BetaDistribution& dist, std::span<const double> vals, std::span<double> res,
     WorkStealingPool& pool) {
   if (vals.size() != res.size())
@@ -687,35 +658,6 @@ void BetaDistribution::getLogProbability(std::span<const double> values, std::sp
                                            bm1);
     }
   },
-  [](const BetaDistribution& dist, std::span<const double> vals, std::span<double> res,
-     WorkStealingPool& pool) {
-    if (vals.size() != res.size())
-      throw std::invalid_argument("Span size mismatch");
-    const std::size_t count = vals.size();
-    if (count == 0)
-      return;
-    std::shared_lock<std::shared_mutex> lock(dist.cache_mutex_);
-    if (!dist.cache_valid_) {
-      lock.unlock();
-      std::unique_lock<std::shared_mutex> ulock(dist.cache_mutex_);
-      if (!dist.cache_valid_)
-        const_cast<BetaDistribution&>(dist).updateCacheUnsafe();
-      ulock.unlock();
-      lock.lock();
-    }
-    const double lnc = dist.logNormConst_;
-    const double am1 = dist.alphaMinus1_;
-    const double bm1 = dist.betaMinus1_;
-    lock.unlock();
-    constexpr std::size_t CHUNK = 1024;
-    const std::size_t num_chunks = (count + CHUNK - 1) / CHUNK;
-    pool.parallelFor(std::size_t{0}, num_chunks, [&](std::size_t ci) {
-      const std::size_t start = ci * CHUNK;
-      const std::size_t len = std::min(CHUNK, count - start);
-      dist.getLogProbabilityBatchUnsafeImpl(vals.data() + start, res.data() + start, len,
-                                            lnc, am1, bm1);
-    });
-  },
  [](const BetaDistribution& dist, std::span<const double> vals, std::span<double> res,
     WorkStealingPool& pool) {
   if (vals.size() != res.size())
@@ -787,27 +729,6 @@ void BetaDistribution::getCumulativeProbability(std::span<const double> values,
       dist.getCumulativeProbabilityBatchUnsafeImpl(vals.data(), res.data(), count, a, b);
     }
   },
-  [](const BetaDistribution& dist, std::span<const double> vals, std::span<double> res,
-     WorkStealingPool& pool) {
-    if (vals.size() != res.size())
-      throw std::invalid_argument("Span size mismatch");
-    const std::size_t count = vals.size();
-    if (count == 0)
-      return;
-    std::shared_lock<std::shared_mutex> lock(dist.cache_mutex_);
-    const double a = dist.alpha_, b = dist.beta_;
-    lock.unlock();
-    const double log_prefix = detail::lgamma(a + b) - detail::lgamma(a) - detail::lgamma(b);
-    pool.parallelFor(std::size_t{0}, count, [&](std::size_t i) {
-      const double x = vals[i];
-      if (x <= 0.0)
-        res[i] = 0.0;
-      else if (x >= 1.0)
-        res[i] = 1.0;
-      else
-        res[i] = detail::beta_i(x, a, b, log_prefix);
-    });
-  },
  [](const BetaDistribution& dist, std::span<const double> vals, std::span<double> res,
     WorkStealingPool& pool) {
   if (vals.size() != res.size())
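Reviewer note: each lambda deleted from src/beta.cpp duplicated the work-stealing lambda that immediately follows it (each hunk's trailing context opens an identical lambda), so no dispatch path loses coverage. The surviving pattern is worth spelling out: double-checked cache validation on a shared_mutex, a snapshot of the cached parameters, then lock-free chunked processing. Below is a self-contained sketch of that pattern. `Dist` and `pdfKernel` are hypothetical stand-ins for BetaDistribution and getProbabilityBatchUnsafeImpl, mutable members replace the const_cast in the real code, and a serial loop stands in for WorkStealingPool::parallelFor. The CDF variant follows the same shape but evaluates the regularized incomplete beta I_x(a, b) per element via detail::beta_i.

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <mutex>
#include <shared_mutex>
#include <span>
#include <stdexcept>

// Hypothetical stand-in for BetaDistribution; mutable members replace the
// const_cast used by the real updateCacheUnsafe path.
struct Dist {
    mutable std::shared_mutex cache_mutex_;
    mutable bool cache_valid_ = false;
    mutable double logNormConst_ = 0.0, alphaMinus1_ = 0.0, betaMinus1_ = 0.0;

    void updateCacheUnsafe() const {
        // Recompute cached constants here; placeholder for the sketch.
        cache_valid_ = true;
    }

    // Plausible beta-pdf kernel reconstructed from the parameter names; the
    // real getProbabilityBatchUnsafeImpl is not shown in this diff.
    void pdfKernel(const double* in, double* out, std::size_t n, double lnc,
                   double am1, double bm1) const {
        for (std::size_t i = 0; i < n; ++i) {
            const double x = in[i];
            out[i] = (x > 0.0 && x < 1.0)
                         ? std::exp(lnc + am1 * std::log(x) + bm1 * std::log1p(-x))
                         : 0.0;
        }
    }
};

void workStealingPdf(const Dist& dist, std::span<const double> vals,
                     std::span<double> res) {
    if (vals.size() != res.size())
        throw std::invalid_argument("Span size mismatch");
    const std::size_t count = vals.size();
    if (count == 0)
        return;

    // Double-checked cache validation: take the cheap shared lock first and
    // upgrade to exclusive only if the cache is actually stale.
    std::shared_lock<std::shared_mutex> lock(dist.cache_mutex_);
    if (!dist.cache_valid_) {
        lock.unlock();
        std::unique_lock<std::shared_mutex> ulock(dist.cache_mutex_);
        if (!dist.cache_valid_)
            dist.updateCacheUnsafe();
        ulock.unlock();
        lock.lock();
    }
    // Snapshot the cached parameters, then drop the lock so the hot loop
    // runs lock-free.
    const double lnc = dist.logNormConst_;
    const double am1 = dist.alphaMinus1_;
    const double bm1 = dist.betaMinus1_;
    lock.unlock();

    // Fixed 1024-element chunks bound per-task overhead; a serial loop stands
    // in for WorkStealingPool::parallelFor over [0, num_chunks).
    constexpr std::size_t CHUNK = 1024;
    const std::size_t num_chunks = (count + CHUNK - 1) / CHUNK;
    for (std::size_t ci = 0; ci < num_chunks; ++ci) {
        const std::size_t start = ci * CHUNK;
        const std::size_t len = std::min(CHUNK, count - start);
        dist.pdfKernel(vals.data() + start, res.data() + start, len, lnc, am1, bm1);
    }
}
```

The double-checked sequence keeps the common case (warm cache) on the cheap shared lock, and the re-check under the unique lock prevents two threads from both recomputing; snapshotting lnc/am1/bm1 before the hot loop means no lock is held while the kernel runs.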