Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions .runpod.env.template
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ RUNPOD_SSH_USER=root
# Remote paths
RUNPOD_REMOTE_DIR=/workspace/cuvarbase

# RunPod API Key (optional, for advanced automation)
# RunPod API Key (required for scripts/runpod-create.sh and scripts/gpu-test.sh)
# Get from https://www.runpod.io/console/user/settings
# RUNPOD_API_KEY=your-api-key-here
RUNPOD_API_KEY=

# Pod ID (auto-populated by runpod-create.sh)
# RUNPOD_POD_ID=
39 changes: 38 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ Currently includes implementations of:
- Sparse BLS ([Panahi & Zucker 2021](https://arxiv.org/abs/2103.06193)) for small datasets (< 500 observations)
- GPU implementation: `sparse_bls_gpu()` (default)
- CPU implementation: `sparse_bls_cpu()` (fallback)
- **Transit Least Squares ([TLS](https://ui.adsabs.harvard.edu/abs/2019A%26A...623A..39H/abstract))** - GPU-accelerated transit detection with optimal depth fitting
- **35-202× faster** than CPU TLS (transitleastsquares package)
- Keplerian-aware duration constraints (`tls_transit()`) - searches physically plausible transit durations
- Standard mode (`tls_search_gpu()`) for custom period/duration grids
- Optimal period grid sampling (Ofir 2014)
- Supports datasets up to ~100,000 observations (optimal: 500-20,000)
- **Non-equispaced fast Fourier transform (NFFT)** - Adjoint operation ([paper](http://epubs.siam.org/doi/abs/10.1137/0914081))
- **NUFFT-based Likelihood Ratio Test (LRT)** - Transit detection with correlated noise (contributed by Jamila Taaki)
- Matched filter in frequency domain with adaptive noise estimation
Expand Down Expand Up @@ -196,6 +202,8 @@ Full documentation is available at: https://johnh2o2.github.io/cuvarbase/

## Quick Start

### Box Least Squares (BLS) - Transit Detection

```python
import numpy as np
from cuvarbase import bls
Expand All @@ -205,7 +213,6 @@ t = np.sort(np.random.uniform(0, 10, 1000)).astype(np.float32)
y = np.sin(2 * np.pi * t / 2.5) + np.random.normal(0, 0.1, len(t))
dy = np.ones_like(y) * 0.1 # uncertainties

# Box Least Squares (BLS) - Transit detection
# Define frequency grid
freqs = np.linspace(0.1, 2.0, 5000).astype(np.float32)

Expand All @@ -218,6 +225,36 @@ print(f"Best period: {1/best_freq:.2f} (expected: 2.5)")
power_adaptive = bls.eebls_gpu_fast_adaptive(t, y, dy, freqs)
```

### Transit Least Squares (TLS) - Advanced Transit Detection

```python
# Imports are repeated here so this example runs on its own, without first
# executing the BLS snippet above.
import numpy as np
from cuvarbase import tls

# Generate transit data: 500 irregularly sampled points over 50 days,
# flat at unit flux with per-point uncertainties of 0.001.
t = np.sort(np.random.uniform(0, 50, 500)).astype(np.float32)
y = np.ones(len(t), dtype=np.float32)
dy = np.ones(len(t), dtype=np.float32) * 0.001

# Add 1% transit at 10-day period.
# The transit is centred on phase 0, so it straddles the phase wrap-around:
# points just after phase 0 and just before phase 1 are both in transit.
phase = (t % 10.0) / 10.0
in_transit = (phase < 0.01) | (phase > 0.99)
y[in_transit] -= 0.01
y += np.random.normal(0, 0.001, len(t)).astype(np.float32)

# TLS with Keplerian duration constraints (35-202x faster than CPU TLS!)
results = tls.tls_transit(
    t, y, dy,
    R_star=1.0,  # Solar radii
    M_star=1.0,  # Solar masses
    period_min=5.0,
    period_max=20.0
)

print(f"Best period: {results['period']:.2f} days")
print(f"Transit depth: {results['depth']:.4f}")
print(f"SDE: {results['SDE']:.1f}")
```

For more advanced usage including Lomb-Scargle and Conditional Entropy, see the [full documentation](https://johnh2o2.github.io/cuvarbase/) and [examples/](examples/).

## Using Multiple GPUs
Expand Down
69 changes: 69 additions & 0 deletions compare_gpu_cpu_depth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env python3
"""Compare GPU and CPU TLS depth calculations"""
import numpy as np
from cuvarbase import tls as gpu_tls
from transitleastsquares import transitleastsquares as cpu_tls

# ---------------------------------------------------------------------------
# Synthetic light curve
# ---------------------------------------------------------------------------
# Fixed seed so the generated data (and every number printed below) is
# reproducible from run to run.
np.random.seed(42)
ndata = 500
t = np.sort(np.random.uniform(0, 50, ndata))
y = np.ones(ndata, dtype=np.float32)

# Add a box-shaped transit centred on phase 0; the mask therefore has to
# cover both sides of the phase wrap-around.
period_true = 10.0
depth_true = 0.01  # Fractional dip
phase = (t % period_true) / period_true
in_transit = (phase < 0.01) | (phase > 0.99)
y[in_transit] -= depth_true
y += np.random.normal(0, 0.001, ndata).astype(np.float32)
dy = np.ones(ndata, dtype=np.float32) * 0.001

# Both searches must scan the same period window for the comparison to be
# meaningful, so the bounds are defined once and shared.
PERIOD_MIN = 9.0
PERIOD_MAX = 11.0


def _percent_error(value, truth):
    """Return the absolute relative error of *value* against *truth*, in percent."""
    return abs(value - truth) / truth * 100


print("Test data:")
print(f" N = {ndata}")
print(f" Period = {period_true:.1f} days")
print(f" Depth (fractional dip) = {depth_true:.3f}")
print(f" Points in transit: {np.sum(in_transit)}")
print(f" Measured depth: {np.mean(y[~in_transit]) - np.mean(y[in_transit]):.6f}")

# ---------------------------------------------------------------------------
# GPU TLS
# ---------------------------------------------------------------------------
print("\n--- GPU TLS ---")
# Times are cast to float32 for the GPU call only; the CPU run below keeps
# the original float64 array.
gpu_result = gpu_tls.tls_search_gpu(
    t.astype(np.float32), y, dy,
    period_min=PERIOD_MIN,
    period_max=PERIOD_MAX
)

print(f"Period: {gpu_result['period']:.4f} (error: {_percent_error(gpu_result['period'], period_true):.2f}%)")
print(f"Depth: {gpu_result['depth']:.6f}")
print(f"Duration: {gpu_result['duration']:.4f} days")
print(f"T0: {gpu_result['T0']:.4f}")

# ---------------------------------------------------------------------------
# CPU TLS (reference implementation)
# ---------------------------------------------------------------------------
print("\n--- CPU TLS ---")
model = cpu_tls(t, y, dy)
cpu_result = model.power(
    period_min=PERIOD_MIN,
    period_max=PERIOD_MAX,
    n_transits_min=2
)

print(f"Period: {cpu_result.period:.4f} (error: {_percent_error(cpu_result.period, period_true):.2f}%)")
print(f"Depth (flux ratio): {cpu_result.depth:.6f}")
print(f"Depth (fractional dip): {1 - cpu_result.depth:.6f}")
print(f"Duration: {cpu_result.duration:.4f} days")
print(f"T0: {cpu_result.T0:.4f}")

# ---------------------------------------------------------------------------
# Comparison
# ---------------------------------------------------------------------------
print("\n--- Comparison ---")
print(f"Period agreement: {abs(gpu_result['period'] - cpu_result.period):.4f} days")
print(f"Duration agreement: {abs(gpu_result['duration'] - cpu_result.duration):.4f} days")

# Check depth conventions: put both results in the same (fractional dip)
# convention before comparing them against the injected depth.
gpu_depth_frac = gpu_result['depth']  # GPU reports fractional dip
cpu_depth_frac = 1 - cpu_result.depth  # CPU reports flux ratio

print("\nDepth (fractional dip convention):")
print(f" True: {depth_true:.6f}")
print(f" GPU: {gpu_depth_frac:.6f} (error: {_percent_error(gpu_depth_frac, depth_true):.1f}%)")
print(f" CPU: {cpu_depth_frac:.6f} (error: {_percent_error(cpu_depth_frac, depth_true):.1f}%)")
Loading