johnh2o2 · johnh2o2 · Oct 27, 2025 · Oct 27, 2025 · Oct 27, 2025 · Oct 27, 2025
diff --git a/.runpod.env.template b/.runpod.env.template
@@ -14,6 +14,9 @@ RUNPOD_SSH_USER=root
 # Remote paths
 RUNPOD_REMOTE_DIR=/workspace/cuvarbase
 
-# RunPod API Key (optional, for advanced automation)
+# RunPod API Key (required for scripts/runpod-create.sh and scripts/gpu-test.sh)
 # Get from https://www.runpod.io/console/user/settings
-# RUNPOD_API_KEY=your-api-key-here
+RUNPOD_API_KEY=
+
+# Pod ID (auto-populated by scripts/runpod-create.sh)
+# RUNPOD_POD_ID=
diff --git a/README.md b/README.md
@@ -130,6 +130,12 @@ Currently includes implementations of:
   - Sparse BLS ([Panahi & Zucker 2021](https://arxiv.org/abs/2103.06193)) for small datasets (< 500 observations)
     - GPU implementation: `sparse_bls_gpu()` (default)
     - CPU implementation: `sparse_bls_cpu()` (fallback)
+- **Transit Least Squares ([TLS](https://ui.adsabs.harvard.edu/abs/2019A%26A...623A..39H/abstract))** - GPU-accelerated transit detection with optimal depth fitting
+  - **35-202× faster** than CPU TLS (the `transitleastsquares` package)
+  - Keplerian-aware duration constraints (`tls_transit()`) - searches physically plausible transit durations
+  - Standard mode (`tls_search_gpu()`) for custom period/duration grids
+  - Optimal period grid sampling (Ofir 2014)
+  - Supports datasets up to ~100,000 observations (optimal: 500-20,000)
 - **Non-equispaced fast Fourier transform (NFFT)** - Adjoint operation ([paper](http://epubs.siam.org/doi/abs/10.1137/0914081))
 - **NUFFT-based Likelihood Ratio Test (LRT)** - Transit detection with correlated noise (contributed by Jamila Taaki)
   - Matched filter in frequency domain with adaptive noise estimation
@@ -196,6 +202,8 @@ Full documentation is available at: https://johnh2o2.github.io/cuvarbase/
 
 ## Quick Start
 
+### Box Least Squares (BLS) - Transit Detection
+
 ```python
 import numpy as np
 from cuvarbase import bls
@@ -205,7 +213,6 @@ t = np.sort(np.random.uniform(0, 10, 1000)).astype(np.float32)
 y = np.sin(2 * np.pi * t / 2.5) + np.random.normal(0, 0.1, len(t))
 dy = np.ones_like(y) * 0.1  # uncertainties
 
-# Box Least Squares (BLS) - Transit detection
 # Define frequency grid
 freqs = np.linspace(0.1, 2.0, 5000).astype(np.float32)
 
@@ -218,6 +225,36 @@ print(f"Best period: {1/best_freq:.2f} (expected: 2.5)")
 power_adaptive = bls.eebls_gpu_fast_adaptive(t, y, dy, freqs)
 ```
 
+### Transit Least Squares (TLS) - Advanced Transit Detection
+
+```python
+from cuvarbase import tls
+
+# Generate transit data
+t = np.sort(np.random.uniform(0, 50, 500)).astype(np.float32)
+y = np.ones(len(t), dtype=np.float32)
+dy = np.ones(len(t), dtype=np.float32) * 0.001
+
+# Add 1% transit at 10-day period
+phase = (t % 10.0) / 10.0
+in_transit = (phase < 0.01) | (phase > 0.99)
+y[in_transit] -= 0.01
+y += np.random.normal(0, 0.001, len(t)).astype(np.float32)
+
+# TLS with Keplerian duration constraints (35-202× faster than CPU TLS!)
+results = tls.tls_transit(
+    t, y, dy,
+    R_star=1.0,      # Solar radii
+    M_star=1.0,      # Solar masses
+    period_min=5.0,
+    period_max=20.0
+)
+
+print(f"Best period: {results['period']:.2f} days")
+print(f"Transit depth: {results['depth']:.4f}")
+print(f"SDE: {results['SDE']:.1f}")
+```
+
 For more advanced usage including Lomb-Scargle and Conditional Entropy, see the [full documentation](https://johnh2o2.github.io/cuvarbase/) and [examples/](examples/).
 
 ## Using Multiple GPUs

diff --git a/compare_gpu_cpu_depth.py b/compare_gpu_cpu_depth.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+"""Compare GPU and CPU TLS depth calculations.
+
+Builds a synthetic light curve with a box-shaped transit of known period and
+depth, runs the cuvarbase GPU TLS search and the ``transitleastsquares`` CPU
+implementation over the same period range, and prints the recovered period,
+depth, duration and T0 for each, followed by a comparison.
+
+Depths are compared in the "fractional dip" convention: this script treats
+the GPU depth as a fractional dip and the CPU depth as an in-transit flux
+ratio, so the CPU value is converted with ``1 - depth``.
+"""
+import numpy as np
+from cuvarbase import tls as gpu_tls
+from transitleastsquares import transitleastsquares as cpu_tls
+
+# Generate test data (fixed seed so repeated runs print the same numbers)
+np.random.seed(42)
+ndata = 500
+t = np.sort(np.random.uniform(0, 50, ndata))
+y = np.ones(ndata, dtype=np.float32)
+
+# Add transit: points within 1% of the period on either side of phase 0
+# are lowered by depth_true.
+period_true = 10.0
+depth_true = 0.01  # Fractional dip
+phase = (t % period_true) / period_true
+in_transit = (phase < 0.01) | (phase > 0.99)
+y[in_transit] -= depth_true
+# Gaussian noise with sigma equal to the per-point uncertainty in dy
+y += np.random.normal(0, 0.001, ndata).astype(np.float32)
+dy = np.ones(ndata, dtype=np.float32) * 0.001
+
+print("Test data:")
+print(f"  N = {ndata}")
+print(f"  Period = {period_true:.1f} days")
+print(f"  Depth (fractional dip) = {depth_true:.3f}")
+print(f"  Points in transit: {np.sum(in_transit)}")
+print(f"  Measured depth: {np.mean(y[~in_transit]) - np.mean(y[in_transit]):.6f}")
+
+# GPU TLS (time array is cast to float32; y and dy are already float32)
+print("\n--- GPU TLS ---")
+gpu_result = gpu_tls.tls_search_gpu(
+    t.astype(np.float32), y, dy,
+    period_min=9.0,
+    period_max=11.0
+)
+
+print(f"Period: {gpu_result['period']:.4f} (error: {abs(gpu_result['period'] - period_true)/period_true*100:.2f}%)")
+print(f"Depth: {gpu_result['depth']:.6f}")
+print(f"Duration: {gpu_result['duration']:.4f} days")
+print(f"T0: {gpu_result['T0']:.4f}")
+
+# CPU TLS (same period range as the GPU search)
+print("\n--- CPU TLS ---")
+model = cpu_tls(t, y, dy)
+cpu_result = model.power(
+    period_min=9.0,
+    period_max=11.0,
+    n_transits_min=2
+)
+
+print(f"Period: {cpu_result.period:.4f} (error: {abs(cpu_result.period - period_true)/period_true*100:.2f}%)")
+print(f"Depth (flux ratio): {cpu_result.depth:.6f}")
+print(f"Depth (fractional dip): {1 - cpu_result.depth:.6f}")
+print(f"Duration: {cpu_result.duration:.4f} days")
+print(f"T0: {cpu_result.T0:.4f}")
+
+# Compare: absolute differences between the GPU and CPU best-fit values
+print("\n--- Comparison ---")
+print(f"Period agreement: {abs(gpu_result['period'] - cpu_result.period):.4f} days")
+print(f"Duration agreement: {abs(gpu_result['duration'] - cpu_result.duration):.4f} days")
+
+# Check depth conventions
+gpu_depth_frac = gpu_result['depth']  # GPU reports fractional dip
+cpu_depth_frac = 1 - cpu_result.depth  # CPU reports flux ratio
+
+print("\nDepth (fractional dip convention):")
+print(f"  True: {depth_true:.6f}")
+print(f"  GPU:  {gpu_depth_frac:.6f} (error: {abs(gpu_depth_frac - depth_true)/depth_true*100:.1f}%)")
+print(f"  CPU:  {cpu_depth_frac:.6f} (error: {abs(cpu_depth_frac - depth_true)/depth_true*100:.1f}%)")