apt-sim · SeverinDiederichs · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 21, 2026
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -444,6 +444,8 @@ endif()
 # Build Targets
 #----------------------------------------------------------------------------#
 set(ADEPT_G4_INTEGRATION_SRCS
+  src/AdePTG4HepEmState.cpp
+  src/AdePTGeometryBridge.cpp
   src/G4HepEmTrackingManagerSpecialized.cc
   src/AdePTTrackingManager.cc
   src/G4EmStandardPhysics_AdePT.cc

diff --git a/include/AdePT/core/AdePTG4HepEmState.hh b/include/AdePT/core/AdePTG4HepEmState.hh
@@ -0,0 +1,80 @@
+// SPDX-FileCopyrightText: 2026 CERN
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef ADEPT_G4_HEPEM_STATE_HH
+#define ADEPT_G4_HEPEM_STATE_HH
+
+#include <memory>
+
+struct G4HepEmConfig;
+struct G4HepEmData;
+struct G4HepEmParameters;
+
+namespace AsyncAdePT {
+
+/// @brief Owns the prepared host-side G4HepEm inputs used by transport.
+/// @details
+/// The Geant4 integration side prepares one of these objects before the shared
+/// transport is created. This wrapper owns both:
+/// - the rebuilt `G4HepEmData`
+/// - a deep copy of the `G4HepEmParameters` taken from the provided config
+///
+/// Cleanup is intentionally split:
+/// - `DataDeleter` performs the deep cleanup of the owned `G4HepEmData`
+///   and then deletes the outer `G4HepEmData` allocation.
+/// - `ParametersDeleter` performs the deep cleanup of the owned
+///   `G4HepEmParameters`, including the GPU mirror created by AdePT, and then
+///   deletes the outer `G4HepEmParameters` allocation.
+class AdePTG4HepEmState {
+public:
+  /// @brief Build the AdePT-owned `G4HepEmData` and `G4HepEmParameters` copies from the supplied config.
+  explicit AdePTG4HepEmState(G4HepEmConfig *hepEmConfig);
+
+  /// @brief Destroy the owned `G4HepEmData` and `G4HepEmParameters` copies.
+  ~AdePTG4HepEmState();
+
+  AdePTG4HepEmState(const AdePTG4HepEmState &)            = delete;
+  AdePTG4HepEmState &operator=(const AdePTG4HepEmState &) = delete;
+
+  AdePTG4HepEmState(AdePTG4HepEmState &&) noexcept            = default;
+  AdePTG4HepEmState &operator=(AdePTG4HepEmState &&) noexcept = default;
+
+  /// @brief Non-owning access to the owned `G4HepEmData`; null once this object has been moved from.
+  G4HepEmData *GetData() const { return fData.get(); }
+
+  /// @brief Non-owning access to the owned `G4HepEmParameters` copy; null once this object has been moved from.
+  G4HepEmParameters *GetParameters() const { return fParameters.get(); }
+
+private:
+  /// @brief Deletes the outer `G4HepEmData` object after first freeing all tables it owns.
+  /// @details
+  /// `FreeG4HepEmData` releases both the host-side tables and any device-side
+  /// mirrors embedded in the `G4HepEmData` object, but it does not delete the outer
+  /// `G4HepEmData` allocation itself. This deleter performs both steps for the
+  /// owned `fData` member. It does not touch the separately owned
+  /// `G4HepEmParameters` copy stored in `fParameters`.
+  struct DataDeleter {
+    void operator()(G4HepEmData *data) const;
+  };
+
+  /// @brief Deletes the outer `G4HepEmParameters` object after first freeing
+  /// all host/device allocations it owns.
+  /// @details
+  /// The copied parameter block owns its `fParametersPerRegion` host array and
+  /// the GPU mirror pointed to by `fParametersPerRegion_gpu` after transport
+  /// upload. `FreeG4HepEmParameters` releases those nested allocations, while
+  /// this deleter also deletes the outer `G4HepEmParameters` allocation.
+  struct ParametersDeleter {
+    void operator()(G4HepEmParameters *parameters) const;
+  };
+
+  /// Owned `G4HepEmData` rebuilt for AdePT; released through `DataDeleter`.
+  std::unique_ptr<G4HepEmData, DataDeleter> fData;
+
+  /// Owned deep copy of `G4HepEmParameters` used to build and upload transport data; released through `ParametersDeleter`.
+  std::unique_ptr<G4HepEmParameters, ParametersDeleter> fParameters;
+};
+
+} // namespace AsyncAdePT
+
+#endif
diff --git a/include/AdePT/core/AsyncAdePTTransport.cuh b/include/AdePT/core/AsyncAdePTTransport.cuh
@@ -45,9 +45,6 @@ using SteppingAction = adept::SteppingAction::Action;
 #endif
 
 #include <G4HepEmData.hh>
-#include <G4HepEmConfig.hh>
-#include <G4HepEmState.hh>
-#include <G4HepEmStateInit.hh>
 #include <G4HepEmParameters.hh>
 #include <G4HepEmMatCutData.hh>
 #include <G4HepEmParametersInit.hh>
@@ -536,48 +533,32 @@ void CopySurfaceModelToGPU()
 #endif
 }
 
-G4HepEmState *InitG4HepEm(G4HepEmConfig *hepEmConfig)
+void UploadG4HepEmToGPU(G4HepEmData *hepEmData, G4HepEmParameters *hepEmParameters)
 {
-  // here we call everything from InitG4HepEmState, as we need to provide the parameters from the G4HepEmConfig and do
-  // not want to initialize to the default values
-  auto state = new G4HepEmState;
-
-  // Use the config-provided parameters
-  state->fParameters = hepEmConfig->GetG4HepEmParameters();
-
-  // Initialize data and fill each subtable using its initialize function
-  state->fData = new G4HepEmData;
-  InitG4HepEmData(state->fData);
-  InitMaterialAndCoupleData(state->fData, state->fParameters);
-
-  // electrons, positrons, gamma
-  InitElectronData(state->fData, state->fParameters, true);
-  InitElectronData(state->fData, state->fParameters, false);
-  InitGammaData(state->fData, state->fParameters);
-
-  G4HepEmMatCutData *cutData = state->fData->fTheMatCutData;
-  G4cout << "fNumG4MatCuts = " << cutData->fNumG4MatCuts << ", fNumMatCutData = " << cutData->fNumMatCutData << G4endl;
+  if (hepEmData == nullptr || hepEmParameters == nullptr) {
+    throw std::runtime_error("UploadG4HepEmToGPU requires non-null G4HepEmData and G4HepEmParameters.");
+  }
 
-  // Copy to GPU.
-  CopyG4HepEmDataToGPU(state->fData);
-  CopyG4HepEmParametersToGPU(state->fParameters);
+  // Copy the prepared host-side HepEm data to the GPU.
+  CopyG4HepEmDataToGPU(hepEmData);
+  CopyG4HepEmParametersToGPU(hepEmParameters);
 
   // Create G4HepEmParameters with the device pointer
-  G4HepEmParameters parametersOnDevice        = *state->fParameters;
-  parametersOnDevice.fParametersPerRegion     = state->fParameters->fParametersPerRegion_gpu;
+  G4HepEmParameters parametersOnDevice        = *hepEmParameters;
+  parametersOnDevice.fParametersPerRegion     = hepEmParameters->fParametersPerRegion_gpu;
   parametersOnDevice.fParametersPerRegion_gpu = nullptr;
 
   ADEPT_DEVICE_API_CALL(MemcpyToSymbol(g4HepEmPars, &parametersOnDevice, sizeof(G4HepEmParameters)));
 
   // Create G4HepEmData with the device pointers.
   G4HepEmData dataOnDevice;
-  dataOnDevice.fTheMatCutData   = state->fData->fTheMatCutData_gpu;
-  dataOnDevice.fTheMaterialData = state->fData->fTheMaterialData_gpu;
-  dataOnDevice.fTheElementData  = state->fData->fTheElementData_gpu;
-  dataOnDevice.fTheElectronData = state->fData->fTheElectronData_gpu;
-  dataOnDevice.fThePositronData = state->fData->fThePositronData_gpu;
-  dataOnDevice.fTheSBTableData  = state->fData->fTheSBTableData_gpu;
-  dataOnDevice.fTheGammaData    = state->fData->fTheGammaData_gpu;
+  dataOnDevice.fTheMatCutData   = hepEmData->fTheMatCutData_gpu;
+  dataOnDevice.fTheMaterialData = hepEmData->fTheMaterialData_gpu;
+  dataOnDevice.fTheElementData  = hepEmData->fTheElementData_gpu;
+  dataOnDevice.fTheElectronData = hepEmData->fTheElectronData_gpu;
+  dataOnDevice.fThePositronData = hepEmData->fThePositronData_gpu;
+  dataOnDevice.fTheSBTableData  = hepEmData->fTheSBTableData_gpu;
+  dataOnDevice.fTheGammaData    = hepEmData->fTheGammaData_gpu;
   // The other pointers should never be used.
   dataOnDevice.fTheMatCutData_gpu   = nullptr;
   dataOnDevice.fTheMaterialData_gpu = nullptr;
@@ -588,8 +569,6 @@ G4HepEmState *InitG4HepEm(G4HepEmConfig *hepEmConfig)
   dataOnDevice.fTheGammaData_gpu    = nullptr;
 
   ADEPT_DEVICE_API_CALL(MemcpyToSymbol(g4HepEmData, &dataOnDevice, sizeof(G4HepEmData)));
-
-  return state;
 }
 
 template <typename FieldType>
@@ -1828,8 +1807,8 @@ std::thread LaunchGPUWorker(int trackCapacity, int leakCapacity, int scoringCapa
                      hasWDTRegions};
 }
 
-void FreeGPU(std::unique_ptr<AsyncAdePT::GPUstate, AsyncAdePT::GPUstateDeleter> &gpuState, G4HepEmState &g4hepem_state,
-             std::thread &gpuWorker, adeptint::WDTDeviceBuffers &wdtDev)
+void FreeGPU(std::unique_ptr<AsyncAdePT::GPUstate, AsyncAdePT::GPUstateDeleter> &gpuState, std::thread &gpuWorker,
+             adeptint::WDTDeviceBuffers &wdtDev)
 {
   gpuState->runTransport = false;
   gpuWorker.join();
@@ -1844,9 +1823,10 @@ void FreeGPU(std::unique_ptr<AsyncAdePT::GPUstate, AsyncAdePT::GPUstateDeleter>
   // Free resources.
   gpuState.reset();
 
-  // Free G4HepEm data
-  FreeG4HepEmData(g4hepem_state.fData);
-  FreeG4HepEmParametersOnGPU(g4hepem_state.fParameters);
+  // Note: neither the `G4HepEmData` tables nor the GPU mirror of
+  // `G4HepEmParameters` are released here. Both are freed when the
+  // transport-owned `AdePTG4HepEmState` is destroyed, because that wrapper owns
+  // the data tables, the copied parameters and the uploads attached to them.
 
   // Free magnetic field
 #ifdef ADEPT_USE_EXT_BFIELD

diff --git a/include/AdePT/core/AsyncAdePTTransport.hh b/include/AdePT/core/AsyncAdePTTransport.hh
@@ -10,25 +10,24 @@
 #define ASYNC_ADEPT_TRANSPORT_HH
 
 #include <AdePT/core/AdePTConfiguration.hh>
+#include <AdePT/core/AdePTG4HepEmState.hh>
 #include <AdePT/core/AsyncAdePTTransportStruct.hh>
 #include <AdePT/core/CommonStruct.h>
 #include <AdePT/integration/AdePTGeant4Integration.hh>
-#include <AdePT/integration/G4HepEmTrackingManagerSpecialized.hh>
 
 #include <VecGeom/base/Config.h>
 #include <VecGeom/management/CudaManager.h> // forward declares vecgeom::cxx::VPlacedVolume
 
 #include <condition_variable>
 #include <mutex>
 #include <memory>
+#include <span>
 #include <thread>
 #include <unordered_map>
 #include <optional>
 
 class G4Region;
 class G4VPhysicalVolume;
-class G4HepEmConfig;
-struct G4HepEmState;
 namespace AsyncAdePT {
 struct TrackBuffer;
 struct GPUstate;
@@ -50,10 +49,11 @@ private:
   unsigned short fLastNParticlesOnCPU{0}; ///< Number N of last N particles that are finished on CPU
   unsigned short fMaxWDTIter{5};          ///< Maximum number of Woodcock tracking iterations per step
   std::unique_ptr<GPUstate, GPUstateDeleter> fGPUstate{nullptr}; ///< CUDA state placeholder
-  std::unique_ptr<TrackBuffer> fBuffer{nullptr};     ///< Buffers for transferring tracks between host and device
-  std::unique_ptr<G4HepEmState> fg4hepem_state;      ///< The HepEm state singleton
-  adeptint::WDTDeviceBuffers fWDTDev{};              ///< device buffers for Woodcock tracking data
-  std::thread fGPUWorker;                            ///< Thread to manage GPU
+  std::unique_ptr<TrackBuffer> fBuffer{nullptr}; ///< Buffers for transferring tracks between host and device
+  std::unique_ptr<AdePTG4HepEmState>
+      fAdePTG4HepEmState;               ///< Transport-owned wrapper around `G4HepEmData` and copied `G4HepEmParameters`
+  adeptint::WDTDeviceBuffers fWDTDev{}; ///< device buffers for Woodcock tracking data
+  std::thread fGPUWorker;               ///< Thread to manage GPU
   std::condition_variable fCV_G4Workers;             ///< Communicate with G4 workers
   std::mutex fMutex_G4Workers;                       ///< Mutex associated to the condition variable
   std::vector<std::atomic<EventState>> fEventStates; ///< State machine for each G4 worker
@@ -74,14 +74,17 @@ private:
   ///< Needed to stall the GPU, in case the nPartInFlight * fHitBufferSafetyFactor > available HitSlots
   double fHitBufferSafetyFactor{1.5};
 
-  void Initialize(G4HepEmConfig *hepEmConfig);
+  void Initialize(adeptint::VolAuxData *auxData, const adeptint::WDTHostPacked &wdtPacked,
+                  const std::vector<float> &uniformFieldValues);
   void InitBVH();
   bool InitializeGeometry(const vecgeom::cxx::VPlacedVolume *world);
-  bool InitializePhysics(G4HepEmConfig *hepEmConfig);
+  bool InitializePhysics();
   void InitWDTOnDevice(const adeptint::WDTHostPacked &src, adeptint::WDTDeviceBuffers &dev, unsigned short maxIter);
 
 public:
-  AsyncAdePTTransport(AdePTConfiguration &configuration, G4HepEmConfig *hepEmConfig);
+  AsyncAdePTTransport(AdePTConfiguration &configuration, std::unique_ptr<AdePTG4HepEmState> adeptG4HepEmState,
+                      adeptint::VolAuxData *auxData, const adeptint::WDTHostPacked &wdtPacked,
+                      const std::vector<float> &uniformFieldValues);
   AsyncAdePTTransport(const AsyncAdePTTransport &other) = delete;
   ~AsyncAdePTTransport();
 
@@ -90,15 +93,32 @@ public:
                 double diry, double dirz, double globalTime, double localTime, double properTime, float weight,
                 unsigned short stepCounter, int threadId, unsigned int eventId, vecgeom::NavigationState &&state);
   bool GetTrackInAllRegions() const { return fTrackInAllRegions; }
-  bool GetCallUserActions() const { return fReturnFirstAndLastStep; }
+  bool GetReturnAllSteps() const { return fReturnAllSteps; }
+  bool GetReturnFirstAndLastStep() const { return fReturnFirstAndLastStep; }
   std::vector<std::string> const *GetGPURegionNames() { return fGPURegionNames; }
   std::vector<std::string> const *GetCPURegionNames() { return fCPURegionNames; }
-  G4HepEmState *GetHepEmState() const { return fg4hepem_state.get(); }
-  void CompleteInitialization(adeptint::VolAuxData *auxData, const adeptint::WDTHostPacked &wdtPacked,
-                              const std::vector<float> &uniformFieldValues);
+  /// @brief Handle the currently available returned GPU-hit batches for one thread and event.
+  /// @details
+  /// Transport retains ownership of the hit-buffer lifetime. For each available
+  /// batch, `callback` is invoked with a `std::span<const GPUHit>` view and the
+  /// batch is released again when the callback returns.
+  ///
+  /// In this code path, the callback is the `AdePTTrackingManager` logic that
+  /// reconstructs Geant4 steps from the returned GPU hits.
+  template <typename Callback>
+  void HandleReturnedGPUHitBatchesWith(int threadId, int eventId, Callback &&callback);
+  /// @brief Request that the device flush all pending work for the given worker.
+  void RequestFlush(int threadId);
+  /// @brief Wait until the transport threads make further flush progress.
+  void WaitForFlushProgress();
+  /// @brief Check whether the device side has completed flushing for the given worker.
+  bool IsDeviceFlushed(int threadId) const;
+  /// @brief Take the leaked-track batch returned by transport for the given worker.
+  std::vector<TrackDataWithIDs> TakeReturnedTracks(int threadId);
+  /// @brief Mark the returned-track batch for the given worker as consumed.
+  void MarkLeakedTracksRetrieved(int threadId);
   /// Block until transport of the given event is done.
   void Flush(int threadId, int eventId, AdePTGeant4Integration &g4Integration);
-  void ProcessGPUSteps(int threadId, int eventId, AdePTGeant4Integration &g4Integration);
 };
 
 } // namespace AsyncAdePT