diff --git a/CMakeLists.txt b/CMakeLists.txt index 00626f47..f238296a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -444,6 +444,7 @@ endif() # Build Targets #----------------------------------------------------------------------------# set(ADEPT_G4_INTEGRATION_SRCS + src/AdePTG4HepEmState.cpp src/G4HepEmTrackingManagerSpecialized.cc src/AdePTTrackingManager.cc src/G4EmStandardPhysics_AdePT.cc diff --git a/include/AdePT/core/AdePTG4HepEmState.hh b/include/AdePT/core/AdePTG4HepEmState.hh new file mode 100644 index 00000000..a396c67e --- /dev/null +++ b/include/AdePT/core/AdePTG4HepEmState.hh @@ -0,0 +1,80 @@ +// SPDX-FileCopyrightText: 2026 CERN +// SPDX-License-Identifier: Apache-2.0 + +#ifndef ADEPT_G4_HEPEM_STATE_HH +#define ADEPT_G4_HEPEM_STATE_HH + +#include + +struct G4HepEmConfig; +struct G4HepEmData; +struct G4HepEmParameters; + +namespace AsyncAdePT { + +/// @brief Owns the prepared host-side G4HepEm inputs used by transport. +/// @details +/// The Geant4 integration side prepares one of these objects before the shared +/// transport is created. This wrapper owns both: +/// - the rebuilt `G4HepEmData` +/// - a deep copy of the `G4HepEmParameters` taken from the provided config +/// +/// Cleanup is intentionally split: +/// - `DataDeleter` performs the deep cleanup of the owned `G4HepEmData` +/// and then deletes the outer `G4HepEmData` allocation. +/// - `ParametersDeleter` performs the deep cleanup of the owned +/// `G4HepEmParameters`, including the GPU mirror created by AdePT, and then +/// deletes the outer `G4HepEmParameters` allocation. +class AdePTG4HepEmState { +public: + /// @brief Build the AdePT-owned `G4HepEmData` and `G4HepEmParameters` copies from the supplied config. + explicit AdePTG4HepEmState(G4HepEmConfig *hepEmConfig); + + /// @brief Destroy the owned `G4HepEmData` and `G4HepEmParameters` copies. 
+ ~AdePTG4HepEmState(); + + AdePTG4HepEmState(const AdePTG4HepEmState &) = delete; + AdePTG4HepEmState &operator=(const AdePTG4HepEmState &) = delete; + + AdePTG4HepEmState(AdePTG4HepEmState &&) noexcept = default; + AdePTG4HepEmState &operator=(AdePTG4HepEmState &&) noexcept = default; + + /// @brief Access the owned host-side HepEm data tables. + G4HepEmData *GetData() const { return fData.get(); } + + /// @brief Access the owned HepEm parameter copy. + G4HepEmParameters *GetParameters() const { return fParameters.get(); } + +private: + /// @brief Deletes the outer `G4HepEmData` object after first freeing all tables it owns. + /// @details + /// `FreeG4HepEmData` releases both the host-side tables and any device-side + /// mirrors embedded in the `G4HepEmData` object, but it does not delete the outer + /// `G4HepEmData` allocation itself. This deleter performs both steps for the + /// owned `fData` member. It does not touch the separately owned + /// `G4HepEmParameters` copy stored in `fParameters`. + struct DataDeleter { + void operator()(G4HepEmData *data) const; + }; + + /// @brief Deletes the outer `G4HepEmParameters` object after first freeing + /// all host/device allocations it owns. + /// @details + /// The copied parameter block owns its `fParametersPerRegion` host array and + /// the GPU mirror pointed to by `fParametersPerRegion_gpu` after transport + /// upload. `FreeG4HepEmParameters` releases those nested allocations, while + /// this deleter also deletes the outer `G4HepEmParameters` allocation. + struct ParametersDeleter { + void operator()(G4HepEmParameters *parameters) const; + }; + + /// Owned `G4HepEmData` rebuilt for AdePT. + std::unique_ptr<G4HepEmData, DataDeleter> fData; + + /// Owned deep copy of `G4HepEmParameters` used to build and upload transport data. 
+ std::unique_ptr<G4HepEmParameters, ParametersDeleter> fParameters; +}; + +} // namespace AsyncAdePT + +#endif diff --git a/include/AdePT/core/AsyncAdePTTransport.cuh b/include/AdePT/core/AsyncAdePTTransport.cuh index f832c57c..b27a3d0f 100644 --- a/include/AdePT/core/AsyncAdePTTransport.cuh +++ b/include/AdePT/core/AsyncAdePTTransport.cuh @@ -45,9 +45,6 @@ using SteppingAction = adept::SteppingAction::Action; #endif #include -#include -#include -#include #include #include #include @@ -536,48 +533,32 @@ void CopySurfaceModelToGPU() #endif } -G4HepEmState *InitG4HepEm(G4HepEmConfig *hepEmConfig) +void UploadG4HepEmToGPU(G4HepEmData *hepEmData, G4HepEmParameters *hepEmParameters) { - // here we call everything from InitG4HepEmState, as we need to provide the parameters from the G4HepEmConfig and do - // not want to initialize to the default values - auto state = new G4HepEmState; - - // Use the config-provided parameters - state->fParameters = hepEmConfig->GetG4HepEmParameters(); - - // Initialize data and fill each subtable using its initialize function - state->fData = new G4HepEmData; - InitG4HepEmData(state->fData); - InitMaterialAndCoupleData(state->fData, state->fParameters); - - // electrons, positrons, gamma - InitElectronData(state->fData, state->fParameters, true); - InitElectronData(state->fData, state->fParameters, false); - InitGammaData(state->fData, state->fParameters); - - G4HepEmMatCutData *cutData = state->fData->fTheMatCutData; - G4cout << "fNumG4MatCuts = " << cutData->fNumG4MatCuts << ", fNumMatCutData = " << cutData->fNumMatCutData << G4endl; + if (hepEmData == nullptr || hepEmParameters == nullptr) { + throw std::runtime_error("UploadG4HepEmToGPU requires non-null G4HepEmData and G4HepEmParameters."); + } - // Copy to GPU. - CopyG4HepEmDataToGPU(state->fData); - CopyG4HepEmParametersToGPU(state->fParameters); + // Copy the prepared host-side HepEm data to the GPU. 
+ CopyG4HepEmDataToGPU(hepEmData); + CopyG4HepEmParametersToGPU(hepEmParameters); // Create G4HepEmParameters with the device pointer - G4HepEmParameters parametersOnDevice = *state->fParameters; - parametersOnDevice.fParametersPerRegion = state->fParameters->fParametersPerRegion_gpu; + G4HepEmParameters parametersOnDevice = *hepEmParameters; + parametersOnDevice.fParametersPerRegion = hepEmParameters->fParametersPerRegion_gpu; parametersOnDevice.fParametersPerRegion_gpu = nullptr; ADEPT_DEVICE_API_CALL(MemcpyToSymbol(g4HepEmPars, &parametersOnDevice, sizeof(G4HepEmParameters))); // Create G4HepEmData with the device pointers. G4HepEmData dataOnDevice; - dataOnDevice.fTheMatCutData = state->fData->fTheMatCutData_gpu; - dataOnDevice.fTheMaterialData = state->fData->fTheMaterialData_gpu; - dataOnDevice.fTheElementData = state->fData->fTheElementData_gpu; - dataOnDevice.fTheElectronData = state->fData->fTheElectronData_gpu; - dataOnDevice.fThePositronData = state->fData->fThePositronData_gpu; - dataOnDevice.fTheSBTableData = state->fData->fTheSBTableData_gpu; - dataOnDevice.fTheGammaData = state->fData->fTheGammaData_gpu; + dataOnDevice.fTheMatCutData = hepEmData->fTheMatCutData_gpu; + dataOnDevice.fTheMaterialData = hepEmData->fTheMaterialData_gpu; + dataOnDevice.fTheElementData = hepEmData->fTheElementData_gpu; + dataOnDevice.fTheElectronData = hepEmData->fTheElectronData_gpu; + dataOnDevice.fThePositronData = hepEmData->fThePositronData_gpu; + dataOnDevice.fTheSBTableData = hepEmData->fTheSBTableData_gpu; + dataOnDevice.fTheGammaData = hepEmData->fTheGammaData_gpu; // The other pointers should never be used. 
dataOnDevice.fTheMatCutData_gpu = nullptr; dataOnDevice.fTheMaterialData_gpu = nullptr; @@ -588,8 +569,6 @@ G4HepEmState *InitG4HepEm(G4HepEmConfig *hepEmConfig) dataOnDevice.fTheGammaData_gpu = nullptr; ADEPT_DEVICE_API_CALL(MemcpyToSymbol(g4HepEmData, &dataOnDevice, sizeof(G4HepEmData))); - - return state; } template @@ -1828,8 +1807,8 @@ std::thread LaunchGPUWorker(int trackCapacity, int leakCapacity, int scoringCapa hasWDTRegions}; } -void FreeGPU(std::unique_ptr &gpuState, G4HepEmState &g4hepem_state, - std::thread &gpuWorker, adeptint::WDTDeviceBuffers &wdtDev) +void FreeGPU(std::unique_ptr &gpuState, std::thread &gpuWorker, + adeptint::WDTDeviceBuffers &wdtDev) { gpuState->runTransport = false; gpuWorker.join(); @@ -1844,9 +1823,10 @@ void FreeGPU(std::unique_ptr // Free resources. gpuState.reset(); - // Free G4HepEm data - FreeG4HepEmData(g4hepem_state.fData); - FreeG4HepEmParametersOnGPU(g4hepem_state.fParameters); + // Note: the GPU mirror of `G4HepEmParameters` is not released here. + // That cleanup happens when the transport-owned `AdePTG4HepEmState` dies, + // because it owns both the copied `G4HepEmParameters` object and the upload + // lifecycle attached to that copy. 
// Free magnetic field #ifdef ADEPT_USE_EXT_BFIELD diff --git a/include/AdePT/core/AsyncAdePTTransport.hh b/include/AdePT/core/AsyncAdePTTransport.hh index 51ecbc60..095cf1d3 100644 --- a/include/AdePT/core/AsyncAdePTTransport.hh +++ b/include/AdePT/core/AsyncAdePTTransport.hh @@ -10,10 +10,10 @@ #define ASYNC_ADEPT_TRANSPORT_HH #include +#include #include #include #include -#include #include #include // forward declares vecgeom::cxx::VPlacedVolume @@ -27,8 +27,6 @@ class G4Region; class G4VPhysicalVolume; -class G4HepEmConfig; -struct G4HepEmState; namespace AsyncAdePT { struct TrackBuffer; struct GPUstate; @@ -50,10 +48,11 @@ private: unsigned short fLastNParticlesOnCPU{0}; ///< Number N of last N particles that are finished on CPU unsigned short fMaxWDTIter{5}; ///< Maximum number of Woodcock tracking iterations per step std::unique_ptr fGPUstate{nullptr}; ///< CUDA state placeholder - std::unique_ptr fBuffer{nullptr}; ///< Buffers for transferring tracks between host and device - std::unique_ptr fg4hepem_state; ///< The HepEm state singleton - adeptint::WDTDeviceBuffers fWDTDev{}; ///< device buffers for Woodcock tracking data - std::thread fGPUWorker; ///< Thread to manage GPU + std::unique_ptr fBuffer{nullptr}; ///< Buffers for transferring tracks between host and device + std::unique_ptr + fAdePTG4HepEmState; ///< Transport-owned wrapper around `G4HepEmData` and copied `G4HepEmParameters` + adeptint::WDTDeviceBuffers fWDTDev{}; ///< device buffers for Woodcock tracking data + std::thread fGPUWorker; ///< Thread to manage GPU std::condition_variable fCV_G4Workers; ///< Communicate with G4 workers std::mutex fMutex_G4Workers; ///< Mutex associated to the condition variable std::vector> fEventStates; ///< State machine for each G4 worker @@ -74,14 +73,17 @@ private: ///< Needed to stall the GPU, in case the nPartInFlight * fHitBufferSafetyFactor > available HitSlots double fHitBufferSafetyFactor{1.5}; - void Initialize(G4HepEmConfig *hepEmConfig); + void 
Initialize(adeptint::VolAuxData *auxData, const adeptint::WDTHostPacked &wdtPacked, + const std::vector &uniformFieldValues); void InitBVH(); bool InitializeGeometry(const vecgeom::cxx::VPlacedVolume *world); - bool InitializePhysics(G4HepEmConfig *hepEmConfig); + bool InitializePhysics(); void InitWDTOnDevice(const adeptint::WDTHostPacked &src, adeptint::WDTDeviceBuffers &dev, unsigned short maxIter); public: - AsyncAdePTTransport(AdePTConfiguration &configuration, G4HepEmConfig *hepEmConfig); + AsyncAdePTTransport(AdePTConfiguration &configuration, std::unique_ptr adeptG4HepEmState, + adeptint::VolAuxData *auxData, const adeptint::WDTHostPacked &wdtPacked, + const std::vector &uniformFieldValues); AsyncAdePTTransport(const AsyncAdePTTransport &other) = delete; ~AsyncAdePTTransport(); @@ -93,9 +95,6 @@ public: bool GetCallUserActions() const { return fReturnFirstAndLastStep; } std::vector const *GetGPURegionNames() { return fGPURegionNames; } std::vector const *GetCPURegionNames() { return fCPURegionNames; } - G4HepEmState *GetHepEmState() const { return fg4hepem_state.get(); } - void CompleteInitialization(adeptint::VolAuxData *auxData, const adeptint::WDTHostPacked &wdtPacked, - const std::vector &uniformFieldValues); /// Block until transport of the given event is done. 
void Flush(int threadId, int eventId, AdePTGeant4Integration &g4Integration); void ProcessGPUSteps(int threadId, int eventId, AdePTGeant4Integration &g4Integration); diff --git a/include/AdePT/core/AsyncAdePTTransport.icc b/include/AdePT/core/AsyncAdePTTransport.icc index 2967c224..53961aa0 100644 --- a/include/AdePT/core/AsyncAdePTTransport.icc +++ b/include/AdePT/core/AsyncAdePTTransport.icc @@ -21,10 +21,7 @@ #include #include -#include -#include #include -#include #include #include @@ -34,7 +31,7 @@ namespace async_adept_impl { void setDeviceLimits(int stackLimit = 0, int heapLimit = 0); void CopySurfaceModelToGPU(); void InitWDTOnDevice(const adeptint::WDTHostPacked &, adeptint::WDTDeviceBuffers &, unsigned short); -G4HepEmState *InitG4HepEm(G4HepEmConfig *hepEmConfig); +void UploadG4HepEmToGPU(G4HepEmData *hepEmData, G4HepEmParameters *hepEmParameters); std::shared_ptr> GetGPUHits(unsigned int, AsyncAdePT::GPUstate &); std::pair GetGPUHitsFromBuffer(unsigned int, unsigned int, AsyncAdePT::GPUstate &, bool &); void CloseGPUBuffer(unsigned int, AsyncAdePT::GPUstate &, GPUHit *, const bool); @@ -45,7 +42,7 @@ std::unique_ptr InitializeGPU int trackCapacity, int leakCapacity, int scoringCapacity, int numThreads, AsyncAdePT::TrackBuffer &trackBuffer, double CPUCapacityFactor, double CPUCopyFraction, std::string &generalBfieldFile, const std::vector &uniformBfieldValues); -void FreeGPU(std::unique_ptr &, G4HepEmState &, std::thread &, +void FreeGPU(std::unique_ptr &, std::thread &, adeptint::WDTDeviceBuffers &); } // namespace async_adept_impl @@ -72,16 +69,20 @@ std::ostream &operator<<(std::ostream &stream, TrackDataWithIDs const &track) // These definitions live in a header-included .icc file, so they must remain // inline to avoid multiple definitions across translation units. 
-inline AsyncAdePTTransport::AsyncAdePTTransport(AdePTConfiguration &configuration, G4HepEmConfig *hepEmConfig) +inline AsyncAdePTTransport::AsyncAdePTTransport(AdePTConfiguration &configuration, + std::unique_ptr adeptG4HepEmState, + adeptint::VolAuxData *auxData, const adeptint::WDTHostPacked &wdtPacked, + const std::vector &uniformFieldValues) : fAdePTSeed{configuration.GetAdePTSeed()}, fNThread{(ushort)configuration.GetNumThreads()}, fTrackCapacity{(uint)(1024 * 1024 * configuration.GetMillionsOfTrackSlots())}, fLeakCapacity{(uint)(1024 * 1024 * configuration.GetMillionsOfLeakSlots())}, fScoringCapacity{(uint)(1024 * 1024 * configuration.GetMillionsOfHitSlots())}, fDebugLevel{configuration.GetVerbosity()}, fCUDAStackLimit{configuration.GetCUDAStackLimit()}, fCUDAHeapLimit{configuration.GetCUDAHeapLimit()}, fLastNParticlesOnCPU{configuration.GetLastNParticlesOnCPU()}, - fMaxWDTIter{configuration.GetMaxWDTIter()}, fEventStates(fNThread), fGPUNetEnergy(fNThread, 0.0), - fTrackInAllRegions{configuration.GetTrackInAllRegions()}, fGPURegionNames{configuration.GetGPURegionNames()}, - fCPURegionNames{configuration.GetCPURegionNames()}, fReturnAllSteps{configuration.GetCallUserSteppingAction()}, + fMaxWDTIter{configuration.GetMaxWDTIter()}, fAdePTG4HepEmState(std::move(adeptG4HepEmState)), + fEventStates(fNThread), fGPUNetEnergy(fNThread, 0.0), fTrackInAllRegions{configuration.GetTrackInAllRegions()}, + fGPURegionNames{configuration.GetGPURegionNames()}, fCPURegionNames{configuration.GetCPURegionNames()}, + fReturnAllSteps{configuration.GetCallUserSteppingAction()}, fReturnFirstAndLastStep{configuration.GetCallUserTrackingAction() || configuration.GetCallUserSteppingAction()}, fBfieldFile{configuration.GetCovfieBfieldFile()}, fCPUCapacityFactor{configuration.GetCPUCapacityFactor()}, fCPUCopyFraction{configuration.GetHitBufferFlushThreshold()}, @@ -94,12 +95,12 @@ inline AsyncAdePTTransport::AsyncAdePTTransport(AdePTConfiguration &configuratio 
std::atomic_init(&eventState, EventState::LeakedTracksRetrieved); } - AsyncAdePTTransport::Initialize(hepEmConfig); + AsyncAdePTTransport::Initialize(auxData, wdtPacked, uniformFieldValues); } inline AsyncAdePTTransport::~AsyncAdePTTransport() { - async_adept_impl::FreeGPU(std::ref(fGPUstate), *fg4hepem_state, fGPUWorker, fWDTDev); + async_adept_impl::FreeGPU(std::ref(fGPUstate), fGPUWorker, fWDTDev); } inline void AsyncAdePTTransport::AddTrack(int pdg, uint64_t trackId, uint64_t parentId, double energy, double x, @@ -175,14 +176,20 @@ inline bool AsyncAdePTTransport::InitializeGeometry(const vecgeom::cxx::VPlacedV return success; } -inline bool AsyncAdePTTransport::InitializePhysics(G4HepEmConfig *hepEmConfig) +inline bool AsyncAdePTTransport::InitializePhysics() { - // Initialize shared physics data - fg4hepem_state.reset(async_adept_impl::InitG4HepEm(hepEmConfig)); + if (!fAdePTG4HepEmState) { + throw std::runtime_error("AsyncAdePTTransport::InitializePhysics: Missing AdePT-owned G4HepEm state."); + } + + // Upload the transport-owned `G4HepEmData` and copied + // `G4HepEmParameters` to the device. + async_adept_impl::UploadG4HepEmToGPU(fAdePTG4HepEmState->GetData(), fAdePTG4HepEmState->GetParameters()); return true; } -inline void AsyncAdePTTransport::Initialize(G4HepEmConfig *hepEmConfig) +inline void AsyncAdePTTransport::Initialize(adeptint::VolAuxData *auxData, const adeptint::WDTHostPacked &wdtPacked, + const std::vector &uniformFieldValues) { if (vecgeom::GeoManager::Instance().GetRegisteredVolumesCount() == 0) throw std::runtime_error("AsyncAdePTTransport::Initialize: Number of geometry volumes is zero."); @@ -196,23 +203,12 @@ inline void AsyncAdePTTransport::Initialize(G4HepEmConfig *hepEmConfig) if (!InitializeGeometry(world)) throw std::runtime_error("AsyncAdePTTransport::Initialize: Cannot initialize geometry on GPU"); - // Initialize G4HepEm - if (!InitializePhysics(hepEmConfig)) + // Upload the prepared HepEm physics data to the device. 
+ if (!InitializePhysics()) throw std::runtime_error("AsyncAdePTTransport::Initialize cannot initialize physics on GPU"); -} - -inline void AsyncAdePTTransport::CompleteInitialization(adeptint::VolAuxData *auxData, - const adeptint::WDTHostPacked &wdtPacked, - const std::vector &uniformFieldValues) -{ - // This is the second half of the split initialization. A non-zero volume count was already - // required in Initialize() before geometry upload, and it remains a hard precondition here - // before uploading any geometry-derived metadata to the device. - const auto numVolumes = vecgeom::GeoManager::Instance().GetRegisteredVolumesCount(); - if (numVolumes == 0) - throw std::runtime_error("AsyncAdePTTransport::CompleteInitialization: Number of geometry volumes is zero."); // Initialize volume auxiliary data on device + const auto numVolumes = vecgeom::GeoManager::Instance().GetRegisteredVolumesCount(); auto &volAuxArray = adeptint::VolAuxArray::GetInstance(); volAuxArray.fNumVolumes = numVolumes; volAuxArray.fAuxData = auxData; diff --git a/include/AdePT/integration/AdePTGeant4Integration.hh b/include/AdePT/integration/AdePTGeant4Integration.hh index 46660cec..0d3466eb 100644 --- a/include/AdePT/integration/AdePTGeant4Integration.hh +++ b/include/AdePT/integration/AdePTGeant4Integration.hh @@ -20,7 +20,7 @@ #include #include -struct G4HepEmState; +struct G4HepEmData; namespace AdePTGeant4Integration_detail { struct ScoringObjects; @@ -53,10 +53,10 @@ public: static void CreateVecGeomWorld(G4VPhysicalVolume const *physvol); /// @brief This function compares G4 and VecGeom geometries and reports any differences - static void CheckGeometry(G4HepEmState *hepEmState); + static void CheckGeometry(G4HepEmData const *hepEmData); /// @brief Fills the auxiliary data needed for AdePT - static void InitVolAuxData(adeptint::VolAuxData *volAuxData, G4HepEmState *hepEmState, + static void InitVolAuxData(adeptint::VolAuxData *volAuxData, G4HepEmData const *hepEmData, 
G4HepEmTrackingManagerSpecialized *hepEmTM, bool trackInAllRegions, std::vector const *gpuRegionNames, adeptint::WDTHostRaw &wdtRaw); diff --git a/include/AdePT/integration/AdePTTrackingManager.hh b/include/AdePT/integration/AdePTTrackingManager.hh index f20232ba..93fd49f1 100644 --- a/include/AdePT/integration/AdePTTrackingManager.hh +++ b/include/AdePT/integration/AdePTTrackingManager.hh @@ -62,6 +62,17 @@ private: const G4NavigationHistory *aG4NavigationHistory = nullptr); /// @brief Perform the one-time shared AdePT transport initialization on the first Geant4 worker. + /// @details + /// The first worker prepares all host-side inputs needed by transport: + /// - the uniform magnetic-field values + /// - the AdePT-owned `AdePTG4HepEmState` + /// - geometry consistency checks + /// - `VolAuxData` + /// - packed WDT metadata + /// + /// Once that host-side preparation is complete, the worker creates the + /// shared `AsyncAdePTTransport`. The transport constructor then performs the + /// corresponding one-time device initialization and upload. void InitializeSharedAdePTTransport(); std::unique_ptr fHepEmTrackingManager; diff --git a/src/AdePTG4HepEmState.cpp b/src/AdePTG4HepEmState.cpp new file mode 100644 index 00000000..87753306 --- /dev/null +++ b/src/AdePTG4HepEmState.cpp @@ -0,0 +1,100 @@ +// SPDX-FileCopyrightText: 2026 CERN +// SPDX-License-Identifier: Apache-2.0 + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace AsyncAdePT { + +/// @brief Release the tables owned by `G4HepEmData` and then delete the outer object. +/// @details +/// This is the cleanup path for the fully owned `fData` member. It is separate +/// from the class destructor because `std::unique_ptr` needs a deleter for the +/// deep cleanup before the outer `G4HepEmData` allocation itself can be deleted. 
+void AdePTG4HepEmState::DataDeleter::operator()(G4HepEmData *data) const +{ + if (data == nullptr) return; + FreeG4HepEmData(data); + delete data; +} + +/// @brief Release the copied HepEm parameters and then delete the outer object. +/// @details +/// The copied `G4HepEmParameters` block is fully owned by +/// `AdePTG4HepEmState`. This deep +/// cleanup therefore releases both the host-side per-region array and the +/// device-side mirror created during transport upload before deleting the outer +/// `G4HepEmParameters` allocation itself. +void AdePTG4HepEmState::ParametersDeleter::operator()(G4HepEmParameters *parameters) const +{ + if (parameters == nullptr) return; + FreeG4HepEmParameters(parameters); + delete parameters; +} + +/// @brief Rebuild a complete AdePT-owned set of host-side G4HepEm inputs from the supplied config. +/// @details +/// `AdePTG4HepEmState` owns two different G4HepEm objects: +/// - a deep copy of the `G4HepEmParameters` stored in the supplied `G4HepEmConfig` +/// - a freshly rebuilt `G4HepEmData` derived from that copied parameter block +/// +/// We must copy `G4HepEmParameters` because the original object remains owned by +/// the worker-local `G4HepEmConfig`, while the shared AdePT transport can outlive +/// the worker that first created it. `G4HepEmData` is rebuilt here directly, so +/// it is already fully owned by AdePT and does not need a second copy step. 
+AdePTG4HepEmState::AdePTG4HepEmState(G4HepEmConfig *hepEmConfig) + : fData(new G4HepEmData), fParameters(new G4HepEmParameters) +{ + if (hepEmConfig == nullptr) { + throw std::runtime_error("AdePTG4HepEmState requires a non-null G4HepEmConfig."); + } + + G4HepEmParameters *sourceParameters = hepEmConfig->GetG4HepEmParameters(); + if (sourceParameters == nullptr) { + throw std::runtime_error("AdePTG4HepEmState requires initialized G4HepEmParameters in the supplied config."); + } + + // Deep-copy the G4HepEmParameters so the shared transport does not keep a + // pointer into a worker-owned G4HepEmConfig. + *fParameters = *sourceParameters; + fParameters->fParametersPerRegion = nullptr; +#ifdef G4HepEm_CUDA_BUILD + fParameters->fParametersPerRegion_gpu = nullptr; +#endif + if (sourceParameters->fNumRegions > 0) { + if (sourceParameters->fParametersPerRegion == nullptr) { + throw std::runtime_error("AdePTG4HepEmState requires initialized per-region G4HepEmParameters."); + } + fParameters->fParametersPerRegion = new G4HepEmRegionParmeters[sourceParameters->fNumRegions]; + std::copy_n(sourceParameters->fParametersPerRegion, sourceParameters->fNumRegions, + fParameters->fParametersPerRegion); + } + + // Rebuild the G4HepEmData tables from the copied G4HepEmParameters so the + // transport owns a complete, self-contained set of host-side inputs. 
+ InitG4HepEmData(fData.get()); + InitMaterialAndCoupleData(fData.get(), fParameters.get()); + + // Build all EM species + InitElectronData(fData.get(), fParameters.get(), true); + InitElectronData(fData.get(), fParameters.get(), false); + InitGammaData(fData.get(), fParameters.get()); + + G4HepEmMatCutData *cutData = fData->fTheMatCutData; + G4cout << "fNumG4MatCuts = " << cutData->fNumG4MatCuts << ", fNumMatCutData = " << cutData->fNumMatCutData << G4endl; +} + +AdePTG4HepEmState::~AdePTG4HepEmState() = default; + +} // namespace AsyncAdePT diff --git a/src/AdePTGeant4Integration.cpp b/src/AdePTGeant4Integration.cpp index 8b13fca9..0d15e8e3 100644 --- a/src/AdePTGeant4Integration.cpp +++ b/src/AdePTGeant4Integration.cpp @@ -23,7 +23,6 @@ #include #include -#include #include #include #include @@ -257,7 +256,7 @@ namespace { struct VisitContext { const int *g4tohepmcindex; std::size_t nvolumes; - G4HepEmState const *hepEmState; + G4HepEmData const *hepEmData; }; /// Recursive geometry visitor matching one by one Geant4 and VecGeom logical volumes @@ -329,7 +328,7 @@ void visitGeometry(G4VPhysicalVolume const *g4_pvol, vecgeom::VPlacedVolume cons const int g4mcindex = g4_lvol->GetMaterialCutsCouple()->GetIndex(); const int hepemmcindex = context.g4tohepmcindex[g4mcindex]; // Check consistency with G4HepEm data - if (context.hepEmState->fData->fTheMatCutData->fMatCutData[hepemmcindex].fG4MatCutIndex != g4mcindex) + if (context.hepEmData->fTheMatCutData->fMatCutData[hepemmcindex].fG4MatCutIndex != g4mcindex) throw std::runtime_error("Fatal: CheckGeometry: Mismatch between Geant4 mcindex and corresponding G4HepEm index"); if (vg_lvol->id() >= context.nvolumes) throw std::runtime_error("Fatal: CheckGeometry: Volume id larger than number of volumes"); @@ -344,21 +343,21 @@ void visitGeometry(G4VPhysicalVolume const *g4_pvol, vecgeom::VPlacedVolume cons } } // namespace -void AdePTGeant4Integration::CheckGeometry(G4HepEmState *hepEmState) +void 
AdePTGeant4Integration::CheckGeometry(G4HepEmData const *hepEmData) { const G4VPhysicalVolume *g4world = G4TransportationManager::GetTransportationManager()->GetNavigatorForTracking()->GetWorldVolume(); const vecgeom::VPlacedVolume *vecgeomWorld = vecgeom::GeoManager::Instance().GetWorld(); - const int *g4tohepmcindex = hepEmState->fData->fTheMatCutData->fG4MCIndexToHepEmMCIndex; + const int *g4tohepmcindex = hepEmData->fTheMatCutData->fG4MCIndexToHepEmMCIndex; const auto nvolumes = vecgeom::GeoManager::Instance().GetRegisteredVolumesCount(); std::cout << "Visiting geometry ...\n"; - const VisitContext context{g4tohepmcindex, nvolumes, hepEmState}; + const VisitContext context{g4tohepmcindex, nvolumes, hepEmData}; visitGeometry(g4world, vecgeomWorld, context); std::cout << "Visiting geometry done\n"; } -void AdePTGeant4Integration::InitVolAuxData(adeptint::VolAuxData *volAuxData, G4HepEmState *hepEmState, +void AdePTGeant4Integration::InitVolAuxData(adeptint::VolAuxData *volAuxData, G4HepEmData const *hepEmData, G4HepEmTrackingManagerSpecialized *hepEmTM, bool trackInAllRegions, std::vector const *gpuRegionNames, adeptint::WDTHostRaw &wdtRaw) @@ -371,7 +370,7 @@ void AdePTGeant4Integration::InitVolAuxData(adeptint::VolAuxData *volAuxData, G4 const G4VPhysicalVolume *g4world = G4TransportationManager::GetTransportationManager()->GetNavigatorForTracking()->GetWorldVolume(); const vecgeom::VPlacedVolume *vecgeomWorld = vecgeom::GeoManager::Instance().GetWorld(); - const int *g4tohepmcindex = hepEmState->fData->fTheMatCutData->fG4MCIndexToHepEmMCIndex; + const int *g4tohepmcindex = hepEmData->fTheMatCutData->fG4MCIndexToHepEmMCIndex; // We need to go from region names to G4Region std::vector gpuRegions{}; diff --git a/src/AdePTTrackingManager.cc b/src/AdePTTrackingManager.cc index b0a17c1e..888d2226 100644 --- a/src/AdePTTrackingManager.cc +++ b/src/AdePTTrackingManager.cc @@ -36,8 +36,10 @@ std::weak_ptr &SharedAdePTTransportStorage() return transport; } 
-std::shared_ptr CreateSharedAdePTTransport(AdePTConfiguration &conf, - G4HepEmTrackingManagerSpecialized *hepEmTM) +std::shared_ptr GetSharedAdePTTransport( + AdePTConfiguration &conf, std::unique_ptr adeptG4HepEmState, + adeptint::VolAuxData *auxData, const adeptint::WDTHostPacked &wdtPacked, + const std::vector &uniformFieldValues) { auto &transport = SharedAdePTTransportStorage(); // weak_ptr::lock() promotes the stored weak reference to a shared_ptr if the @@ -46,8 +48,12 @@ std::shared_ptr CreateSharedAdePTTransport(AdePTConfiguration &c return existing; } - auto created = std::make_shared(conf, hepEmTM->GetConfig()); - transport = created; + // Create the shared AdePT transport engine on the first worker thread. At + // this point all required host-side inputs have already been prepared, so the + // transport constructor can perform the one-time device initialization. + auto created = + std::make_shared(conf, std::move(adeptG4HepEmState), auxData, wdtPacked, uniformFieldValues); + transport = created; return created; } @@ -90,24 +96,28 @@ void AdePTTrackingManager::InitializeSharedAdePTTransport() std::cout << "Reading in covfie file for magnetic field: " << fAdePTConfiguration->GetCovfieBfieldFile() << std::endl; if (fAdePTConfiguration->GetCovfieBfieldFile() == "") std::cout << "No magnetic field file provided!" << std::endl; #endif + // Prepare the complete host-side input package before constructing the + // shared transport. The transport constructor then performs the one-time + // device-side initialization from these prepared inputs. const auto uniformFieldValues = fGeant4Integration.GetUniformField(); - - // Create the shared AdePT transport engine on the first worker thread. - fAdeptTransport = CreateSharedAdePTTransport(*fAdePTConfiguration, fHepEmTrackingManager.get()); + auto adeptG4HepEmState = std::make_unique(fHepEmTrackingManager->GetConfig()); // Check VecGeom geometry matches Geant4 before deriving any geometry metadata for transport. 
- fGeant4Integration.CheckGeometry(fAdeptTransport->GetHepEmState()); + fGeant4Integration.CheckGeometry(adeptG4HepEmState->GetData()); // Initialize auxiliary per-LV data and collect the raw WDT metadata on the Geant4 side. auto *auxData = new adeptint::VolAuxData[vecgeom::GeoManager::Instance().GetRegisteredVolumesCount()]; adeptint::WDTHostRaw wdtRaw; - fGeant4Integration.InitVolAuxData(auxData, fAdeptTransport->GetHepEmState(), fHepEmTrackingManager.get(), + fGeant4Integration.InitVolAuxData(auxData, adeptG4HepEmState->GetData(), fHepEmTrackingManager.get(), fAdePTConfiguration->GetTrackInAllRegions(), fAdePTConfiguration->GetGPURegionNames(), wdtRaw); adeptint::WDTHostPacked wdtPacked = adeptint::PackWDT(wdtRaw); - // Finish the shared transport initialization by uploading the prepared metadata to the device. - fAdeptTransport->CompleteInitialization(auxData, wdtPacked, uniformFieldValues); + // Move the fully prepared host-side package into the shared transport. The + // first worker creates the transport here; later workers only retrieve the + // already-created shared instance. + fAdeptTransport = GetSharedAdePTTransport(*fAdePTConfiguration, std::move(adeptG4HepEmState), auxData, wdtPacked, + uniformFieldValues); } void AdePTTrackingManager::InitializeAdePT() @@ -127,7 +137,9 @@ void AdePTTrackingManager::InitializeAdePT() static std::condition_variable initCV; static bool commonInitDone = false; - // Global initialization: only done once by the first worker thread + // Global initialization: only done once by the first worker thread. This + // first worker closes the geometry, prepares the shared host-side inputs, + // and creates the shared transport, which performs the one-time device init. 
std::call_once(onceFlag, [&]() { // get number of threads from config, if available if (fNumThreads <= 0) { @@ -181,8 +193,8 @@ void AdePTTrackingManager::InitializeAdePT() // Now the fNumThreads is known and all workers can initialize fAdePTConfiguration->SetNumThreads(fNumThreads); - // The shared AdePT transport was already created and initialized by the first worker. - // The remaining workers only retrieve the shared pointer here. + // The shared AdePT transport was already created and device-initialized by + // the first worker. The remaining workers only retrieve the shared pointer. fAdeptTransport = GetSharedAdePTTransport(); // Initialize the GPU region list