Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions include/AdePT/core/AsyncAdePTTransport.hh
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ private:
int fCUDAHeapLimit{0}; ///< CUDA device heap limit
unsigned short fLastNParticlesOnCPU{0}; ///< Number N of last N particles that are finished on CPU
unsigned short fMaxWDTIter{5}; ///< Maximum number of Woodcock tracking iterations per step
std::vector<AdePTGeant4Integration> fG4IntegrationObjects; ///< Geant4 integration state owned per worker thread
std::unique_ptr<GPUstate, GPUstateDeleter> fGPUstate{nullptr}; ///< CUDA state placeholder
std::unique_ptr<TrackBuffer> fBuffer{nullptr}; ///< Buffers for transferring tracks between host and device
std::unique_ptr<G4HepEmState> fg4hepem_state; ///< The HepEm state singleton
Expand All @@ -74,14 +73,15 @@ private:
/// Safety factor needed to stall the GPU, in case nPartInFlight * fHitBufferSafetyFactor > available HitSlots
double fHitBufferSafetyFactor{1.5};

void Initialize(G4HepEmTrackingManagerSpecialized *hepEmTM);
void Initialize(G4HepEmTrackingManagerSpecialized *hepEmTM, AdePTGeant4Integration &g4Integration);
void InitBVH();
bool InitializeGeometry(const vecgeom::cxx::VPlacedVolume *world);
bool InitializePhysics(G4HepEmConfig *hepEmConfig);
void InitWDTOnDevice(const adeptint::WDTHostPacked &src, adeptint::WDTDeviceBuffers &dev, unsigned short maxIter);

public:
AsyncAdePTTransport(AdePTConfiguration &configuration, G4HepEmTrackingManagerSpecialized *hepEmTM);
AsyncAdePTTransport(AdePTConfiguration &configuration, G4HepEmTrackingManagerSpecialized *hepEmTM,
AdePTGeant4Integration &g4Integration);
AsyncAdePTTransport(const AsyncAdePTTransport &other) = delete;
~AsyncAdePTTransport();

Expand All @@ -94,13 +94,8 @@ public:
/// @brief Return the fGPURegionNames pointer as stored.
/// The constructor initializes that member from AdePTConfiguration::GetGPURegionNames().
std::vector<std::string> const *GetGPURegionNames() { return fGPURegionNames; }
/// @brief Return the fCPURegionNames pointer as stored.
/// The constructor initializes that member from AdePTConfiguration::GetCPURegionNames().
std::vector<std::string> const *GetCPURegionNames() { return fCPURegionNames; }
/// Block until transport of the given event is done.
void Flush(int threadId, int eventId);
void ProcessGPUSteps(int threadId, int eventId);
/// @brief Setup function used only in async AdePT: stores the given HepEm tracking manager in the
///        AdePTGeant4Integration object held for the given thread.
/// @param threadId thread Id, used as index into fG4IntegrationObjects (not range-checked)
/// @param hepEmTM specialized G4HepEmTrackingManager to store for that thread
void SetHepEmTrackingManagerForThread(int threadId, G4HepEmTrackingManagerSpecialized *hepEmTM);
/// @brief Access the Geant4 integration object stored for one thread.
/// @param threadId Index into fG4IntegrationObjects; the index is not range-checked.
/// @return Mutable reference to the element at that index.
AdePTGeant4Integration &GetGeant4Integration(int threadId)
{
  auto &perThreadIntegration = fG4IntegrationObjects[threadId];
  return perThreadIntegration;
}
void Flush(int threadId, int eventId, AdePTGeant4Integration &g4Integration);
void ProcessGPUSteps(int threadId, int eventId, AdePTGeant4Integration &g4Integration);
};

} // namespace AsyncAdePT
Expand Down
42 changes: 16 additions & 26 deletions include/AdePT/core/AsyncAdePTTransport.icc
Original file line number Diff line number Diff line change
Expand Up @@ -73,17 +73,17 @@ std::ostream &operator<<(std::ostream &stream, TrackDataWithIDs const &track)
// These definitions live in a header-included .icc file, so they must remain
// inline to avoid multiple definitions across translation units.
inline AsyncAdePTTransport::AsyncAdePTTransport(AdePTConfiguration &configuration,
G4HepEmTrackingManagerSpecialized *hepEmTM)
G4HepEmTrackingManagerSpecialized *hepEmTM,
AdePTGeant4Integration &g4Integration)
: fAdePTSeed{configuration.GetAdePTSeed()}, fNThread{(ushort)configuration.GetNumThreads()},
fTrackCapacity{(uint)(1024 * 1024 * configuration.GetMillionsOfTrackSlots())},
fLeakCapacity{(uint)(1024 * 1024 * configuration.GetMillionsOfLeakSlots())},
fScoringCapacity{(uint)(1024 * 1024 * configuration.GetMillionsOfHitSlots())},
fDebugLevel{configuration.GetVerbosity()}, fCUDAStackLimit{configuration.GetCUDAStackLimit()},
fCUDAHeapLimit{configuration.GetCUDAHeapLimit()}, fLastNParticlesOnCPU{configuration.GetLastNParticlesOnCPU()},
fMaxWDTIter{configuration.GetMaxWDTIter()}, fG4IntegrationObjects(fNThread), fEventStates(fNThread),
fGPUNetEnergy(fNThread, 0.0), fTrackInAllRegions{configuration.GetTrackInAllRegions()},
fGPURegionNames{configuration.GetGPURegionNames()}, fCPURegionNames{configuration.GetCPURegionNames()},
fReturnAllSteps{configuration.GetCallUserSteppingAction()},
fMaxWDTIter{configuration.GetMaxWDTIter()}, fEventStates(fNThread), fGPUNetEnergy(fNThread, 0.0),
fTrackInAllRegions{configuration.GetTrackInAllRegions()}, fGPURegionNames{configuration.GetGPURegionNames()},
fCPURegionNames{configuration.GetCPURegionNames()}, fReturnAllSteps{configuration.GetCallUserSteppingAction()},
fReturnFirstAndLastStep{configuration.GetCallUserTrackingAction() || configuration.GetCallUserSteppingAction()},
fBfieldFile{configuration.GetCovfieBfieldFile()}, fCPUCapacityFactor{configuration.GetCPUCapacityFactor()},
fCPUCopyFraction{configuration.GetHitBufferFlushThreshold()},
Expand All @@ -96,20 +96,14 @@ inline AsyncAdePTTransport::AsyncAdePTTransport(AdePTConfiguration &configuratio
std::atomic_init(&eventState, EventState::LeakedTracksRetrieved);
}

AsyncAdePTTransport::Initialize(hepEmTM);
AsyncAdePTTransport::Initialize(hepEmTM, g4Integration);
}

/// Destructor: forwards fGPUstate, the HepEm state, fGPUWorker and fWDTDev to async_adept_impl::FreeGPU.
/// NOTE(review): presumably FreeGPU stops the GPU worker and releases the device-side resources — confirm
/// against its implementation, which is not visible here.
inline AsyncAdePTTransport::~AsyncAdePTTransport()
{
// fg4hepem_state is dereferenced unconditionally, so it must be non-null whenever this destructor runs.
async_adept_impl::FreeGPU(std::ref(fGPUstate), *fg4hepem_state, fGPUWorker, fWDTDev);
}

/// @brief Store a HepEm tracking manager in the Geant4 integration object held for one thread.
/// @param threadId Index into fG4IntegrationObjects; must satisfy 0 <= threadId < fG4IntegrationObjects.size().
/// @param hepEmTM Tracking manager pointer forwarded to AdePTGeant4Integration::SetHepEmTrackingManager.
inline void AsyncAdePTTransport::SetHepEmTrackingManagerForThread(int threadId,
                                                                  G4HepEmTrackingManagerSpecialized *hepEmTM)
{
  // threadId is a signed int used as a vector index; catch negative or too-large ids in debug builds,
  // in the same way Flush asserts on its threadId before using it.
  assert(threadId >= 0 && static_cast<unsigned int>(threadId) < fG4IntegrationObjects.size());
  fG4IntegrationObjects[threadId].SetHepEmTrackingManager(hepEmTM);
}

inline void AsyncAdePTTransport::AddTrack(int pdg, uint64_t trackId, uint64_t parentId, double energy, double x,
double y, double z, double dirx, double diry, double dirz, double globalTime,
double localTime, double properTime, float weight, unsigned short stepCounter,
Expand Down Expand Up @@ -190,7 +184,8 @@ inline bool AsyncAdePTTransport::InitializePhysics(G4HepEmConfig *hepEmConfig)
return true;
}

inline void AsyncAdePTTransport::Initialize(G4HepEmTrackingManagerSpecialized *hepEmTM)
inline void AsyncAdePTTransport::Initialize(G4HepEmTrackingManagerSpecialized *hepEmTM,
AdePTGeant4Integration &g4Integration)
{
const auto numVolumes = vecgeom::GeoManager::Instance().GetRegisteredVolumesCount();
if (numVolumes == 0) throw std::runtime_error("AsyncAdePTTransport::Initialize: Number of geometry volumes is zero.");
Expand All @@ -209,11 +204,10 @@ inline void AsyncAdePTTransport::Initialize(G4HepEmTrackingManagerSpecialized *h
throw std::runtime_error("AsyncAdePTTransport::Initialize cannot initialize physics on GPU");

// Check VecGeom geometry matches Geant4. Initialize auxiliary per-LV data. Initialize scoring map.
fG4IntegrationObjects.front().CheckGeometry(fg4hepem_state.get());
g4Integration.CheckGeometry(fg4hepem_state.get());
adeptint::VolAuxData *auxData = new adeptint::VolAuxData[vecgeom::GeoManager::Instance().GetRegisteredVolumesCount()];
adeptint::WDTHostRaw wdtRaw;
fG4IntegrationObjects.front().InitVolAuxData(auxData, fg4hepem_state.get(), hepEmTM, fTrackInAllRegions,
fGPURegionNames, wdtRaw);
g4Integration.InitVolAuxData(auxData, fg4hepem_state.get(), hepEmTM, fTrackInAllRegions, fGPURegionNames, wdtRaw);

// Initialize volume auxiliary data on device
auto &volAuxArray = adeptint::VolAuxArray::GetInstance();
Expand Down Expand Up @@ -242,7 +236,7 @@ inline void AsyncAdePTTransport::Initialize(G4HepEmTrackingManagerSpecialized *h

fGPUstate = async_adept_impl::InitializeGPU(fTrackCapacity, fLeakCapacity, fScoringCapacity, fNThread, *fBuffer,
fCPUCapacityFactor, fCPUCopyFraction, fBfieldFile,
fG4IntegrationObjects.front().GetUniformField());
g4Integration.GetUniformField());
fGPUWorker = async_adept_impl::LaunchGPUWorker(fTrackCapacity, fLeakCapacity, fScoringCapacity, fNThread, *fBuffer,
*fGPUstate, fEventStates, fCV_G4Workers, fAdePTSeed, fDebugLevel,
fReturnAllSteps, fReturnFirstAndLastStep, fLastNParticlesOnCPU,
Expand All @@ -255,10 +249,8 @@ inline void AsyncAdePTTransport::InitBVH()
vecgeom::cxx::BVHManager::DeviceInit();
}

inline void AsyncAdePTTransport::ProcessGPUSteps(int threadId, int eventId)
inline void AsyncAdePTTransport::ProcessGPUSteps(int threadId, int eventId, AdePTGeant4Integration &g4Integration)
{

AdePTGeant4Integration &integrationInstance = fG4IntegrationObjects[threadId];
std::pair<GPUHit *, GPUHit *> range;
bool dataOnBuffer;

Expand All @@ -284,14 +276,14 @@ inline void AsyncAdePTTransport::ProcessGPUSteps(int threadId, int eventId)
}
auto blockSize = 1 + it->fNumSecondaries;
std::span<const GPUHit> gpuStepWithSecondaries(it, blockSize);
integrationInstance.ProcessGPUStep(gpuStepWithSecondaries, fReturnAllSteps, fReturnFirstAndLastStep);
g4Integration.ProcessGPUStep(gpuStepWithSecondaries, fReturnAllSteps, fReturnFirstAndLastStep);
it += 1 + it->fNumSecondaries;
}
async_adept_impl::CloseGPUBuffer(threadId, *fGPUstate, range.first, dataOnBuffer);
}
}

inline void AsyncAdePTTransport::Flush(G4int threadId, G4int eventId)
inline void AsyncAdePTTransport::Flush(G4int threadId, G4int eventId, AdePTGeant4Integration &g4Integration)
{
if (fDebugLevel >= 3) {
G4cout << "\nFlushing AdePT for event " << eventId << G4endl;
Expand All @@ -300,16 +292,14 @@ inline void AsyncAdePTTransport::Flush(G4int threadId, G4int eventId)
assert(static_cast<unsigned int>(threadId) < fBuffer->fromDeviceBuffers.size());
fEventStates[threadId].store(EventState::G4RequestsFlush, std::memory_order_release);

AdePTGeant4Integration &integrationInstance = fG4IntegrationObjects[threadId];

while (fEventStates[threadId].load(std::memory_order_acquire) < EventState::DeviceFlushed) {

{
std::unique_lock lock{fMutex_G4Workers};
fCV_G4Workers.wait(lock);
}

ProcessGPUSteps(threadId, eventId);
ProcessGPUSteps(threadId, eventId, g4Integration);
}

// Now device should be flushed, so retrieve the tracks:
Expand Down Expand Up @@ -360,7 +350,7 @@ inline void AsyncAdePTTransport::Flush(G4int threadId, G4int eventId)
G4cout << str.str() << G4endl;
}

integrationInstance.ReturnTracks(tracks.begin(), tracks.end(), fDebugLevel, fReturnFirstAndLastStep);
g4Integration.ReturnTracks(tracks.begin(), tracks.end(), fDebugLevel, fReturnFirstAndLastStep);
}

} // namespace AsyncAdePT
1 change: 1 addition & 0 deletions include/AdePT/integration/AdePTTrackingManager.hh
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ private:
const G4NavigationHistory *aG4NavigationHistory = nullptr);

std::unique_ptr<G4HepEmTrackingManagerSpecialized> fHepEmTrackingManager;
AdePTGeant4Integration fGeant4Integration;
static inline int fNumThreads{0};
std::set<G4Region const *> fGPURegions{};
std::shared_ptr<AdePTTransport> fAdeptTransport;
Expand Down
19 changes: 9 additions & 10 deletions src/AdePTTrackingManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ namespace {
using AdePTTransport = AdePTTrackingManager::AdePTTransport;
}

std::shared_ptr<AdePTTransport> InstantiateAdePT(AdePTConfiguration &conf, G4HepEmTrackingManagerSpecialized *hepEmTM)
std::shared_ptr<AdePTTransport> InstantiateAdePT(AdePTConfiguration &conf, G4HepEmTrackingManagerSpecialized *hepEmTM,
AdePTGeant4Integration &g4Integration)
{
static std::shared_ptr<AdePTTransport> AdePT{new AdePTTransport(conf, hepEmTM)};
static std::shared_ptr<AdePTTransport> AdePT{new AdePTTransport(conf, hepEmTM, g4Integration)};
return AdePT;
}

Expand All @@ -38,6 +39,7 @@ AdePTTrackingManager::AdePTTrackingManager(AdePTConfiguration *config, int verbo
: fHepEmTrackingManager(std::make_unique<G4HepEmTrackingManagerSpecialized>()), fAdePTConfiguration(config),
fVerbosity(verbosity)
{
fGeant4Integration.SetHepEmTrackingManager(fHepEmTrackingManager.get());
}

//....oooOO0OOooo........oooOO0OOooo........oooOO0OOooo........oooOO0OOooo......
Expand Down Expand Up @@ -111,7 +113,7 @@ void AdePTTrackingManager::InitializeAdePT()

// Create an instance of an AdePT transport engine. This can either be one engine per thread or a shared engine for
// all threads.
fAdeptTransport = InstantiateAdePT(*fAdePTConfiguration, fHepEmTrackingManager.get());
fAdeptTransport = InstantiateAdePT(*fAdePTConfiguration, fHepEmTrackingManager.get(), fGeant4Integration);

// common init done, can notify other workers to proceed their initialization
{
Expand All @@ -132,10 +134,7 @@ void AdePTTrackingManager::InitializeAdePT()
fAdePTConfiguration->SetNumThreads(fNumThreads);

// AdePTTransport was already initialized by the first G4 worker. The other workers get its pointer here
fAdeptTransport = InstantiateAdePT(*fAdePTConfiguration, fHepEmTrackingManager.get());
// All workers store the pointer to their HepEmTrackingManager in fAdePTTransport. This is required for nuclear
// processes
fAdeptTransport->SetHepEmTrackingManagerForThread(tid, fHepEmTrackingManager.get());
fAdeptTransport = InstantiateAdePT(*fAdePTConfiguration, fHepEmTrackingManager.get(), fGeant4Integration);

// Initialize the GPU region list
if (!fAdePTConfiguration->GetTrackInAllRegions()) {
Expand Down Expand Up @@ -266,7 +265,7 @@ void AdePTTrackingManager::FlushEvent()
G4cout << "No more particles on the stack, triggering shower to flush the AdePT buffer." << G4endl;

fAdeptTransport->Flush(G4Threading::G4GetThreadId(),
G4EventManager::GetEventManager()->GetConstCurrentEvent()->GetEventID());
G4EventManager::GetEventManager()->GetConstCurrentEvent()->GetEventID(), fGeant4Integration);
}

void AdePTTrackingManager::ProcessTrack(G4Track *aTrack)
Expand All @@ -282,8 +281,8 @@ void AdePTTrackingManager::ProcessTrack(G4Track *aTrack)

// Check for GPU steps, to alleviate pressure on the GPU step buffer
G4int threadId = G4Threading::G4GetThreadId();
fAdeptTransport->ProcessGPUSteps(threadId, eventID);
auto &trackMapper = fAdeptTransport->GetGeant4Integration(threadId).GetHostTrackDataMapper();
fAdeptTransport->ProcessGPUSteps(threadId, eventID, fGeant4Integration);
auto &trackMapper = fGeant4Integration.GetHostTrackDataMapper();

if (fCurrentEventID != eventID) trackMapper.beginEvent(eventID);

Expand Down
Loading