8#include "quest/include/environment.h"
9#include "quest/include/precision.h"
10#include "quest/include/modes.h"
12#include "quest/src/core/errors.hpp"
13#include "quest/src/core/memory.hpp"
14#include "quest/src/core/printer.hpp"
15#include "quest/src/core/autodeployer.hpp"
16#include "quest/src/core/validation.hpp"
17#include "quest/src/core/randomiser.hpp"
18#include "quest/src/comm/comm_config.hpp"
19#include "quest/src/cpu/cpu_config.hpp"
20#include "quest/src/gpu/gpu_config.hpp"
// File-scope pointer to the QuESTEnv used by the functions in this file.
// It starts out null; code in this file tests `globalEnvPtr != nullptr`
// to decide whether an environment currently exists, and resets it to
// nullptr again during finalisation.
49static QuESTEnv* globalEnvPtr =
nullptr;
// Starts out false and is set to true at the point where this file resets
// globalEnvPtr to nullptr. It is forwarded, together with the pointer
// check, to validate_envNeverInit().
63static bool hasEnvBeenFinalized =
false;
// Validates the requested deployment and records it in *globalEnvPtr.
//
// useDistrib, useGpuAccel, useMultithread:
//     requested deployment flags. They are validated, handed to
//     autodep_chooseQuESTEnvDeployment(), and finally stored in the env.
// caller:
//     forwarded unchanged to every validate_* call (presumably the name of
//     the user-facing function, for error reporting — confirm in validation.hpp).
72void validateAndInitCustomQuESTEnv(
int useDistrib,
int useGpuAccel,
int useMultithread,
const char* caller) {
// The validator receives "does an env currently exist?" and "was one ever
// finalised?" as two separate booleans.
76 validate_envNeverInit(globalEnvPtr !=
nullptr, hasEnvBeenFinalized, caller);
82 validate_newEnvDeploymentMode(useDistrib, useGpuAccel, useMultithread, caller);
// NOTE(review): the flags are used as plain booleans after this call, so it
// presumably overwrites them in place (e.g. resolving modeflag::USE_AUTO) —
// confirm the callee takes them by reference.
85 autodep_chooseQuESTEnvDeployment(useDistrib, useGpuAccel, useMultithread);
94 validate_newEnvDistributedBetweenPower2Nodes(caller);
103 gpu_bindLocalGPUsToNodes();
111 validate_newEnvNodesEachHaveUniqueGpu(caller);
// The cuQuantum compatibility check only runs when GPU acceleration was
// requested and gpu_isCuQuantumCompiled() reports true.
118 if (useGpuAccel && gpu_isCuQuantumCompiled()) {
119 validate_gpuIsCuQuantumCompatible(caller);
124 rand_setSeedsToDefault();
// A still-null env pointer at this point is reported through the
// allocation-failure error routine.
130 if (globalEnvPtr ==
nullptr)
131 error_allocOfQuESTEnvFailed();
// Record the deployment flags in the env.
134 globalEnvPtr->isMultithreaded = useMultithread;
135 globalEnvPtr->isGpuAccelerated = useGpuAccel;
136 globalEnvPtr->isDistributed = useDistrib;
// The comm layer is only queried when distributed; otherwise the env is
// recorded as rank 0 of 1 node.
139 globalEnvPtr->rank = (useDistrib)? comm_getRank() : 0;
140 globalEnvPtr->numNodes = (useDistrib)? comm_getNumNodes() : 1;
// Assembles the label/value rows describing numerical precision.
// For each of qreal, qcomp and qindex the value is the type name reported by
// the printer, followed by sizeof(type) formatted as a memory string inside
// parentheses. The last row is the current validation epsilon.
150void printPrecisionInfo() {
159 {
"qreal", printer_getQrealType() +
" (" + printer_getMemoryWithUnitStr(
sizeof(qreal)) +
")"},
163 {
"qcomp", printer_getQcompType() +
" (" + printer_getMemoryWithUnitStr(
sizeof(qcomp)) +
")"},
165 {
"qindex", printer_getQindexType() +
" (" + printer_getMemoryWithUnitStr(
sizeof(qindex)) +
")"},
// Epsilon is read via validateconfig_getEpsilon() and stringified.
168 {
"validationEpsilon", printer_toStr(validateconfig_getEpsilon())},
// Assembles the label/value rows describing compile-time capabilities.
// Each row pairs a label with the result of the matching *_is...Compiled()
// query from the comm, gpu or cpu layer.
173void printCompilationInfo() {
177 {
"isMpiCompiled", comm_isMpiCompiled()},
178 {
"isGpuCompiled", gpu_isGpuCompiled()},
179 {
"isOmpCompiled", cpu_isOpenmpCompiled()},
180 {
"isCuQuantumCompiled", gpu_isCuQuantumCompiled()},
// Assembles the label/value rows describing the active deployment.
// The values are the three flags stored in *globalEnvPtr; the labels use
// Mpi/Gpu/Omp names for isDistributed/isGpuAccelerated/isMultithreaded.
// NOTE(review): globalEnvPtr is dereferenced here without a visible null
// check, so this presumably relies on the caller having validated that an
// env exists — confirm.
185void printDeploymentInfo() {
189 {
"isMpiEnabled", globalEnvPtr->isDistributed},
190 {
"isGpuEnabled", globalEnvPtr->isGpuAccelerated},
191 {
"isOmpEnabled", globalEnvPtr->isMultithreaded},
// CPU information rows.
// The short names used below (pm, pn, na, un) are presumably the string
// constants brought in from printer_substrings — confirm in printer.hpp.
198 using namespace printer_substrings;
// The local RAM capacity is formatted as a memory string with `pm` appended.
203 ram = printer_getMemoryWithUnitStr(mem_tryGetLocalRamCapacityInBytes()) + pm;
204 }
// The handler is deliberately empty: if mem::COULD_NOT_QUERY_RAM is thrown,
// the assignment to `ram` above never happens and `ram` keeps its prior value.
// NOTE(review): the exception is caught by value and left unnamed-in-use;
// catching by const reference would be the more conventional form.
catch(mem::COULD_NOT_QUERY_RAM e){};
// Core count comes from the standard library rather than from the cpu layer.
211 {
"numCpuCores", printer_toStr(std::thread::hardware_concurrency()) + pm},
// The two OpenMP rows only query the cpu layer when OpenMP is compiled;
// otherwise they report `na`.
212 {
"numOmpProcs", (cpu_isOpenmpCompiled())? printer_toStr(cpu_getNumOpenmpProcessors()) + pm : na},
213 {
"numOmpThrds", (cpu_isOpenmpCompiled())? printer_toStr(cpu_getCurrentNumThreads()) + pn : na},
// Free CPU memory is never queried here; the row always shows `un`.
215 {
"cpuMemoryFree", un},
// GPU information rows.
// The short names used below (pg, na) are presumably string constants from
// printer_substrings — confirm in printer.hpp.
222 using namespace printer_substrings;
// isComp: GPU support was compiled.
// isGpu:  compiled AND gpu_isGpuAvailable(); the && short-circuits, so the
//         availability query is only made when GPU support is compiled.
229 bool isComp = gpu_isGpuCompiled();
230 bool isGpu = isComp && gpu_isGpuAvailable();
// The GPU count only requires compilation (isComp) ...
234 {
"numGpus", isComp? printer_toStr(gpu_getNumberOfLocalGpus()) : na},
// ... whereas every remaining row requires isGpu, and reports `na` otherwise.
235 {
"gpuDirect", isGpu? printer_toStr(gpu_isDirectGpuCommPossible()) : na},
236 {
"gpuMemPools", isGpu? printer_toStr(gpu_doesGpuSupportMemPools()) : na},
237 {
"gpuMemory", isGpu? printer_getMemoryWithUnitStr(gpu_getTotalMemoryInBytes()) + pg : na},
238 {
"gpuMemoryFree", isGpu? printer_getMemoryWithUnitStr(gpu_getCurrentAvailableMemoryInBytes()) + pg : na},
239 {
"gpuCache", isGpu? printer_getMemoryWithUnitStr(gpu_getCacheMemoryInBytes()) + pg : na},
// Assembles the label/value rows describing distribution.
244void printDistributionInfo() {
246 using namespace printer_substrings;
// GPU-awareness of MPI is only queried when MPI is compiled; otherwise the
// row shows `na` (presumably a printer_substrings constant — confirm).
250 {
"isMpiGpuAware", (comm_isMpiCompiled())? printer_toStr(comm_isMpiGpuAware()) : na},
// The node count is read from the env rather than from the comm layer.
251 {
"numMpiNodes", printer_toStr(globalEnvPtr->numNodes)},
// Assembles the rows describing how many qubits a Qureg may have under the
// various deployments and overflow limits.
//
// isDensMatr: selects the "density matrix" title prefix when true and
//     "statevector" otherwise, and is forwarded to every mem_* limit query.
256void printQuregSizeLimits(
bool isDensMatr) {
258 using namespace printer_substrings;
261 int numNodes = globalEnvPtr->numNodes;
// The CPU figures default to `un` and are only overwritten if the RAM query
// below succeeds.
264 string maxQbForCpu = un;
265 string maxQbForMpiCpu = un;
269 qindex cpuMem = mem_tryGetLocalRamCapacityInBytes();
// Single-node limit: the same query with a node count of 1.
270 maxQbForCpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, 1, cpuMem));
// The multi-node limit is only computed when the env is distributed.
273 if (globalEnvPtr->isDistributed)
274 maxQbForMpiCpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, numNodes, cpuMem));
281 }
// Empty handler: on mem::COULD_NOT_QUERY_RAM the CPU figures keep the
// defaults assigned above.
catch(mem::COULD_NOT_QUERY_RAM e) {};
// The GPU figures default to `na` and are only computed when the env is
// GPU-accelerated.
284 string maxQbForGpu = na;
285 string maxQbForMpiGpu = na;
288 if (globalEnvPtr->isGpuAccelerated) {
// Uses the memory currently available on the GPU, not its total capacity.
289 qindex gpuMem = gpu_getCurrentAvailableMemoryInBytes();
290 maxQbForGpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, 1, gpuMem));
293 if (globalEnvPtr->isDistributed)
294 maxQbForMpiGpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, numNodes, gpuMem));
298 string prefix = (isDensMatr)?
"density matrix" :
"statevector";
299 string title = prefix +
" limits";
// The minimum qubit count for distribution is only meaningful with more than
// one node; otherwise the row shows `na`.
303 {
"minQubitsForMpi", (numNodes>1)? printer_toStr(mem_getMinNumQubitsForDistribution(numNodes)) : na},
304 {
"maxQubitsForCpu", maxQbForCpu},
305 {
"maxQubitsForGpu", maxQbForGpu},
306 {
"maxQubitsForMpiCpu", maxQbForMpiCpu},
307 {
"maxQubitsForMpiGpu", maxQbForMpiGpu},
// The last two rows are overflow limits obtained from the mem layer; they do
// not use the RAM or GPU memory figures computed above.
308 {
"maxQubitsForMemOverflow", printer_toStr(mem_getMaxNumQuregQubitsBeforeGlobalMemSizeofOverflow(isDensMatr, numNodes))},
309 {
"maxQubitsForIndOverflow", printer_toStr(mem_getMaxNumQuregQubitsBeforeIndexOverflow(isDensMatr))},
// Tabulates the automatically chosen Qureg deployment for increasing qubit
// counts, and prints the table under a title.
//
// isDensMatr: selects the "density matrix" title prefix when true and
//     "statevector" otherwise, and is forwarded to the mem and autodep queries.
314void printQuregAutoDeployments(
bool isDensMatr) {
// Each row is a (label, value) pair of strings.
317 std::vector<std::tuple<string, string>> rows;
// use*  : deployment flags for the qubit count currently being examined.
// prev* : the flags recorded at the end of the previous loop iteration.
320 int useDistrib, useGpuAccel, useMulti;
// NOTE(review): these are declared without initialisers here; confirm they
// are assigned before the first comparison in the loop below.
321 int prevDistrib, prevGpuAccel, prevMulti;
// Upper bound (exclusive) on the qubit counts examined.
330 int maxQubits = mem_getMaxNumQuregQubitsBeforeGlobalMemSizeofOverflow(isDensMatr, globalEnvPtr->numNodes);
332 for (
int numQubits=1; numQubits<maxQubits; numQubits++) {
// Reset every flag to "auto" so the chooser decides afresh each iteration.
335 useDistrib = modeflag::USE_AUTO;
336 useGpuAccel = modeflag::USE_AUTO;
337 useMulti = modeflag::USE_AUTO;;
// NOTE(review): the flags are compared right after this call, so the chooser
// presumably writes its decision back into them — confirm it takes references.
338 autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMulti, *globalEnvPtr);
// Detects the case where the deployment is unchanged from the previous
// qubit count.
341 if (useDistrib == prevDistrib &&
342 useGpuAccel == prevGpuAccel &&
343 useMulti == prevMulti)
// A row is labelled "<numQubits> qubits".
356 rows.push_back({printer_toStr(numQubits) +
" qubits", value});
// Remember this iteration's deployment for the next comparison.
359 prevDistrib = useDistrib;
360 prevGpuAccel = useGpuAccel;
361 prevMulti = useMulti;
365 string prefix = (isDensMatr)?
"density matrix" :
"statevector";
366 string title = prefix +
" autodeployment";
// Two alternative outputs under the same title: a fixed placeholder message,
// or the collected rows.
368 print_table(title,
"(no parallelisations available)"):
369 print_table(title, rows);
// Custom initialisation: forwards the caller-supplied deployment flags.
// __func__ expands to the name of the enclosing function and is passed as
// the `caller` argument.
385 validateAndInitCustomQuESTEnv(useDistrib, useGpuAccel, useMultithread, __func__);
// Default initialisation: every deployment flag is left as
// modeflag::USE_AUTO.
391 validateAndInitCustomQuESTEnv(modeflag::USE_AUTO, modeflag::USE_AUTO, modeflag::USE_AUTO, __func__);
// Yields 1 when globalEnvPtr is non-null and 0 otherwise (the bool result
// of the comparison is converted to int).
397 return (
int) (globalEnvPtr !=
nullptr);
// The env is validated as initialised before globalEnvPtr is dereferenced.
402 validate_envIsInit(__func__);
// Returns the QuESTEnv object that globalEnvPtr points to.
405 return *globalEnvPtr;
// Finalisation steps. The env must currently be initialised, because its
// flags are read below.
410 validate_envIsInit(__func__);
416 if (globalEnvPtr->isGpuAccelerated)
// cuQuantum is finalised only when the env is GPU-accelerated and
// gpu_isCuQuantumCompiled() reports true.
419 if (globalEnvPtr->isGpuAccelerated && gpu_isCuQuantumCompiled())
420 gpu_finalizeCuQuantum();
422 if (globalEnvPtr->isDistributed) {
// Reset the pointer so that later `globalEnvPtr != nullptr` checks report
// that no env exists ...
429 globalEnvPtr =
nullptr;
// ... and record that finalisation has happened; this flag is what
// validate_envNeverInit() receives on a later initialisation attempt.
432 hasEnvBeenFinalized =
true;
// The env must be initialised before its flags are read. The two
// conditions that follow gate work on the GPU-acceleration flag and on the
// distribution flag respectively.
437 validate_envIsInit(__func__);
439 if (globalEnvPtr->isGpuAccelerated)
442 if (globalEnvPtr->isDistributed)
// Report of the execution environment. Two validations run before anything
// is printed.
448 validate_envIsInit(__func__);
449 validate_numReportedNewlinesAboveZero(__func__);
453 print_label(
"QuEST execution environment");
// Named values for the `isDensMatr` argument of the two per-Qureg-type
// printers called below.
455 bool statevec =
false;
456 bool densmatr =
true;
461 printPrecisionInfo();
462 printCompilationInfo();
463 printDeploymentInfo();
466 printDistributionInfo();
// The size limits and the auto-deployment table are each printed twice:
// first for statevectors, then for density matrices.
467 printQuregSizeLimits(statevec);
468 printQuregSizeLimits(densmatr);
469 printQuregAutoDeployments(statevec);
470 printQuregAutoDeployments(densmatr);
473 print_oneFewerNewlines();
// Writes a one-line summary of the environment into `str`.
478 validate_envIsInit(__func__);
// The thread count is only queried when OpenMP is compiled; otherwise it is
// reported as 1.
482 int numThreads = cpu_isOpenmpCompiled()? cpu_getCurrentNumThreads() : 1;
// Both flags are 0 unless the env is GPU-accelerated; the && short-circuits,
// so the gpu_* queries are only made in that case.
483 int cuQuantum = env.isGpuAccelerated && gpu_isCuQuantumCompiled();
484 int gpuDirect = env.isGpuAccelerated && gpu_isDirectGpuCommPossible();
// snprintf writes at most 200 bytes, terminator included, so the destination
// buffer must hold at least 200 chars. The format consumes seven int
// arguments, in the order named in the string.
486 snprintf(str, 200,
"CUDA=%d OpenMP=%d MPI=%d threads=%d ranks=%d cuQuantum=%d gpuDirect=%d",
487 env.isGpuAccelerated,
void getEnvironmentString(char str[200])
void initCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMultithread)
const int PERMIT_NODES_TO_SHARE_GPU