8#include "quest/include/environment.h"
9#include "quest/include/precision.h"
10#include "quest/include/modes.h"
12#include "quest/src/core/errors.hpp"
13#include "quest/src/core/memory.hpp"
14#include "quest/src/core/printer.hpp"
15#include "quest/src/core/autodeployer.hpp"
16#include "quest/src/core/validation.hpp"
17#include "quest/src/core/randomiser.hpp"
18#include "quest/src/comm/comm_config.hpp"
19#include "quest/src/cpu/cpu_config.hpp"
20#include "quest/src/gpu/gpu_config.hpp"
// File-local pointer to the single QuESTEnv instance. It is nullptr while no
// environment is active: code in this file tests it against nullptr to decide
// whether an environment exists, and assigns nullptr to it again later on.
49static QuESTEnv* globalEnvPtr =
nullptr;
// File-local flag, initially false, that this file later sets to true right
// after clearing globalEnvPtr. It is handed to validate_envNeverInit() together
// with the "is currently initialised" test.
63static bool hasEnvBeenFinalized =
false;
// Validates the requested deployment flags, runs the deployment/GPU checks and
// RNG seeding below, and finally copies a populated QuESTEnv into the storage
// pointed to by globalEnvPtr.
//
//   useDistrib, useGpuAccel, useMultithread: requested deployment flags; this
//       file also calls this function with modeflag::USE_AUTO for all three.
//   caller: name forwarded to every validate_* call (callers pass __func__).
//
// NOTE(review): several statements of this function are not visible in this
// view (the assignment of globalEnvPtr, the opening of the `env` initialiser
// and the closing braces) - confirm against the full file before editing.
72void validateAndInitCustomQuESTEnv(
int useDistrib,
int useGpuAccel,
int useMultithread,
const char* caller) {
// passes both "an env currently exists" and "an env was previously finalised"
76 validate_envNeverInit(globalEnvPtr !=
nullptr, hasEnvBeenFinalized, caller);
82 validate_newEnvDeploymentMode(useDistrib, useGpuAccel, useMultithread, caller);
// presumably overwrites the three flags in place (e.g. resolving USE_AUTO),
// since they are used as plain booleans below - TODO confirm signature
85 autodep_chooseQuESTEnvDeployment(useDistrib, useGpuAccel, useMultithread);
94 validate_newEnvDistributedBetweenPower2Nodes(caller);
103 gpu_bindLocalGPUsToNodes();
// unique-GPU-per-node check is skipped when PERMIT_NODES_TO_SHARE_GPU is set
110 if (useGpuAccel && useDistrib && ! PERMIT_NODES_TO_SHARE_GPU)
111 validate_newEnvNodesEachHaveUniqueGpu(caller);
// cuQuantum compatibility is only checked when GPU acceleration is in use
// and cuQuantum was compiled in
118 if (useGpuAccel && gpu_isCuQuantumCompiled()) {
119 validate_gpuIsCuQuantumCompatible(caller);
124 rand_setSeedsToDefault();
// NOTE(review): the statement that assigns globalEnvPtr prior to this null
// check is not visible here
130 if (globalEnvPtr ==
nullptr)
131 error_allocOfQuESTEnvFailed();
// designated initialisers of a local QuESTEnv named `env` (its opening line
// is not visible here)
139 .isMultithreaded = useMultithread,
140 .isGpuAccelerated = useGpuAccel,
141 .isDistributed = useDistrib,
// non-distributed environments report rank 0 of 1 node without querying comm
144 .rank = (useDistrib)? comm_getRank() : 0,
145 .numNodes = (useDistrib)? comm_getNumNodes() : 1,
// byte-copy the local struct into the storage behind globalEnvPtr
147 memcpy(globalEnvPtr, &env,
sizeof(
QuESTEnv));
// Builds the key/value rows describing numerical precision: for each of qreal,
// qcomp and qindex, the type name followed by its sizeof() rendered as a memory
// string in parentheses, plus the current validation epsilon.
// NOTE(review): the call that consumes these rows is not visible in this view.
157void printPrecisionInfo() {
166 {
"qreal", printer_getQrealType() +
" (" + printer_getMemoryWithUnitStr(
sizeof(qreal)) +
")"},
170 {
"qcomp", printer_getQcompType() +
" (" + printer_getMemoryWithUnitStr(
sizeof(qcomp)) +
")"},
172 {
"qindex", printer_getQindexType() +
" (" + printer_getMemoryWithUnitStr(
sizeof(qindex)) +
")"},
175 {
"validationEpsilon", printer_toStr(validateconfig_getEpsilon())},
// Builds the key/value rows reporting which backends were compiled in, as
// answered by the comm/gpu/cpu config query functions (MPI, GPU, OpenMP,
// cuQuantum). These reflect compile-time availability, not whether the current
// environment uses them.
// NOTE(review): the call that consumes these rows is not visible in this view.
180void printCompilationInfo() {
184 {
"isMpiCompiled", comm_isMpiCompiled()},
185 {
"isGpuCompiled", gpu_isGpuCompiled()},
186 {
"isOmpCompiled", cpu_isOpenmpCompiled()},
187 {
"isCuQuantumCompiled", gpu_isCuQuantumCompiled()},
// Builds the key/value rows reporting which deployments the active environment
// uses, read from the isDistributed / isGpuAccelerated / isMultithreaded fields
// of *globalEnvPtr. Dereferences globalEnvPtr without a null check, so an
// environment must already be initialised.
// NOTE(review): the call that consumes these rows is not visible in this view.
192void printDeploymentInfo() {
196 {
"isMpiEnabled", globalEnvPtr->isDistributed},
197 {
"isGpuEnabled", globalEnvPtr->isGpuAccelerated},
198 {
"isOmpEnabled", globalEnvPtr->isMultithreaded},
// CPU information rows: core count, OpenMP processor/thread counts and RAM.
// NOTE(review): the enclosing function header, the declaration of `ram` and
// the `try {` opener are not visible in this view.
// pm, pn, na and un are not declared in the visible lines; presumably they are
// string suffixes/placeholders from printer_substrings - TODO confirm.
205 using namespace printer_substrings;
210 ram = printer_getMemoryWithUnitStr(mem_tryGetLocalRamCapacityInBytes()) + pm;
211 }
// A failed RAM query is swallowed: the handler is empty, so `ram` keeps the
// value it had before the attempt.
// NOTE(review): the exception is caught by value; catching by const reference
// is the usual idiom.
catch(mem::COULD_NOT_QUERY_RAM e){};
218 {
"numCpuCores", printer_toStr(std::thread::hardware_concurrency()) + pm},
// OpenMP figures are only queried when OpenMP was compiled; otherwise `na`
219 {
"numOmpProcs", (cpu_isOpenmpCompiled())? printer_toStr(cpu_getNumOpenmpProcessors()) + pm : na},
220 {
"numOmpThrds", (cpu_isOpenmpCompiled())? printer_toStr(cpu_getCurrentNumThreads()) + pn : na},
// free CPU memory is never queried here; the row always shows `un`
222 {
"cpuMemoryFree", un},
// GPU information rows: device count, direct-comm and mem-pool support, and
// total / currently-available / cache memory.
// NOTE(review): the enclosing function header and the call consuming these
// rows are not visible in this view.
// pg and na are not declared in the visible lines; presumably they come from
// printer_substrings - TODO confirm.
229 using namespace printer_substrings;
// isComp: GPU backend was compiled. isGpu: additionally a GPU is available.
// The && short-circuits, so gpu_isGpuAvailable() is only called when compiled.
236 bool isComp = gpu_isGpuCompiled();
237 bool isGpu = isComp && gpu_isGpuAvailable();
// the GPU count only needs the backend compiled; every other row needs an
// available GPU and otherwise shows `na`
241 {
"numGpus", isComp? printer_toStr(gpu_getNumberOfLocalGpus()) : na},
242 {
"gpuDirect", isGpu? printer_toStr(gpu_isDirectGpuCommPossible()) : na},
243 {
"gpuMemPools", isGpu? printer_toStr(gpu_doesGpuSupportMemPools()) : na},
244 {
"gpuMemory", isGpu? printer_getMemoryWithUnitStr(gpu_getTotalMemoryInBytes()) + pg : na},
245 {
"gpuMemoryFree", isGpu? printer_getMemoryWithUnitStr(gpu_getCurrentAvailableMemoryInBytes()) + pg : na},
246 {
"gpuCache", isGpu? printer_getMemoryWithUnitStr(gpu_getCacheMemoryInBytes()) + pg : na},
// Builds the key/value rows describing distribution: whether MPI is GPU-aware
// (only queried when MPI was compiled, otherwise `na`) and the node count
// stored in *globalEnvPtr. Dereferences globalEnvPtr without a null check.
// NOTE(review): the call that consumes these rows is not visible in this view.
251void printDistributionInfo() {
253 using namespace printer_substrings;
257 {
"isMpiGpuAware", (comm_isMpiCompiled())? printer_toStr(comm_isMpiGpuAware()) : na},
258 {
"numMpiNodes", printer_toStr(globalEnvPtr->numNodes)},
// Builds the rows listing qubit-count limits for a statevector
// (isDensMatr=false) or density matrix (isDensMatr=true) under each deployment:
// the maximum that fits in CPU and GPU memory (single node and across all
// nodes), the minimum needed for distribution, and the overflow limits.
// NOTE(review): the `try {` opener, some closing braces and the call that
// consumes `title` and these rows are not visible in this view.
263void printQuregSizeLimits(
bool isDensMatr) {
265 using namespace printer_substrings;
268 int numNodes = globalEnvPtr->numNodes;
// CPU limits start as `un` and are only replaced if the RAM query succeeds
271 string maxQbForCpu = un;
272 string maxQbForMpiCpu = un;
276 qindex cpuMem = mem_tryGetLocalRamCapacityInBytes();
277 maxQbForCpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, 1, cpuMem));
// the multi-node figure is only computed for a distributed environment
280 if (globalEnvPtr->isDistributed)
281 maxQbForMpiCpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, numNodes, cpuMem));
288 }
// a failed RAM query is swallowed (empty handler), leaving the `un` defaults
catch(mem::COULD_NOT_QUERY_RAM e) {};
// GPU limits start as `na` and are only computed for a GPU-accelerated env
291 string maxQbForGpu = na;
292 string maxQbForMpiGpu = na;
295 if (globalEnvPtr->isGpuAccelerated) {
// uses the memory currently available on the GPU, not its total capacity
296 qindex gpuMem = gpu_getCurrentAvailableMemoryInBytes();
297 maxQbForGpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, 1, gpuMem));
300 if (globalEnvPtr->isDistributed)
301 maxQbForMpiGpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, numNodes, gpuMem));
305 string prefix = (isDensMatr)?
"density matrix" :
"statevector";
306 string title = prefix +
" limits";
// the distribution minimum is only meaningful with more than one node
310 {
"minQubitsForMpi", (numNodes>1)? printer_toStr(mem_getMinNumQubitsForDistribution(numNodes)) : na},
311 {
"maxQubitsForCpu", maxQbForCpu},
312 {
"maxQubitsForGpu", maxQbForGpu},
313 {
"maxQubitsForMpiCpu", maxQbForMpiCpu},
314 {
"maxQubitsForMpiGpu", maxQbForMpiGpu},
315 {
"maxQubitsForMemOverflow", printer_toStr(mem_getMaxNumQuregQubitsBeforeGlobalMemSizeofOverflow(isDensMatr, numNodes))},
316 {
"maxQubitsForIndOverflow", printer_toStr(mem_getMaxNumQuregQubitsBeforeIndexOverflow(isDensMatr))},
// Collects the table rows describing automatic Qureg deployment for a
// statevector (isDensMatr=false) or density matrix (isDensMatr=true). For each
// qubit count from 1 up to (but excluding) the global-memory sizeof-overflow
// limit, it asks autodep_chooseQuregDeployment() for a deployment, compares it
// with the previous iteration's, and appends a "<n> qubits" row.
// NOTE(review): the body of the `if` comparison, the construction of `value`,
// closing braces and the condition selecting between the two print_table
// calls are not visible in this view.
321void printQuregAutoDeployments(
bool isDensMatr) {
324 std::vector<std::tuple<string, string>> rows;
327 int useDistrib, useGpuAccel, useMulti;
// NOTE(review): prev* have no initialiser on this declaration - confirm they
// are assigned before the first comparison inside the loop
328 int prevDistrib, prevGpuAccel, prevMulti;
337 int maxQubits = mem_getMaxNumQuregQubitsBeforeGlobalMemSizeofOverflow(isDensMatr, globalEnvPtr->numNodes);
339 for (
int numQubits=1; numQubits<maxQubits; numQubits++) {
// reset every flag to AUTO each iteration before asking for a deployment;
// presumably the call below overwrites them in place - TODO confirm
342 useDistrib = modeflag::USE_AUTO;
343 useGpuAccel = modeflag::USE_AUTO;
344 useMulti = modeflag::USE_AUTO;;
345 autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMulti, *globalEnvPtr);
// true when the deployment is identical to the previous qubit count's
348 if (useDistrib == prevDistrib &&
349 useGpuAccel == prevGpuAccel &&
350 useMulti == prevMulti)
363 rows.push_back({printer_toStr(numQubits) +
" qubits", value});
// remember this deployment for comparison in the next iteration
366 prevDistrib = useDistrib;
367 prevGpuAccel = useGpuAccel;
368 prevMulti = useMulti;
372 string prefix = (isDensMatr)?
"density matrix" :
"statevector";
373 string title = prefix +
" autodeployment";
// two alternative outputs: a placeholder message, or the collected rows
375 print_table(title,
"(no parallelisations available)"):
376 print_table(title, rows);
// Forwards the three deployment flags to the shared init routine, passing the
// enclosing function's own name (__func__) as `caller`.
// NOTE(review): the enclosing function header is not visible at this position.
392 validateAndInitCustomQuESTEnv(useDistrib, useGpuAccel, useMultithread, __func__);
// Initialises the environment with every deployment flag set to
// modeflag::USE_AUTO, passing the enclosing function's name as `caller`.
// NOTE(review): the enclosing function header is not visible in this view.
398 validateAndInitCustomQuESTEnv(modeflag::USE_AUTO, modeflag::USE_AUTO, modeflag::USE_AUTO, __func__);
// Returns 1 when globalEnvPtr is non-null and 0 otherwise (bool cast to int).
// NOTE(review): the enclosing function header is not visible in this view.
404 return (
int) (globalEnvPtr !=
nullptr);
// Validates that an environment is initialised, then returns the QuESTEnv
// that globalEnvPtr points to.
// NOTE(review): the enclosing function header (and so whether this returns a
// copy or a reference) is not visible in this view.
409 validate_envIsInit(__func__);
412 return *globalEnvPtr;
// Tears down the active environment: after validating that one exists, it
// runs GPU / cuQuantum / distributed teardown steps, clears globalEnvPtr and
// sets hasEnvBeenFinalized.
// NOTE(review): the enclosing function header, the bodies of the first and
// last `if`, and any release of the storage behind globalEnvPtr are not
// visible in this view - confirm the allocation is freed before the pointer
// is cleared.
417 validate_envIsInit(__func__);
423 if (globalEnvPtr->isGpuAccelerated)
// cuQuantum is only finalised when the env is GPU-accelerated and cuQuantum
// was compiled
426 if (globalEnvPtr->isGpuAccelerated && gpu_isCuQuantumCompiled())
427 gpu_finalizeCuQuantum();
429 if (globalEnvPtr->isDistributed) {
// mark the environment as gone...
436 globalEnvPtr =
nullptr;
// ...and record that finalisation happened; validate_envNeverInit() receives
// this flag when a new environment is requested
439 hasEnvBeenFinalized =
true;
// Validates that an environment is initialised, then branches on whether it
// is GPU-accelerated and whether it is distributed.
// NOTE(review): the enclosing function header and the statements guarded by
// both `if`s are not visible in this view.
444 validate_envIsInit(__func__);
446 if (globalEnvPtr->isGpuAccelerated)
449 if (globalEnvPtr->isDistributed)
// Prints the full environment report: a heading followed by the precision,
// compilation, deployment and distribution sections, then the size limits and
// auto-deployment tables for both statevectors and density matrices.
// NOTE(review): the enclosing function header and some statements between the
// visible lines are not visible in this view.
455 validate_envIsInit(__func__);
456 validate_numReportedNewlinesAboveZero(__func__);
460 print_label(
"QuEST execution environment");
// named values for the isDensMatr argument of the table printers below
462 bool statevec =
false;
463 bool densmatr =
true;
468 printPrecisionInfo();
469 printCompilationInfo();
470 printDeploymentInfo();
473 printDistributionInfo();
474 printQuregSizeLimits(statevec);
475 printQuregSizeLimits(densmatr);
476 printQuregAutoDeployments(statevec);
477 printQuregAutoDeployments(densmatr);
480 print_oneFewerNewlines();
// Writes a one-line summary of the environment into `str` using snprintf with
// a bound of 200 bytes.
// NOTE(review): the enclosing function header, the declaration of `env` and
// the remaining snprintf arguments are not visible at this position.
485 validate_envIsInit(__func__);
// thread count is reported as 1 when OpenMP was not compiled
489 int numThreads = cpu_isOpenmpCompiled()? cpu_getCurrentNumThreads() : 1;
// both flags are 0 unless the environment is GPU-accelerated; the && also
// avoids calling the gpu_* queries otherwise
490 int cuQuantum = env.isGpuAccelerated && gpu_isCuQuantumCompiled();
491 int gpuDirect = env.isGpuAccelerated && gpu_isDirectGpuCommPossible();
// output is truncated to fit 200 bytes including the terminator
493 snprintf(str, 200,
"CUDA=%d OpenMP=%d MPI=%d threads=%d ranks=%d cuQuantum=%d gpuDirect=%d",
494 env.isGpuAccelerated,
// NOTE(review): the two lines below are bare function signatures with no body
// or terminator visible here; they look displaced from their definitions -
// confirm against the full file.
// `str` is declared with extent 200; as a parameter this is a plain char*.
void getEnvironmentString(char str[200])
void initCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMultithread)