8#include "quest/include/environment.h"
9#include "quest/include/precision.h"
10#include "quest/include/modes.h"
12#include "quest/src/core/errors.hpp"
13#include "quest/src/core/memory.hpp"
14#include "quest/src/core/parser.hpp"
15#include "quest/src/core/printer.hpp"
16#include "quest/src/core/envvars.hpp"
17#include "quest/src/core/autodeployer.hpp"
18#include "quest/src/core/validation.hpp"
19#include "quest/src/core/randomiser.hpp"
20#include "quest/src/comm/comm_config.hpp"
21#include "quest/src/cpu/cpu_config.hpp"
22#include "quest/src/gpu/gpu_config.hpp"
// File-local pointer to the QuESTEnv used by the functions in this file.
// It starts as nullptr; elsewhere in this file a non-null value is treated as
// "an environment currently exists", and it is reset to nullptr on finalization.
51static QuESTEnv* globalEnvPtr =
nullptr;
// File-local flag, initially false. It is set to true further down in this file
// right after globalEnvPtr is reset to nullptr, and it is passed to
// validate_envNeverInit() when a new environment is requested.
65static bool hasEnvBeenFinalized =
false;
// Validates the requested deployment and fills in the fields of the global environment.
//
// useDistrib, useGpuAccel, useMultithread: requested deployment flags. They are validated
//   and then handed to autodep_chooseQuESTEnvDeployment(); presumably that call resolves
//   modeflag::USE_AUTO values in place - TODO confirm against autodeployer.hpp.
// caller: forwarded to every validation routine (presumably so error messages can name
//   the user-facing function - confirm).
//
// NOTE(review): this excerpt does not show the whole body; the statement that assigns
// globalEnvPtr and the function's closing brace are not visible here.
74void validateAndInitCustomQuESTEnv(
int useDistrib,
int useGpuAccel,
int useMultithread,
const char* caller) {
// Tell the validator whether an environment currently exists and whether one was already finalized.
78 validate_envNeverInit(globalEnvPtr !=
nullptr, hasEnvBeenFinalized, caller);
// Load environment variables and reset the validation epsilon before any further checks.
80 envvars_validateAndLoadEnvVars(caller);
81 validateconfig_setEpsilonToDefault();
// Check the requested flags, then let the auto-deployer settle the final deployment.
87 validate_newEnvDeploymentMode(useDistrib, useGpuAccel, useMultithread, caller);
90 autodep_chooseQuESTEnvDeployment(useDistrib, useGpuAccel, useMultithread);
99 validate_newEnvDistributedBetweenPower2Nodes(caller);
108 gpu_bindLocalGPUsToNodes();
112 bool permitGpuSharing = envvars_getWhetherGpuSharingIsPermitted();
// The unique-GPU check only runs when both GPU acceleration and distribution are in use
// and GPU sharing has not been permitted.
120 if (useGpuAccel && useDistrib && ! permitGpuSharing)
121 validate_newEnvNodesEachHaveUniqueGpu(caller);
// cuQuantum is used only when GPU acceleration is on and cuQuantum was compiled in.
128 bool useCuQuantum = useGpuAccel && gpu_isCuQuantumCompiled();
130 validate_gpuIsCuQuantumCompatible(caller);
135 rand_setSeedsToDefault();
// Report an allocation failure if globalEnvPtr is still nullptr at this point.
// NOTE(review): the allocation itself is not visible in this excerpt.
141 if (globalEnvPtr ==
nullptr)
142 error_allocOfQuESTEnvFailed();
// Record the deployment that was settled above.
145 globalEnvPtr->isMultithreaded = useMultithread;
146 globalEnvPtr->isGpuAccelerated = useGpuAccel;
147 globalEnvPtr->isDistributed = useDistrib;
148 globalEnvPtr->isCuQuantumEnabled = useCuQuantum;
149 globalEnvPtr->isGpuSharingEnabled = permitGpuSharing;
// Rank and node count are queried from the comm layer only when distributed;
// otherwise the environment is recorded as rank 0 of 1 node.
152 globalEnvPtr->rank = (useDistrib)? comm_getRank() : 0;
153 globalEnvPtr->numNodes = (useDistrib)? comm_getNumNodes() : 1;
// Builds the rows describing the numeric types (qreal, qcomp, qindex) and the validation epsilon.
// NOTE(review): the call that consumes these rows is not visible in this excerpt.
163void printPrecisionInfo() {
// Each type row: a label, then the type's name followed in parentheses by its
// sizeof() value rendered through printer_getMemoryWithUnitStr().
172 {
"qreal", printer_getQrealType() +
" (" + printer_getMemoryWithUnitStr(
sizeof(qreal)) +
")"},
176 {
"qcomp", printer_getQcompType() +
" (" + printer_getMemoryWithUnitStr(
sizeof(qcomp)) +
")"},
178 {
"qindex", printer_getQindexType() +
" (" + printer_getMemoryWithUnitStr(
sizeof(qindex)) +
")"},
// Current validation epsilon as returned by validateconfig_getEpsilon().
181 {
"validationEpsilon", printer_toStr(validateconfig_getEpsilon())},
// Builds the rows reporting which backends were compiled in, using the
// comm/gpu/cpu "is...Compiled" queries.
// NOTE(review): the call that consumes these rows is not visible in this excerpt.
186void printCompilationInfo() {
190 {
"isMpiCompiled", comm_isMpiCompiled()},
191 {
"isGpuCompiled", gpu_isGpuCompiled()},
192 {
"isOmpCompiled", cpu_isOpenmpCompiled()},
193 {
"isCuQuantumCompiled", gpu_isCuQuantumCompiled()},
// Builds the rows reporting which deployments are enabled, read from the
// fields of the global environment.
// globalEnvPtr is dereferenced without a null check here; the reporting code
// further down in this file calls validate_envIsInit() before the print helpers
// (presumably that guarantees a non-null pointer - confirm).
198void printDeploymentInfo() {
202 {
"isMpiEnabled", globalEnvPtr->isDistributed},
203 {
"isGpuEnabled", globalEnvPtr->isGpuAccelerated},
204 {
"isOmpEnabled", globalEnvPtr->isMultithreaded},
205 {
"isCuQuantumEnabled", globalEnvPtr->isCuQuantumEnabled},
206 {
"isGpuSharingEnabled", globalEnvPtr->isGpuSharingEnabled},
// CPU information rows.
// NOTE(review): the enclosing function header, the declaration of `ram` and the
// opening `try {` are not visible in this excerpt.
// pm, pn, na and un are not declared in the visible lines; presumably they are
// suffix/placeholder strings from printer_substrings - confirm.
213 using namespace printer_substrings;
// Overwrite `ram` with the local RAM capacity if it can be queried.
218 ram = printer_getMemoryWithUnitStr(mem_tryGetLocalRamCapacityInBytes()) + pm;
219 }
// A failed RAM query is deliberately ignored, so `ram` keeps its earlier value.
catch(mem::COULD_NOT_QUERY_RAM e){};
226 {
"numCpuCores", printer_toStr(std::thread::hardware_concurrency()) + pm},
// The OpenMP rows fall back to `na` when OpenMP was not compiled in.
227 {
"numOmpProcs", (cpu_isOpenmpCompiled())? printer_toStr(cpu_getNumOpenmpProcessors()) + pm : na},
228 {
"numOmpThrds", (cpu_isOpenmpCompiled())? printer_toStr(cpu_getAvailableNumThreads()) + pn : na},
// Free CPU memory is always reported as `un`; no query is made for it here.
230 {
"cpuMemoryFree", un},
// GPU information rows.
// NOTE(review): the enclosing function header is not visible in this excerpt.
// pg and na are not declared in the visible lines; presumably they come from
// printer_substrings - confirm.
237 using namespace printer_substrings;
// isComp: GPU support was compiled in. isGpu: compiled in AND a GPU is available.
244 bool isComp = gpu_isGpuCompiled();
245 bool isGpu = isComp && gpu_isGpuAvailable();
// The GPU count only requires GPU compilation; every other row requires an
// available GPU. Rows whose condition fails show `na`.
249 {
"numGpus", isComp? printer_toStr(gpu_getNumberOfLocalGpus()) : na},
250 {
"gpuDirect", isGpu? printer_toStr(gpu_isDirectGpuCommPossible()) : na},
251 {
"gpuMemPools", isGpu? printer_toStr(gpu_doesGpuSupportMemPools()) : na},
252 {
"gpuMemory", isGpu? printer_getMemoryWithUnitStr(gpu_getTotalMemoryInBytes()) + pg : na},
253 {
"gpuMemoryFree", isGpu? printer_getMemoryWithUnitStr(gpu_getCurrentAvailableMemoryInBytes()) + pg : na},
254 {
"gpuCache", isGpu? printer_getMemoryWithUnitStr(gpu_getCacheMemoryInBytes()) + pg : na},
// Builds the rows describing the distributed (MPI) setup.
// NOTE(review): the call that consumes these rows is not visible in this excerpt.
259void printDistributionInfo() {
261 using namespace printer_substrings;
// GPU-awareness of MPI is only queried when MPI was compiled in; otherwise `na` is shown.
265 {
"isMpiGpuAware", (comm_isMpiCompiled())? printer_toStr(comm_isMpiGpuAware()) : na},
// Node count as recorded in the global environment.
266 {
"numMpiNodes", printer_toStr(globalEnvPtr->numNodes)},
// Builds the rows reporting how many qubits a Qureg can have under each deployment.
//
// isDensMatr: selects density-matrix limits (true) or statevector limits (false);
//   it is forwarded to every mem_* query and picks the table title prefix.
//
// NOTE(review): the opening `try {`, several closing braces and the call that
// prints the rows are not visible in this excerpt.
271void printQuregSizeLimits(
bool isDensMatr) {
273 using namespace printer_substrings;
276 int numNodes = globalEnvPtr->numNodes;
// CPU limits start as `un` and are only replaced if the RAM capacity query below succeeds.
279 string maxQbForCpu = un;
280 string maxQbForMpiCpu = un;
284 qindex cpuMem = mem_tryGetLocalRamCapacityInBytes();
285 maxQbForCpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, 1, cpuMem));
// The multi-node CPU limit is only computed for a distributed environment.
288 if (globalEnvPtr->isDistributed)
289 maxQbForMpiCpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, numNodes, cpuMem));
296 }
// A failed RAM query is deliberately ignored; the CPU limits then keep whatever was assigned before the failure.
catch(mem::COULD_NOT_QUERY_RAM e) {};
// GPU limits start as `na` and are only computed for a GPU-accelerated environment,
// based on the currently available GPU memory.
299 string maxQbForGpu = na;
300 string maxQbForMpiGpu = na;
303 if (globalEnvPtr->isGpuAccelerated) {
304 qindex gpuMem = gpu_getCurrentAvailableMemoryInBytes();
305 maxQbForGpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, 1, gpuMem));
308 if (globalEnvPtr->isDistributed)
309 maxQbForMpiGpu = printer_toStr(mem_getMaxNumQuregQubitsWhichCanFitInMemory(isDensMatr, numNodes, gpuMem));
// The table title depends on the kind of Qureg being described.
313 string prefix = (isDensMatr)?
"density matrix" :
"statevector";
314 string title = prefix +
" limits";
// The minimum-qubits-for-distribution row is only meaningful with more than one node.
318 {
"minQubitsForMpi", (numNodes>1)? printer_toStr(mem_getMinNumQubitsForDistribution(numNodes)) : na},
319 {
"maxQubitsForCpu", maxQbForCpu},
320 {
"maxQubitsForGpu", maxQbForGpu},
321 {
"maxQubitsForMpiCpu", maxQbForMpiCpu},
322 {
"maxQubitsForMpiGpu", maxQbForMpiGpu},
// Limits from the mem_* overflow queries, independent of the memory sizes queried above.
323 {
"maxQubitsForMemOverflow", printer_toStr(mem_getMaxNumQuregQubitsBeforeGlobalMemSizeofOverflow(isDensMatr, numNodes))},
324 {
"maxQubitsForIndOverflow", printer_toStr(mem_getMaxNumQuregQubitsBeforeIndexOverflow(isDensMatr))},
// Builds a table of the deployments the auto-deployer picks as the qubit count
// grows, adding a row when the chosen deployment differs from the previous one.
//
// isDensMatr: selects density-matrix (true) or statevector (false) Quregs; it is
//   forwarded to the mem_/autodep_ queries and picks the table title prefix.
//
// NOTE(review): this excerpt omits several lines - the initial values of the
// prev* variables, the statement executed when the deployment is unchanged
// (presumably `continue`), the construction of `value`, the loop's closing
// brace and the condition of the final ternary. Confirm that prev* are
// initialised before the first comparison.
329void printQuregAutoDeployments(
bool isDensMatr) {
// Rows are collected first and printed at the end.
332 std::vector<std::tuple<string, string>> rows;
335 int useDistrib, useGpuAccel, useMulti;
336 int prevDistrib, prevGpuAccel, prevMulti;
// Upper bound of the scan, taken from the global-memory sizeof overflow limit; the loop stops one short of it.
345 int maxQubits = mem_getMaxNumQuregQubitsBeforeGlobalMemSizeofOverflow(isDensMatr, globalEnvPtr->numNodes);
347 for (
int numQubits=1; numQubits<maxQubits; numQubits++) {
// Reset every flag to USE_AUTO so the auto-deployer decides afresh for this qubit count.
350 useDistrib = modeflag::USE_AUTO;
351 useGpuAccel = modeflag::USE_AUTO;
352 useMulti = modeflag::USE_AUTO;;
353 autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMulti, *globalEnvPtr);
// Compare against the deployment chosen for the previous qubit count.
356 if (useDistrib == prevDistrib &&
357 useGpuAccel == prevGpuAccel &&
358 useMulti == prevMulti)
371 rows.push_back({printer_toStr(numQubits) +
" qubits", value});
// Remember this deployment for the next iteration's comparison.
374 prevDistrib = useDistrib;
375 prevGpuAccel = useGpuAccel;
376 prevMulti = useMulti;
380 string prefix = (isDensMatr)?
"density matrix" :
"statevector";
381 string title = prefix +
" autodeployment";
// Prints either a fallback message or the collected rows; the selecting condition is not visible here.
383 print_table(title,
"(no parallelisations available)"):
384 print_table(title, rows);
// Forwards the three deployment flags unchanged, passing the enclosing function's own name as the caller.
// NOTE(review): the enclosing header is not visible at this point; it presumably
// matches the initCustomQuESTEnv signature listed at the end of this excerpt - confirm.
400 validateAndInitCustomQuESTEnv(useDistrib, useGpuAccel, useMultithread, __func__);
// Requests automatic selection for every deployment flag, passing the enclosing function's own name as the caller.
// NOTE(review): the enclosing function header is not visible in this excerpt.
406 validateAndInitCustomQuESTEnv(modeflag::USE_AUTO, modeflag::USE_AUTO, modeflag::USE_AUTO, __func__);
// Returns 1 if the global environment pointer is non-null, otherwise 0.
// NOTE(review): the enclosing function header is not visible in this excerpt.
412 return (
int) (globalEnvPtr !=
nullptr);
// Validates that the environment is initialised, then returns the object globalEnvPtr points to.
// NOTE(review): the enclosing function header (and so whether this returns a copy
// or a reference) is not visible in this excerpt.
417 validate_envIsInit(__func__);
420 return *globalEnvPtr;
// Tears down the environment: backend-specific cleanup, then the global state is reset.
// NOTE(review): the enclosing function header, the bodies of the first and last
// `if` below and any deallocation of the environment object are not visible in
// this excerpt - confirm the object is released before the pointer is cleared.
425 validate_envIsInit(__func__);
431 if (globalEnvPtr->isGpuAccelerated)
// cuQuantum is only finalized when the environment is GPU-accelerated and cuQuantum was compiled in.
434 if (globalEnvPtr->isGpuAccelerated && gpu_isCuQuantumCompiled())
435 gpu_finalizeCuQuantum();
437 if (globalEnvPtr->isDistributed) {
// Clear the pointer so later code in this file sees that no environment exists.
444 globalEnvPtr =
nullptr;
// Record that finalization has happened; this flag is later passed to validate_envNeverInit().
447 hasEnvBeenFinalized =
true;
// Validates that the environment is initialised, then acts conditionally on the
// GPU-accelerated and distributed flags.
// NOTE(review): the enclosing function header and the statements governed by the
// two `if` lines are not visible in this excerpt.
452 validate_envIsInit(__func__);
454 if (globalEnvPtr->isGpuAccelerated)
457 if (globalEnvPtr->isDistributed)
// Prints the environment report: a heading followed by the individual info tables.
// NOTE(review): the enclosing function header is not visible in this excerpt, and
// lines may be missing between the calls shown.
463 validate_envIsInit(__func__);
464 validate_numReportedNewlinesAboveZero(__func__);
468 print_label(
"QuEST execution environment");
// Named values for the isDensMatr argument of the Qureg print helpers below.
470 bool statevec =
false;
471 bool densmatr =
true;
476 printPrecisionInfo();
477 printCompilationInfo();
478 printDeploymentInfo();
481 printDistributionInfo();
// Size limits and auto-deployments are reported for statevectors first, then density matrices.
482 printQuregSizeLimits(statevec);
483 printQuregSizeLimits(densmatr);
484 printQuregAutoDeployments(statevec);
485 printQuregAutoDeployments(densmatr);
488 print_oneFewerNewlines();
// Formats a one-line summary of the environment into `str`.
// NOTE(review): the enclosing header, the declaration of `env` and the remaining
// snprintf arguments are not visible at this point; the body presumably belongs to
// the getEnvironmentString(char str[200]) signature listed at the end of this excerpt.
493 validate_envIsInit(__func__);
// Thread count is only queried when OpenMP was compiled in; otherwise 1 is reported.
497 int numThreads = cpu_isOpenmpCompiled()? cpu_getAvailableNumThreads() : 1;
// Both GPU-related flags are 0 unless the environment is GPU-accelerated.
498 int cuQuantum = env.isGpuAccelerated && gpu_isCuQuantumCompiled();
499 int gpuDirect = env.isGpuAccelerated && gpu_isDirectGpuCommPossible();
// Output is limited to 200 bytes, the array size in the trailing signature.
501 snprintf(str, 200,
"CUDA=%d OpenMP=%d MPI=%d threads=%d ranks=%d cuQuantum=%d gpuDirect=%d",
502 env.isGpuAccelerated,
// NOTE(review): the two lines below are bare signatures with no body and no
// terminating ';' in this excerpt. The fragments earlier in this excerpt that use
// `str`/200 and the three deployment flags presumably belong to them - confirm.
void getEnvironmentString(char str[200])
void initCustomQuESTEnv(int useDistrib, int useGpuAccel, int useMultithread)