8#include "quest/include/qureg.h"
9#include "quest/include/environment.h"
10#include "quest/include/initialisations.h"
12#include "quest/src/core/validation.hpp"
13#include "quest/src/core/autodeployer.hpp"
14#include "quest/src/core/printer.hpp"
15#include "quest/src/core/bitwise.hpp"
16#include "quest/src/core/memory.hpp"
17#include "quest/src/core/utilities.hpp"
18#include "quest/src/core/localiser.hpp"
19#include "quest/src/comm/comm_config.hpp"
20#include "quest/src/comm/comm_routines.hpp"
21#include "quest/src/cpu/cpu_config.hpp"
22#include "quest/src/gpu/gpu_config.hpp"
// Builds a Qureg with every non-heap field populated: deployment flags,
// rank/node bookkeeping, and global/per-node amplitude dimensions.
// Heap pointers are left null; allocation is the caller's job.
// NOTE(review): this extraction has the original file's line numbers fused
// into the text and several lines dropped (the `Qureg out = {` opener, the
// lookup that defines `env`, and the trailing `return`), so the comments
// below describe only what is visible.
35Qureg qureg_populateNonHeapFields(
int numQubits,
int isDensMatr,
int useDistrib,
int useGpuAccel,
int useMultithread) {
// Number of bits needed to index a node; zero when not distributed.
// `env` is presumably the QuEST environment fetched on a dropped line —
// TODO confirm against the full file.
40 int logNumNodes = (useDistrib)?
41 logBase2(env.numNodes) : 0;
// Per-node amplitude exponent: a density matrix spans 2*numQubits qubit
// dimensions, a statevector spans numQubits; distribution shaves off the
// node-index bits.
42 qindex logNumAmpsPerNode = (isDensMatr)?
43 (2*numQubits - logNumNodes) :
44 ( numQubits - logNumNodes);
// Designated initializers of the returned Qureg (the `{` opener is not
// visible in this extraction).
48 .isMultithreaded = useMultithread,
49 .isGpuAccelerated = useGpuAccel,
50 .isDistributed = useDistrib,
// Non-distributed quregs present as a single node of rank 0.
57 .rank = (useDistrib)? env.rank : 0,
58 .numNodes = (useDistrib)? env.numNodes : 1,
// NOTE(review): recomputes logBase2(env.numNodes) even though logNumNodes
// above already holds exactly this value — could be reused.
59 .logNumNodes = (useDistrib)? logBase2(env.numNodes) : 0,
62 .isDensityMatrix = isDensMatr,
63 .numQubits = numQubits,
// Global amplitude count: 2^(2N) for density matrices, 2^N for statevectors.
64 .numAmps = (isDensMatr)? powerOf2(2*numQubits) : powerOf2(numQubits),
65 .logNumAmps = (isDensMatr)? 2*numQubits : numQubits,
68 .numAmpsPerNode = powerOf2(logNumAmpsPerNode),
69 .logNumAmpsPerNode = logNumAmpsPerNode,
// Only density matrices distribute whole columns; statevectors leave 0.
70 .logNumColsPerNode = (isDensMatr)? numQubits - logNumNodes : 0,
// Heap memory is allocated (or left null) by the caller afterwards.
75 .cpuCommBuffer = nullptr,
76 .gpuCommBuffer = nullptr
// Returns whether any allocation this node was responsible for is missing.
// Only the buffers that this deployment actually requires are checked
// (comm buffers only when distributed, GPU arrays only when GPU-accelerated).
// NOTE(review): the `return true;` bodies of each guard and the final
// `return false;` are dropped in this extraction.
87bool didAnyLocalAllocsFail(
Qureg qureg) {
// CPU amplitude array is required in every deployment.
90 if (! mem_isAllocated(qureg.cpuAmps))
// CPU communication buffer is required only for distributed quregs.
94 if (qureg.isDistributed && ! mem_isAllocated(qureg.cpuCommBuffer))
// GPU amplitude array is required only when GPU-accelerated.
98 if (qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuAmps))
// GPU communication buffer is required only when both distributed and GPU.
102 if (qureg.isDistributed && qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuCommBuffer))
// Collectively decides whether allocation failed anywhere, so all nodes can
// agree to abort/clean up together (avoids deadlock from divergent paths).
// NOTE(review): the function name says "on ANY node" but the visible helper
// is comm_isTrueOnAllNodes — verify against comm_routines.hpp whether that
// helper performs a logical-OR reduction despite its name. The trailing
// `return anyFail;` is dropped in this extraction.
110bool didAnyAllocsFailOnAnyNode(
Qureg qureg) {
// First check this node's own allocations...
112 bool anyFail = didAnyLocalAllocsFail(qureg);
// ...then synchronise the verdict across all nodes.
114 anyFail = comm_isTrueOnAllNodes(anyFail);
// If any node failed an allocation, frees every buffer this qureg holds so
// the subsequent validation error does not leak the partial allocations.
// NOTE(review): the early-return body of the first guard and the dealloc of
// cpuAmps-only deployments' conditions are as dropped lines suggest; the
// dealloc helpers presumably tolerate nullptr — TODO confirm in cpu_config /
// gpu_config.
120void freeAllMemoryIfAnyAllocsFailed(
Qureg qureg) {
// Nothing to do when every node allocated successfully.
123 if (!didAnyAllocsFailOnAnyNode(qureg))
// Free CPU-side memory unconditionally (pointers may be null).
127 cpu_deallocArray(qureg.cpuAmps);
128 cpu_deallocArray(qureg.cpuCommBuffer);
// Free GPU-side memory only when a GPU deployment was requested.
131 if (qureg.isGpuAccelerated) {
132 gpu_deallocArray(qureg.gpuAmps);
133 gpu_deallocArray(qureg.gpuCommBuffer);
// Central factory shared by all createQureg variants: validates the request,
// lets the autodeployer resolve any USE_AUTO flags, populates the non-heap
// fields, allocates every required buffer, then validates the allocations
// (freeing everything first if any node failed, to avoid leaks).
// NOTE(review): the line defining `env` and the final `return qureg;` are
// dropped in this extraction.
138Qureg validateAndCreateCustomQureg(
int numQubits,
int isDensMatr,
int useDistrib,
int useGpuAccel,
int useMultithread,
const char* caller) {
140 validate_envIsInit(caller);
// Reject impossible deployments (e.g. distribution without MPI) up front.
144 validate_newQuregParams(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env, caller);
// Overwrites any USE_AUTO flags with concrete 0/1 decisions.
147 autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env);
149 Qureg qureg = qureg_populateNonHeapFields(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread);
// Allocate only the buffers the chosen deployment needs; others stay null.
152 qureg.cpuAmps = cpu_allocArray(qureg.numAmpsPerNode);
157 qureg.gpuAmps = (useGpuAccel)? gpu_allocArray(qureg.numAmpsPerNode) :
nullptr;
158 qureg.cpuCommBuffer = (useDistrib)? cpu_allocArray(qureg.numAmpsPerNode) :
nullptr;
159 qureg.gpuCommBuffer = (useGpuAccel && useDistrib)? gpu_allocArray(qureg.numAmpsPerNode) :
nullptr;
// Collective cleanup first, so the validation below can throw on every
// node without leaking the nodes whose allocations did succeed.
162 freeAllMemoryIfAnyAllocsFailed(qureg);
163 validate_newQuregAllocs(qureg, __func__);
// Reports which parallelisation features this qureg was deployed with.
// NOTE(review): the visible braces are rows of a key/value table; the
// enclosing print-table call is dropped in this extraction.
178void printDeploymentInfo(
Qureg qureg) {
182 {
"isMpiEnabled", qureg.isDistributed},
183 {
"isGpuEnabled", qureg.isGpuAccelerated},
184 {
"isOmpEnabled", qureg.isMultithreaded},
// Reports the qureg's logical dimensions: qubit count, column count (density
// matrices only), and total amplitude count, formatted as "2^k = n" strings.
// NOTE(review): the declarations of ampsStr/colsStr and the enclosing
// print-table call are dropped in this extraction; `bt`/`eq` are presumably
// "2^" and " = " substrings from printer_substrings — TODO confirm.
188void printDimensionInfo(
Qureg qureg) {
190 using namespace printer_substrings;
// Density matrices have 2*numQubits exponent bits worth of amplitudes.
194 ampsStr = bt + printer_toStr(qureg.numQubits * (qureg.isDensityMatrix? 2 : 1));
195 ampsStr += eq + printer_toStr(qureg.numAmps);
// Column count is only meaningful for density matrices.
198 if (qureg.isDensityMatrix)
200 bt + printer_toStr(qureg.numQubits) +
201 eq + printer_toStr(powerOf2(qureg.numQubits)));
// Table rows (the surrounding print call is not visible here).
205 {
"isDensMatr", printer_toStr(qureg.isDensityMatrix)},
206 {
"numQubits", printer_toStr(qureg.numQubits)},
207 {
"numCols", colsStr},
208 {
"numAmps", ampsStr},
// Reports how the qureg is split across nodes: node count, and per-node
// column/amplitude counts. Non-distributed quregs report "n/a" strings.
// NOTE(review): the declarations of ampsStr/colsStr and the enclosing
// print-table call are dropped in this extraction.
213void printDistributionInfo(
Qureg qureg) {
215 using namespace printer_substrings;
// Default to the "not applicable" placeholder string.
218 string nodesStr = na;
// Distributed quregs overwrite the placeholders with "2^k = n" figures.
223 if (qureg.isDistributed) {
224 nodesStr = bt + printer_toStr(qureg.logNumNodes) + eq + printer_toStr(qureg.numNodes);
225 ampsStr = bt + printer_toStr(qureg.logNumAmpsPerNode) + eq + printer_toStr(qureg.numAmpsPerNode) + pn;
// Per-node columns only exist for distributed density matrices.
226 if (qureg.isDensityMatrix)
227 colsStr = bt + printer_toStr(qureg.logNumColsPerNode) + eq + printer_toStr(powerOf2(qureg.logNumColsPerNode)) + pn;
// Table rows (the surrounding print call is not visible here).
232 {
"numNodes", nodesStr},
233 {
"numCols", colsStr},
234 {
"numAmps", ampsStr},
// Reports the memory footprint of each allocated buffer plus the global
// total across all nodes. Unallocated buffers show the "n/a" placeholder.
// NOTE(review): the enclosing print-table call is dropped in this extraction.
239void printMemoryInfo(
Qureg qureg) {
241 using namespace printer_substrings;
// Every allocated array (amps or comm buffer) has this same local size.
243 size_t localArrayMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
// Suffix "per node" only when the figure is per-node, i.e. distributed.
244 string localMemStr = printer_getMemoryWithUnitStr(localArrayMem) + (qureg.isDistributed? pn :
"");
// A zero total is the sentinel for an overflowed size computation.
248 qindex globalTotalMem = mem_getTotalGlobalMemoryUsed(qureg);
249 string globalMemStr = (globalTotalMem == 0)?
"overflowed" : printer_getMemoryWithUnitStr(globalTotalMem);
// Table rows; each buffer reports its size only if actually allocated.
253 {
"cpuAmps", mem_isAllocated(qureg.cpuAmps)? localMemStr : na},
254 {
"gpuAmps", mem_isAllocated(qureg.gpuAmps)? localMemStr : na},
255 {
"cpuCommBuffer", mem_isAllocated(qureg.cpuCommBuffer)? localMemStr : na},
256 {
"gpuCommBuffer", mem_isAllocated(qureg.gpuCommBuffer)? localMemStr : na},
257 {
"globalTotal", globalMemStr},
// Body of createCustomQureg (its signature line is dropped in this
// extraction — see the declaration list at the end of the chunk): forwards
// the caller's explicit deployment flags to the shared validating factory.
273 return validateAndCreateCustomQureg(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, __func__);
// Presumably the body of createQureg(int numQubits) — its signature and the
// line defining isDensMatr (likely `int isDensMatr = 0;`) are dropped in
// this extraction; TODO confirm. All deployment flags are left to the
// autodeployer via USE_AUTO.
280 int autoMode = modeflag::USE_AUTO;
281 return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
// Presumably the body of createDensityQureg(int numQubits) — its signature
// and the line defining isDensMatr (likely `int isDensMatr = 1;`) are
// dropped in this extraction; TODO confirm. Deployment is auto-decided.
288 int autoMode = modeflag::USE_AUTO;
289 return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
// Presumably the body of createForcedQureg(int numQubits) — its signature
// and the lines defining isDensMatr/env are dropped in this extraction.
// "Forced" means: use every parallelisation the environment supports.
294 validate_envIsInit(__func__);
299 return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
// Presumably the body of createForcedDensityQureg(int numQubits) — its
// signature and the lines defining isDensMatr/env are dropped in this
// extraction. Forces every environment-supported deployment feature on.
304 validate_envIsInit(__func__);
309 return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
// Presumably the body of createCloneQureg(Qureg qureg) — its signature, the
// amplitude-copying step, and `return clone;` are dropped in this
// extraction. Creates a new qureg with the identical deployment as the
// original.
314 validate_quregFields(qureg, __func__);
317 Qureg clone = validateAndCreateCustomQureg(
318 qureg.numQubits, qureg.isDensityMatrix, qureg.isDistributed,
319 qureg.isGpuAccelerated, qureg.isMultithreaded, __func__);
// Presumably the body of destroyQureg(Qureg qureg) — its signature line is
// dropped in this extraction. Frees exactly the buffers the deployment
// allocated, mirroring the allocation logic in validateAndCreateCustomQureg.
329 validate_quregFields(qureg, __func__);
// CPU amplitudes exist in every deployment.
332 cpu_deallocArray(qureg.cpuAmps);
// CPU comm buffer exists only for distributed quregs.
335 if (qureg.isDistributed)
336 cpu_deallocArray(qureg.cpuCommBuffer);
// GPU amplitudes exist only for GPU-accelerated quregs.
339 if (qureg.isGpuAccelerated)
340 gpu_deallocArray(qureg.gpuAmps);
// GPU comm buffer requires both GPU acceleration and distribution.
343 if (qureg.isGpuAccelerated && qureg.isDistributed)
344 gpu_deallocArray(qureg.gpuCommBuffer);
// Presumably the body of reportQureg(Qureg qureg) — its signature line is
// dropped in this extraction. Prints a labelled, multi-table summary of the
// qureg's deployment, dimensions, distribution and memory usage.
352 validate_quregFields(qureg, __func__);
353 validate_numReportedNewlinesAboveZero(__func__);
358 print_label(
"Qureg");
359 printDeploymentInfo(qureg);
360 printDimensionInfo(qureg);
361 printDistributionInfo(qureg);
362 printMemoryInfo(qureg);
// Each table above ends with a newline; trim the trailing one.
365 print_oneFewerNewlines();
// Presumably the body of reportQuregParams(Qureg qureg) — its signature and
// the body of the `if (qureg.isDistributed)` guard (likely adding the comm
// buffer's memory) are dropped in this extraction; TODO confirm what the
// guard adds to localMem.
370 validate_quregFields(qureg, __func__);
371 validate_numReportedNewlinesAboveZero(__func__);
// Per-node memory: the amplitude array...
375 size_t localMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
376 if (qureg.isDistributed)
// ...plus the Qureg struct itself.
380 localMem +=
sizeof(qureg);
382 print_header(qureg, localMem);
386 print_oneFewerNewlines();
// Presumably the body of syncQuregToGpu(Qureg qureg) — its signature line is
// dropped in this extraction. Copies the full CPU amplitude array to the
// GPU; a silent no-op for CPU-only quregs.
391 validate_quregFields(qureg, __func__);
394 if (qureg.isGpuAccelerated)
395 gpu_copyCpuToGpu(qureg);
// Presumably the body of syncQuregFromGpu(Qureg qureg) — its signature line
// is dropped in this extraction. Copies the full GPU amplitude array back to
// the CPU; a silent no-op for CPU-only quregs.
398 validate_quregFields(qureg, __func__);
401 if (qureg.isGpuAccelerated)
402 gpu_copyGpuToCpu(qureg);
// Presumably the body of syncSubQuregToGpu(Qureg, qindex, qindex) — its
// signature and the early `return;` body of the CPU-only guard are dropped
// in this extraction. Copies only the local sub-range
// [localStartInd, localStartInd + numLocalAmps) from CPU to GPU memory.
407 validate_quregFields(qureg, __func__);
408 validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);
// CPU-only quregs have no GPU memory to update.
418 if (!qureg.isGpuAccelerated)
423 gpu_copyCpuToGpu(&qureg.cpuAmps[localStartInd], &qureg.gpuAmps[localStartInd], numLocalAmps);
// Presumably the body of syncSubQuregFromGpu(Qureg, qindex, qindex) — its
// signature and the early `return;` body of the CPU-only guard are dropped
// in this extraction. Copies only the local sub-range
// [localStartInd, localStartInd + numLocalAmps) from GPU back to CPU memory.
426 validate_quregFields(qureg, __func__);
427 validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);
// CPU-only quregs already hold the authoritative amplitudes.
437 if (!qureg.isGpuAccelerated)
442 gpu_copyGpuToCpu(&qureg.gpuAmps[localStartInd], &qureg.cpuAmps[localStartInd], numLocalAmps);
// Presumably the body of getQuregAmps(qcomp*, Qureg, qindex, qindex) — its
// signature line is dropped in this extraction. Gathers a contiguous range
// of global statevector amplitudes into the caller's buffer, handling
// distribution via the localiser.
447 validate_quregFields(qureg, __func__);
448 validate_quregIsStateVector(qureg, __func__);
449 validate_basisStateIndices(qureg, startInd, numAmps, __func__);
451 localiser_statevec_getAmps(outAmps, qureg, startInd, numAmps);
// Presumably the body of getDensityQuregAmps(qcomp**, Qureg, ...) — its
// signature line is dropped in this extraction. Gathers a rectangular
// sub-block of density-matrix amplitudes into the caller's 2D buffer,
// handling distribution via the localiser.
456 validate_quregFields(qureg, __func__);
457 validate_quregIsDensityMatrix(qureg, __func__);
458 validate_basisStateRowCols(qureg, startRow, startCol, numRows, numCols, __func__);
460 localiser_densmatr_getAmps(outAmps, qureg, startRow, startCol, numRows, numCols);
// Presumably the body of getQuregAmp(Qureg, qindex) — its signature line is
// dropped in this extraction. Fetches one global statevector amplitude.
484 validate_quregFields(qureg, __func__);
485 validate_quregIsStateVector(qureg, __func__);
486 validate_basisStateIndex(qureg, index, __func__);
488 return localiser_statevec_getAmp(qureg, index);
// C-ABI wrapper returning the complex amplitude via an out-pointer (qcomp
// is not portable across the C/C++ boundary by value). Its body (likely
// `*out = getQuregAmp(qureg, index);`) is dropped in this extraction.
490extern "C" void _wrap_getQuregAmp(qcomp* out,
Qureg qureg, qindex index) {
// Presumably the body of getDensityQuregAmp(Qureg, qindex, qindex) — its
// signature and the trailing `return amp;` are dropped in this extraction.
// Maps the (row, column) pair to the flat global index of the density
// matrix's column-stacked representation, then fetches that amplitude.
497 validate_quregFields(qureg, __func__);
498 validate_quregIsDensityMatrix(qureg, __func__);
499 validate_basisStateRowCol(qureg, row, column, __func__);
501 qindex ind = util_getGlobalFlatIndex(qureg, row, column);
502 qcomp amp = localiser_statevec_getAmp(qureg, ind);
// C-ABI wrapper returning the complex amplitude via an out-pointer. Its
// body (likely `*out = getDensityQuregAmp(...);`) is dropped in this
// extraction.
505extern "C" void _wrap_getDensityQuregAmp(qcomp* out,
Qureg qureg, qindex row, qindex column) {
// NOTE(review): the following bare signatures appear to be extraction
// artifacts — they list the public API functions whose (signature-stripped)
// bodies appear earlier in this chunk. They are not valid declarations as
// written (no terminating semicolons); confirm against the original file
// before relying on them.
void setQuregToClone(Qureg targetQureg, Qureg copyQureg)
void initZeroState(Qureg qureg)
Qureg createDensityQureg(int numQubits)
Qureg createForcedQureg(int numQubits)
Qureg createForcedDensityQureg(int numQubits)
Qureg createCloneQureg(Qureg qureg)
Qureg createCustomQureg(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int useMultithread)
Qureg createQureg(int numQubits)
void destroyQureg(Qureg qureg)
qcomp getQuregAmp(Qureg qureg, qindex index)
void getDensityQuregAmps(qcomp **outAmps, Qureg qureg, qindex startRow, qindex startCol, qindex numRows, qindex numCols)
void getQuregAmps(qcomp *outAmps, Qureg qureg, qindex startInd, qindex numAmps)
qcomp getDensityQuregAmp(Qureg qureg, qindex row, qindex column)
void reportQureg(Qureg qureg)
void reportQuregParams(Qureg qureg)
void syncQuregFromGpu(Qureg qureg)
void syncSubQuregToGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps)
void syncSubQuregFromGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps)
void syncQuregToGpu(Qureg qureg)