8#include "quest/include/qureg.h"
9#include "quest/include/environment.h"
10#include "quest/include/initialisations.h"
12#include "quest/src/core/validation.hpp"
13#include "quest/src/core/autodeployer.hpp"
14#include "quest/src/core/printer.hpp"
15#include "quest/src/core/bitwise.hpp"
16#include "quest/src/core/memory.hpp"
17#include "quest/src/core/utilities.hpp"
18#include "quest/src/core/localiser.hpp"
19#include "quest/src/comm/comm_config.hpp"
20#include "quest/src/comm/comm_routines.hpp"
21#include "quest/src/cpu/cpu_config.hpp"
22#include "quest/src/gpu/gpu_config.hpp"
// Computes the scalar (non-pointer) fields of a Qureg from the requested
// dimensions and deployment flags, reading node information from `env`
// (declared outside the visible lines).
//
// numQubits      - number of qubits in the register
// isDensMatr     - non-zero when the register is a density matrix
// useDistrib     - non-zero when amplitudes are split between nodes
// useGpuAccel    - non-zero when GPU memory/acceleration is used
// useMultithread - non-zero when multithreading is used
//
// NOTE(review): this excerpt is lossy. The opening of the struct initialiser,
// any fields between the visible ones, and the return statement are not shown.
37Qureg qureg_populateNonHeapFields(
int numQubits,
int isDensMatr,
int useDistrib,
int useGpuAccel,
int useMultithread) {
// Only a distributed register is divided between nodes; otherwise the
// exponent of the node count is zero.
42 int logNumNodes = (useDistrib)?
43 logBase2(env.numNodes) : 0;
// A density matrix has 2*numQubits as its amplitude exponent (see .logNumAmps
// below); the per-node exponent subtracts the node-count exponent.
44 qindex logNumAmpsPerNode = (isDensMatr)?
45 (2*numQubits - logNumNodes) :
46 ( numQubits - logNumNodes);
// Deployment flags are stored exactly as passed in.
50 .isMultithreaded = useMultithread,
51 .isGpuAccelerated = useGpuAccel,
52 .isDistributed = useDistrib,
// A non-distributed register records itself as a single node of rank 0,
// regardless of what `env` reports.
59 .rank = (useDistrib)? env.rank : 0,
60 .numNodes = (useDistrib)? env.numNodes : 1,
61 .logNumNodes = (useDistrib)? logBase2(env.numNodes) : 0,
// Global dimensions: 2^(2*numQubits) amplitudes for a density matrix,
// 2^numQubits otherwise.
64 .isDensityMatrix = isDensMatr,
65 .numQubits = numQubits,
66 .numAmps = (isDensMatr)? powerOf2(2*numQubits) : powerOf2(numQubits),
67 .logNumAmps = (isDensMatr)? 2*numQubits : numQubits,
// Per-node dimensions; the column exponent is only meaningful for density
// matrices and is zero otherwise.
70 .numAmpsPerNode = powerOf2(logNumAmpsPerNode),
71 .logNumAmpsPerNode = logNumAmpsPerNode,
72 .logNumColsPerNode = (isDensMatr)? numQubits - logNumNodes : 0,
// Communication buffers start unallocated; validateAndCreateCustomQureg
// assigns them after this function returns.
77 .cpuCommBuffer = nullptr,
78 .gpuCommBuffer = nullptr
// Checks, on this node only, whether any array the Qureg's deployment requires
// is unallocated according to mem_isAllocated().
//
// NOTE(review): the statement guarded by each `if` and the final return are
// not visible in this excerpt; presumably each guard returns true and the
// function otherwise returns false - confirm against the full file.
89bool didAnyLocalAllocsFail(
Qureg qureg) {
// The CPU amplitude array is checked unconditionally.
92 if (! mem_isAllocated(qureg.cpuAmps))
// The CPU communication buffer is only checked for distributed registers.
96 if (qureg.isDistributed && ! mem_isAllocated(qureg.cpuCommBuffer))
// GPU amplitudes are only checked for GPU-accelerated registers.
100 if (qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuAmps))
// The GPU communication buffer is only checked when both distributed and
// GPU-accelerated.
104 if (qureg.isDistributed && qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuCommBuffer))
// Combines this node's allocation-failure flag with those of the other nodes
// via comm_isTrueOnAllNodes().
//
// NOTE(review): any guard around the communication call and the return
// statement are not visible in this excerpt.
112bool didAnyAllocsFailOnAnyNode(
Qureg qureg) {
// Local result from didAnyLocalAllocsFail().
114 bool anyFail = didAnyLocalAllocsFail(qureg);
// NOTE(review): the helper's name suggests a logical AND across nodes. If so,
// a failure on only some nodes would yield false here, contradicting this
// function's "on any node" name. Confirm the helper's semantics in
// comm_routines; a negated form such as !comm_isTrueOnAllNodes(!anyFail)
// may be what is intended.
116 anyFail = comm_isTrueOnAllNodes(anyFail);
// Releases the Qureg's arrays when didAnyAllocsFailOnAnyNode() reports a
// failure.
//
// `qureg` is received by value, so this function cannot reset the pointer
// fields held by the caller's copy.
//
// NOTE(review): the statement guarded by the first `if` (presumably an early
// return) and the closing braces are not visible in this excerpt.
122void freeAllMemoryIfAnyAllocsFailed(
Qureg qureg) {
// Nothing to release when no failure was reported.
125 if (!didAnyAllocsFailOnAnyNode(qureg))
// Both CPU arrays are passed to the deallocator unconditionally, including a
// cpuCommBuffer that is nullptr for non-distributed registers.
// NOTE(review): assumes cpu_deallocArray tolerates nullptr - confirm.
129 cpu_deallocArray(qureg.cpuAmps);
130 cpu_deallocArray(qureg.cpuCommBuffer);
// GPU deallocation is only attempted for GPU-accelerated registers.
133 if (qureg.isGpuAccelerated) {
134 gpu_deallocArray(qureg.gpuAmps);
135 gpu_deallocArray(qureg.gpuCommBuffer);
// Shared creation routine: validates the request, settles the deployment,
// fills the scalar fields, allocates the arrays, and validates the
// allocations.
//
// caller - name of the invoking function, forwarded to the validators
//          (the fragments further down the file pass their own __func__).
//
// NOTE(review): the return statement and closing brace are not visible in
// this excerpt.
140Qureg validateAndCreateCustomQureg(
int numQubits,
int isDensMatr,
int useDistrib,
int useGpuAccel,
int useMultithread,
const char* caller) {
// The environment is validated first; `env` is read by the calls below.
142 validate_envIsInit(caller);
146 validate_newQuregParams(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env, caller);
// NOTE(review): callers elsewhere in this file pass modeflag::USE_AUTO for
// the three deployment flags, so this call presumably resolves them to
// concrete choices in place - confirm its parameter types.
149 autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env);
151 Qureg qureg = qureg_populateNonHeapFields(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread);
// Every array is sized numAmpsPerNode. CPU amplitudes are always allocated;
// the others only when their deployment is active, and are nullptr otherwise.
154 qureg.cpuAmps = cpu_allocArray(qureg.numAmpsPerNode);
159 qureg.gpuAmps = (useGpuAccel)? gpu_allocArray(qureg.numAmpsPerNode) :
nullptr;
160 qureg.cpuCommBuffer = (useDistrib)? cpu_allocArray(qureg.numAmpsPerNode) :
nullptr;
161 qureg.gpuCommBuffer = (useGpuAccel && useDistrib)? gpu_allocArray(qureg.numAmpsPerNode) :
nullptr;
// Release everything on a reported failure, then let the validator inspect
// the allocations.
164 freeAllMemoryIfAnyAllocsFailed(qureg);
// NOTE(review): this validator receives __func__ (this internal function's
// name) whereas the validators above receive `caller`; if it reports the
// name to the user, `caller` may be what is intended - confirm.
165 validate_newQuregAllocs(qureg, __func__);
// Lists the Qureg's deployment flags as label/value rows.
//
// NOTE(review): only the rows are visible in this excerpt; the call that
// consumes them, its title argument, and the closing braces are not shown.
180void printDeploymentInfo(
Qureg qureg) {
// Whether the register is distributed.
184 {
"isMpiEnabled", qureg.isDistributed},
// Whether the register is GPU-accelerated.
185 {
"isGpuEnabled", qureg.isGpuAccelerated},
// Whether the register is multithreaded.
186 {
"isOmpEnabled", qureg.isMultithreaded},
// Lists the Qureg's global dimensions as label/value rows.
//
// NOTE(review): the declarations of ampsStr and colsStr, the start of the
// colsStr assignment, and the call consuming the rows are not visible in this
// excerpt. `bt` and `eq` are not declared in the visible lines; presumably
// they come from printer_substrings.
190void printDimensionInfo(
Qureg qureg) {
192 using namespace printer_substrings;
// Amplitude count: the exponent (numQubits, doubled for a density matrix)
// followed by the stored total numAmps.
196 ampsStr = bt + printer_toStr(qureg.numQubits * (qureg.isDensityMatrix? 2 : 1));
197 ampsStr += eq + printer_toStr(qureg.numAmps);
// For density matrices only: the exponent numQubits followed by
// powerOf2(numQubits). The row label below suggests this is the column count.
200 if (qureg.isDensityMatrix)
202 bt + printer_toStr(qureg.numQubits) +
203 eq + printer_toStr(powerOf2(qureg.numQubits)));
// Rows, in display order.
207 {
"isDensMatr", printer_toStr(qureg.isDensityMatrix)},
208 {
"numQubits", printer_toStr(qureg.numQubits)},
209 {
"numCols", colsStr},
210 {
"numAmps", ampsStr},
// Lists how the Qureg is divided between nodes as label/value rows.
//
// NOTE(review): the declarations of ampsStr and colsStr (and their defaults),
// the closing braces, and the call consuming the rows are not visible in this
// excerpt. `na`, `bt`, `eq` and `pn` are not declared in the visible lines;
// presumably they come from printer_substrings.
215void printDistributionInfo(
Qureg qureg) {
217 using namespace printer_substrings;
// The node count defaults to `na` and is only overwritten for distributed
// registers.
220 string nodesStr = na;
225 if (qureg.isDistributed) {
// Each string is an exponent followed by its value; the per-node strings
// also append `pn`.
226 nodesStr = bt + printer_toStr(qureg.logNumNodes) + eq + printer_toStr(qureg.numNodes);
227 ampsStr = bt + printer_toStr(qureg.logNumAmpsPerNode) + eq + printer_toStr(qureg.numAmpsPerNode) + pn;
// The per-node column string is only set for density matrices.
228 if (qureg.isDensityMatrix)
229 colsStr = bt + printer_toStr(qureg.logNumColsPerNode) + eq + printer_toStr(powerOf2(qureg.logNumColsPerNode)) + pn;
// Rows, in display order.
234 {
"numNodes", nodesStr},
235 {
"numCols", colsStr},
236 {
"numAmps", ampsStr},
// Lists the memory used by each of the Qureg's arrays and the global total
// as label/value rows.
//
// NOTE(review): the call consuming the rows and the closing braces are not
// visible in this excerpt. `pn` and `na` are not declared in the visible
// lines; presumably they come from printer_substrings.
241void printMemoryInfo(
Qureg qureg) {
243 using namespace printer_substrings;
// The same size string is reused for every allocated array, since all are
// sized from numAmpsPerNode; `pn` is appended for distributed registers.
245 size_t localArrayMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
246 string localMemStr = printer_getMemoryWithUnitStr(localArrayMem) + (qureg.isDistributed? pn :
"");
// A global total of zero is rendered as "overflowed".
// NOTE(review): presumably mem_getTotalGlobalMemoryUsed returns 0 to signal
// overflow - confirm.
250 qindex globalTotalMem = mem_getTotalGlobalMemoryUsed(qureg);
251 string globalMemStr = (globalTotalMem == 0)?
"overflowed" : printer_getMemoryWithUnitStr(globalTotalMem);
// Arrays that mem_isAllocated() reports as unallocated show `na` instead of
// a size.
255 {
"cpuAmps", mem_isAllocated(qureg.cpuAmps)? localMemStr : na},
256 {
"gpuAmps", mem_isAllocated(qureg.gpuAmps)? localMemStr : na},
257 {
"cpuCommBuffer", mem_isAllocated(qureg.cpuCommBuffer)? localMemStr : na},
258 {
"gpuCommBuffer", mem_isAllocated(qureg.gpuCommBuffer)? localMemStr : na},
259 {
"globalTotal", globalMemStr},
// NOTE(review): the lines below are the bodies of several Qureg-creating
// functions whose signatures (and any local declarations such as isDensMatr)
// are not visible in this excerpt. Which fragment belongs to which function
// is inferred only from argument names - confirm against the full file.

// Fully custom creation: forwards every flag unchanged.
275 return validateAndCreateCustomQureg(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, __func__);
// Automatic deployment: all three deployment flags are passed as
// modeflag::USE_AUTO.
282 int autoMode = modeflag::USE_AUTO;
283 return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
// Second automatic-deployment fragment, identical in the visible lines to the
// one above; presumably the two differ in the value of isDensMatr.
290 int autoMode = modeflag::USE_AUTO;
291 return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
// Environment-matched deployment: the deployment flags are copied from `env`,
// which is validated as initialised before its fields are read.
296 validate_envIsInit(__func__);
301 return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
// Second environment-matched fragment, identical in the visible lines to the
// one above.
306 validate_envIsInit(__func__);
311 return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
// NOTE(review): body fragment; the enclosing signature, the copying of
// amplitudes into `clone`, and the return are not visible in this excerpt.
316 validate_quregFields(qureg, __func__);
// Creates a new register with the same dimensions and deployment flags as
// the validated `qureg`.
319 Qureg clone = validateAndCreateCustomQureg(
320 qureg.numQubits, qureg.isDensityMatrix, qureg.isDistributed,
321 qureg.isGpuAccelerated, qureg.isMultithreaded, __func__);
// NOTE(review): body fragment; the enclosing signature is not visible in
// this excerpt.
331 validate_quregFields(qureg, __func__);
// CPU amplitudes are released unconditionally.
334 cpu_deallocArray(qureg.cpuAmps);
// Each remaining array is released only under the deployment condition that
// validateAndCreateCustomQureg used to allocate it.
337 if (qureg.isDistributed)
338 cpu_deallocArray(qureg.cpuCommBuffer);
341 if (qureg.isGpuAccelerated)
342 gpu_deallocArray(qureg.gpuAmps);
345 if (qureg.isGpuAccelerated && qureg.isDistributed)
346 gpu_deallocArray(qureg.gpuCommBuffer);
// NOTE(review): body fragment; the enclosing signature is not visible in
// this excerpt.
// Validates the register and the reported-newlines setting before printing.
354 validate_quregFields(qureg, __func__);
355 validate_numReportedNewlinesAboveZero(__func__);
// Prints a "Qureg" label followed by the four information sections in a
// fixed order.
360 print_label(
"Qureg");
361 printDeploymentInfo(qureg);
362 printDimensionInfo(qureg);
363 printDistributionInfo(qureg);
364 printMemoryInfo(qureg);
// NOTE(review): presumably emits the trailing newlines, less the one already
// printed - confirm in the printer module.
367 print_oneFewerNewlines();
// NOTE(review): body fragment; the enclosing signature and the printing of
// the amplitudes themselves are not visible in this excerpt.
372 validate_quregFields(qureg, __func__);
373 validate_numReportedNewlinesAboveZero(__func__);
// Memory figure passed to the header: the local array requirement plus the
// size of the Qureg struct itself.
377 size_t localMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
// NOTE(review): the embedded line numbers jump from 378 to 382, so the
// statement this `if` originally guarded is missing from the excerpt. As
// shown, the `if` would wrongly appear to guard the sizeof addition below.
378 if (qureg.isDistributed)
382 localMem +=
sizeof(qureg);
384 print_header(qureg, localMem);
388 print_oneFewerNewlines();
// NOTE(review): two body fragments; the enclosing signatures are not visible
// in this excerpt.

// CPU -> GPU copy of the whole register; nothing is copied unless the
// register is GPU-accelerated.
393 validate_quregFields(qureg, __func__);
396 if (qureg.isGpuAccelerated)
397 gpu_copyCpuToGpu(qureg);
// GPU -> CPU copy of the whole register, under the same condition.
400 validate_quregFields(qureg, __func__);
403 if (qureg.isGpuAccelerated)
404 gpu_copyGpuToCpu(qureg);
// NOTE(review): two body fragments; the enclosing signatures are not visible
// in this excerpt. In each, the statement guarded by the `if` is missing
// (the embedded line numbers jump); presumably it is an early return for
// registers without GPU acceleration - confirm against the full file.

// CPU -> GPU copy of numLocalAmps amplitudes starting at localStartInd, using
// the same offset into both arrays.
409 validate_quregFields(qureg, __func__);
410 validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);
420 if (!qureg.isGpuAccelerated)
425 gpu_copyCpuToGpu(&qureg.cpuAmps[localStartInd], &qureg.gpuAmps[localStartInd], numLocalAmps);
// GPU -> CPU copy of the same sub-range.
428 validate_quregFields(qureg, __func__);
429 validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);
439 if (!qureg.isGpuAccelerated)
444 gpu_copyGpuToCpu(&qureg.gpuAmps[localStartInd], &qureg.cpuAmps[localStartInd], numLocalAmps);
// NOTE(review): two body fragments; the enclosing signatures are not visible
// in this excerpt.

// State-vector form: validates the register type and index range, then
// delegates to the localiser to fill outAmps.
449 validate_quregFields(qureg, __func__);
450 validate_quregIsStateVector(qureg, __func__);
451 validate_basisStateIndices(qureg, startInd, numAmps, __func__);
453 localiser_statevec_getAmps(outAmps, qureg, startInd, numAmps);
// Density-matrix form: validates the register type and row/column range,
// then delegates to the localiser to fill outAmps.
458 validate_quregFields(qureg, __func__);
459 validate_quregIsDensityMatrix(qureg, __func__);
460 validate_basisStateRowCols(qureg, startRow, startCol, numRows, numCols, __func__);
462 localiser_densmatr_getAmps(outAmps, qureg, startRow, startCol, numRows, numCols);
// NOTE(review): fragments of the single-amplitude getters and their
// extern "C" wrappers. The getters' signatures, the wrappers' bodies, and the
// return of `amp` are not visible in this excerpt.

// State-vector getter: validates, then returns the amplitude at `index`.
485 validate_quregFields(qureg, __func__);
486 validate_quregIsStateVector(qureg, __func__);
487 validate_basisStateIndex(qureg, index, __func__);
489 return localiser_statevec_getAmp(qureg, index);
// C-linkage wrapper taking an output pointer.
// NOTE(review): presumably it writes the getter's result to `out` - body not
// visible.
491extern "C" void _wrap_getQuregAmp(qcomp* out,
Qureg qureg, qindex index) {
// Density-matrix getter: (row, column) is converted to a flat index and read
// through the state-vector localiser routine.
498 validate_quregFields(qureg, __func__);
499 validate_quregIsDensityMatrix(qureg, __func__);
500 validate_basisStateRowCol(qureg, row, column, __func__);
502 qindex ind = util_getGlobalFlatIndex(qureg, row, column);
503 qcomp amp = localiser_statevec_getAmp(qureg, ind);
// C-linkage wrapper for the density-matrix getter; body not visible.
506extern "C" void _wrap_getDensityQuregAmp(qcomp* out,
Qureg qureg, qindex row, qindex column) {
// NOTE(review): fragments of getters that allocate their own output storage.
// The enclosing signatures, the declarations of the first `out` and of
// `ptrs`, the calls that fill the storage, and the returns are not visible in
// this excerpt.

// Each callback is handed to the allocation helper, presumably to be invoked
// when allocation fails - confirm in the utilities module. It calls the
// validator with a hard-coded `false`, together with the element count and
// element size.
522 auto callback = [&]() { validate_tempAllocSucceeded(
false, numAmps,
sizeof(qcomp), __func__); };
523 util_tryAllocVector(out, numAmps, callback);
// Matrix form: a numRows x numCols nested vector, reported to the validator
// as numRows * numCols elements.
534 vector<vector<qcomp>> out;
535 qindex numElems = numRows * numCols;
536 auto callback1 = [&]() { validate_tempAllocSucceeded(
false, numElems,
sizeof(qcomp), __func__); };
537 util_tryAllocMatrix(out, numRows, numCols, callback1);
// A separate array of numRows row pointers is allocated, then pointed at each
// row's storage. The pointers are only valid while `out` is alive and
// unresized.
541 auto callback2 = [&]() { validate_tempAllocSucceeded(
false, numRows,
sizeof(qcomp*), __func__); };
542 util_tryAllocVector(ptrs, numRows, callback2);
545 for (qindex i=0; i<numRows; i++)
546 ptrs[i] = out[i].data();
void setQuregToClone(Qureg targetQureg, Qureg copyQureg)
void initZeroState(Qureg qureg)
Qureg createDensityQureg(int numQubits)
Qureg createForcedQureg(int numQubits)
Qureg createForcedDensityQureg(int numQubits)
Qureg createCloneQureg(Qureg qureg)
Qureg createCustomQureg(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int useMultithread)
Qureg createQureg(int numQubits)
void destroyQureg(Qureg qureg)
qcomp getQuregAmp(Qureg qureg, qindex index)
void getDensityQuregAmps(qcomp **outAmps, Qureg qureg, qindex startRow, qindex startCol, qindex numRows, qindex numCols)
void getQuregAmps(qcomp *outAmps, Qureg qureg, qindex startInd, qindex numAmps)
qcomp getDensityQuregAmp(Qureg qureg, qindex row, qindex column)
void reportQureg(Qureg qureg)
void reportQuregParams(Qureg qureg)
void syncQuregFromGpu(Qureg qureg)
void syncSubQuregToGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps)
void syncSubQuregFromGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps)
void syncQuregToGpu(Qureg qureg)