8#include "quest/include/qureg.h"
9#include "quest/include/modes.h"
10#include "quest/include/environment.h"
11#include "quest/include/initialisations.h"
13#include "quest/src/core/validation.hpp"
14#include "quest/src/core/autodeployer.hpp"
15#include "quest/src/core/printer.hpp"
16#include "quest/src/core/bitwise.hpp"
17#include "quest/src/core/memory.hpp"
18#include "quest/src/core/utilities.hpp"
19#include "quest/src/core/localiser.hpp"
20#include "quest/src/comm/comm_config.hpp"
21#include "quest/src/comm/comm_routines.hpp"
22#include "quest/src/cpu/cpu_config.hpp"
23#include "quest/src/gpu/gpu_config.hpp"
// Populates the deployment and dimension fields of a Qureg (`out`) from the
// requested qubit count and deployment flags, and sets its four array pointers
// to nullptr. No memory is allocated by the visible statements.
//
// numQubits       number of qubits represented
// isDensMatr      non-zero for a density matrix (doubles the exponent of the amp count)
// useDistrib      non-zero to take rank/node count from `env`, else rank 0 of 1 node
// useGpuAccel     copied verbatim into out.isGpuAccelerated
// useMultithread  copied verbatim into out.isMultithreaded
//
// NOTE(review): the declarations of `env` and `out`, and the statement that
// returns `out`, are not visible in this view of the file — confirm them
// against the full source before editing this function.
38Qureg qureg_populateNonHeapFields(
int numQubits,
int isDensMatr,
int useDistrib,
int useGpuAccel,
int useMultithread) {
// computed up-front because several fields below reuse these two exponents
43 int logNumNodes = (useDistrib)?
44 logBase2(env.numNodes) : 0;
// per-node exponent = total exponent (2N for density matrices, else N) minus log2(nodes)
45 qindex logNumAmpsPerNode = (isDensMatr)?
46 (2*numQubits - logNumNodes) :
47 ( numQubits - logNumNodes);
// deployment flags are stored exactly as passed in
53 out.isMultithreaded = useMultithread;
54 out.isGpuAccelerated = useGpuAccel;
55 out.isDistributed = useDistrib;
// a non-distributed Qureg is described as rank 0 of a single node
62 out.rank = (useDistrib)? env.rank : 0;
63 out.numNodes = (useDistrib)? env.numNodes : 1;
64 out.logNumNodes = (useDistrib)? logBase2(env.numNodes) : 0;
// global dimensions
67 out.isDensityMatrix = isDensMatr;
68 out.numQubits = numQubits;
69 out.numAmps = (isDensMatr)? powerOf2(2*numQubits) : powerOf2(numQubits);
70 out.logNumAmps = (isDensMatr)? 2*numQubits : numQubits;
// per-node dimensions; logNumColsPerNode is zero for state-vectors
73 out.numAmpsPerNode = powerOf2(logNumAmpsPerNode);
74 out.logNumAmpsPerNode = logNumAmpsPerNode;
75 out.logNumColsPerNode = (isDensMatr)? numQubits - logNumNodes : 0;
// all array pointers start as nullptr; allocation is left to the caller
78 out.cpuAmps =
nullptr;
79 out.gpuAmps =
nullptr;
80 out.cpuCommBuffer =
nullptr;
81 out.gpuCommBuffer =
nullptr;
// Inspects the four array pointers of `qureg` with mem_isAllocated(), testing
// only those arrays which the Qureg's deployment flags say should exist.
// NOTE(review): the statements guarded by each `if`, and the final return, are
// not visible in this view — presumably each failed check returns true and the
// fall-through returns false; confirm against the full source.
93bool didAnyLocalAllocsFail(
Qureg qureg) {
// cpuAmps is checked unconditionally
96 if (! mem_isAllocated(qureg.cpuAmps))
// the CPU communication buffer is only checked for distributed Quregs
100 if (qureg.isDistributed && ! mem_isAllocated(qureg.cpuCommBuffer))
// GPU amplitudes are only checked for GPU-accelerated Quregs
104 if (qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuAmps))
// the GPU communication buffer is only checked when both distributed and GPU-accelerated
108 if (qureg.isDistributed && qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuCommBuffer))
// Combines this node's allocation-failure flag (from didAnyLocalAllocsFail)
// with the other nodes' flags via comm_isTrueOnAllNodes().
// NOTE(review): this function's name asks whether ANY node failed, yet the
// helper's name suggests it reports whether the flag is true on ALL nodes. If
// the helper is a logical-AND reduction, a failure on only some nodes would be
// reported as "no failure" here; the any-node form would be
// `! comm_isTrueOnAllNodes(! anyFail)`. Confirm the helper's semantics.
// NOTE(review): a line between the two statements below, and the final return,
// are not visible in this view — confirm whether the reduction is guarded.
116bool didAnyAllocsFailOnAnyNode(
Qureg qureg) {
118 bool anyFail = didAnyLocalAllocsFail(qureg);
120 anyFail = comm_isTrueOnAllNodes(anyFail);
126void freeAllMemoryIfAnyAllocsFailed(
Qureg qureg) {
129 if (!didAnyAllocsFailOnAnyNode(qureg))
133 cpu_deallocArray(qureg.cpuAmps);
134 cpu_deallocArray(qureg.cpuCommBuffer);
137 if (qureg.isGpuAccelerated) {
138 gpu_deallocArray(qureg.gpuAmps);
139 gpu_deallocArray(qureg.gpuCommBuffer);
// Shared implementation behind the Qureg creation functions: validates the
// request, builds the Qureg's non-heap fields, allocates its arrays, and
// validates that the allocations succeeded.
//
// numQubits, isDensMatr                     dimensions of the requested Qureg
// useDistrib, useGpuAccel, useMultithread   deployment flags (callers in this file also pass modeflag::USE_AUTO)
// caller                                    function name forwarded to the validators for error reporting
//
// NOTE(review): the declaration of `env` and the end of this function
// (including its return statement) are not visible in this view.
144Qureg validateAndCreateCustomQureg(
int numQubits,
int isDensMatr,
int useDistrib,
int useGpuAccel,
int useMultithread,
const char* caller) {
146 validate_envIsInit(caller);
150 validate_newQuregParams(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env, caller);
// presumably replaces any automatic flags with concrete choices — TODO confirm how the flags are passed
153 autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env);
155 Qureg qureg = qureg_populateNonHeapFields(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread);
// cpuAmps is always allocated, and uses the NUMA allocator (so needs the NUMA deallocator)
158 qureg.cpuAmps = cpu_allocNumaArray(qureg.numAmpsPerNode);
// the remaining arrays are allocated only when their deployment needs them, else left as nullptr;
// every array has numAmpsPerNode elements
163 qureg.gpuAmps = (useGpuAccel)? gpu_allocArray(qureg.numAmpsPerNode) :
nullptr;
164 qureg.cpuCommBuffer = (useDistrib)? cpu_allocArray(qureg.numAmpsPerNode) :
nullptr;
165 qureg.gpuCommBuffer = (useGpuAccel && useDistrib)? gpu_allocArray(qureg.numAmpsPerNode) :
nullptr;
// memory is freed BEFORE validating the allocations, so cleanup has already happened if the validator reports a failure
168 freeAllMemoryIfAnyAllocsFailed(qureg);
// NOTE(review): every other validator here receives `caller`, but this one receives
// __func__, so an allocation failure would be attributed to this internal function
// rather than the user-facing function named by `caller` — likely should be `caller`.
169 validate_newQuregAllocs(qureg, __func__);
// Reports the three deployment flags of `qureg` as label/value rows.
// NOTE(review): the call which consumes these rows (and its table title) is
// not visible in this view — confirm against the full source.
184void printDeploymentInfo(
Qureg qureg) {
// the labels say "Enabled", but the values are this Qureg's own deployment flags
188 {
"isMpiEnabled", qureg.isDistributed},
189 {
"isGpuEnabled", qureg.isGpuAccelerated},
190 {
"isOmpEnabled", qureg.isMultithreaded},
// Reports the global dimensions of `qureg`: whether it is a density matrix,
// its qubit count, and its column and amplitude counts as formatted strings.
// NOTE(review): the declarations of `ampsStr` and `colsStr`, the start of the
// statement spanning the `bt + ...` lines, and the call consuming the rows are
// not visible in this view.
194void printDimensionInfo(
Qureg qureg) {
// supplies the short string constants used below (bt, eq) — presumably; verify in printer.hpp
196 using namespace printer_substrings;
// the exponent shown is numQubits, doubled for density matrices, followed by the amplitude count
200 ampsStr = bt + printer_toStr(qureg.numQubits * (qureg.isDensityMatrix? 2 : 1));
201 ampsStr += eq + printer_toStr(qureg.numAmps);
// only density matrices get a column string, built from numQubits and 2^numQubits
204 if (qureg.isDensityMatrix)
206 bt + printer_toStr(qureg.numQubits) +
207 eq + printer_toStr(powerOf2(qureg.numQubits)));
211 {
"isDensMatr", printer_toStr(qureg.isDensityMatrix)},
212 {
"numQubits", printer_toStr(qureg.numQubits)},
213 {
"numCols", colsStr},
214 {
"numAmps", ampsStr},
// Reports how `qureg` is divided between nodes: the node count and the
// per-node column and amplitude counts, as formatted strings.
// NOTE(review): the declarations/defaults of `ampsStr` and `colsStr`, the
// closing brace of the `if`, and the call consuming the rows are not visible
// in this view.
219void printDistributionInfo(
Qureg qureg) {
// supplies the short string constants used below (na, bt, eq, pn) — presumably; verify in printer.hpp
221 using namespace printer_substrings;
// nodesStr keeps this default unless the Qureg is distributed
224 string nodesStr = na;
229 if (qureg.isDistributed) {
// each string pairs a log2 field with its corresponding count
230 nodesStr = bt + printer_toStr(qureg.logNumNodes) + eq + printer_toStr(qureg.numNodes);
231 ampsStr = bt + printer_toStr(qureg.logNumAmpsPerNode) + eq + printer_toStr(qureg.numAmpsPerNode) + pn;
// the per-node column count is only filled in for density matrices
232 if (qureg.isDensityMatrix)
233 colsStr = bt + printer_toStr(qureg.logNumColsPerNode) + eq + printer_toStr(powerOf2(qureg.logNumColsPerNode)) + pn;
238 {
"numNodes", nodesStr},
239 {
"numCols", colsStr},
240 {
"numAmps", ampsStr},
// Reports the memory attributed to each of the Qureg's four arrays, plus a
// global total.
// NOTE(review): the call consuming the rows below is not visible in this view.
245void printMemoryInfo(
Qureg qureg) {
// supplies the short string constants used below (pn, na) — presumably; verify in printer.hpp
247 using namespace printer_substrings;
// one size, derived from numAmpsPerNode, is reused for every allocated array below
249 size_t localArrayMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
// the `pn` suffix is appended only for distributed Quregs
250 string localMemStr = printer_getMemoryWithUnitStr(localArrayMem) + (qureg.isDistributed? pn :
"");
// a total of zero is displayed as the literal text "overflowed" rather than as a size
254 qindex globalTotalMem = mem_getTotalGlobalMemoryUsed(qureg);
255 string globalMemStr = (globalTotalMem == 0)?
"overflowed" : printer_getMemoryWithUnitStr(globalTotalMem);
// each array shows the shared per-array size when allocated, and `na` otherwise
259 {
"cpuAmps", mem_isAllocated(qureg.cpuAmps)? localMemStr : na},
260 {
"gpuAmps", mem_isAllocated(qureg.gpuAmps)? localMemStr : na},
261 {
"cpuCommBuffer", mem_isAllocated(qureg.cpuCommBuffer)? localMemStr : na},
262 {
"gpuCommBuffer", mem_isAllocated(qureg.gpuCommBuffer)? localMemStr : na},
263 {
"globalTotal", globalMemStr},
279 return validateAndCreateCustomQureg(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, __func__);
286 int autoMode = modeflag::USE_AUTO;
287 return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
294 int autoMode = modeflag::USE_AUTO;
295 return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
300 validate_envIsInit(__func__);
305 return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
310 validate_envIsInit(__func__);
315 return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
320 validate_quregFields(qureg, __func__);
323 Qureg clone = validateAndCreateCustomQureg(
324 qureg.numQubits, qureg.isDensityMatrix, qureg.isDistributed,
325 qureg.isGpuAccelerated, qureg.isMultithreaded, __func__);
335 validate_quregFields(qureg, __func__);
338 cpu_deallocNumaArray(qureg.cpuAmps, qureg.numAmpsPerNode);
341 if (qureg.isDistributed)
342 cpu_deallocArray(qureg.cpuCommBuffer);
345 if (qureg.isGpuAccelerated)
346 gpu_deallocArray(qureg.gpuAmps);
349 if (qureg.isGpuAccelerated && qureg.isDistributed)
350 gpu_deallocArray(qureg.gpuCommBuffer);
358 validate_quregFields(qureg, __func__);
359 validate_numReportedNewlinesAboveZero(__func__);
364 print_label(
"Qureg");
365 printDeploymentInfo(qureg);
366 printDimensionInfo(qureg);
367 printDistributionInfo(qureg);
368 printMemoryInfo(qureg);
371 print_oneFewerNewlines();
376 validate_quregFields(qureg, __func__);
377 validate_numReportedNewlinesAboveZero(__func__);
381 size_t localMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
382 if (qureg.isDistributed)
386 localMem +=
sizeof(qureg);
388 print_header(qureg, localMem);
392 print_oneFewerNewlines();
397 validate_quregFields(qureg, __func__);
400 if (qureg.isGpuAccelerated)
401 gpu_copyCpuToGpu(qureg);
404 validate_quregFields(qureg, __func__);
407 if (qureg.isGpuAccelerated)
408 gpu_copyGpuToCpu(qureg);
413 validate_quregFields(qureg, __func__);
414 validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);
424 if (!qureg.isGpuAccelerated)
429 gpu_copyCpuToGpu(&qureg.cpuAmps[localStartInd], &qureg.gpuAmps[localStartInd], numLocalAmps);
432 validate_quregFields(qureg, __func__);
433 validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);
443 if (!qureg.isGpuAccelerated)
448 gpu_copyGpuToCpu(&qureg.gpuAmps[localStartInd], &qureg.cpuAmps[localStartInd], numLocalAmps);
453 validate_quregFields(qureg, __func__);
454 validate_quregIsStateVector(qureg, __func__);
455 validate_basisStateIndices(qureg, startInd, numAmps, __func__);
457 localiser_statevec_getAmps(outAmps, qureg, startInd, numAmps);
462 validate_quregFields(qureg, __func__);
463 validate_quregIsDensityMatrix(qureg, __func__);
464 validate_basisStateRowCols(qureg, startRow, startCol, numRows, numCols, __func__);
466 localiser_densmatr_getAmps(outAmps, qureg, startRow, startCol, numRows, numCols);
489 validate_quregFields(qureg, __func__);
490 validate_quregIsStateVector(qureg, __func__);
491 validate_basisStateIndex(qureg, index, __func__);
493 return localiser_statevec_getAmp(qureg, index);
495extern "C" void _wrap_getQuregAmp(qcomp* out,
Qureg qureg, qindex index) {
502 validate_quregFields(qureg, __func__);
503 validate_quregIsDensityMatrix(qureg, __func__);
504 validate_basisStateRowCol(qureg, row, column, __func__);
506 qindex ind = util_getGlobalFlatIndex(qureg, row, column);
507 qcomp amp = localiser_statevec_getAmp(qureg, ind);
510extern "C" void _wrap_getDensityQuregAmp(qcomp* out,
Qureg qureg, qindex row, qindex column) {
526 auto callback = [&]() { validate_tempAllocSucceeded(
false, numAmps,
sizeof(qcomp), __func__); };
527 util_tryAllocVector(out, numAmps, callback);
538 vector<vector<qcomp>> out;
539 qindex numElems = numRows * numCols;
540 auto callback1 = [&]() { validate_tempAllocSucceeded(
false, numElems,
sizeof(qcomp), __func__); };
541 util_tryAllocMatrix(out, numRows, numCols, callback1);
545 auto callback2 = [&]() { validate_tempAllocSucceeded(
false, numRows,
sizeof(qcomp*), __func__); };
546 util_tryAllocVector(ptrs, numRows, callback2);
549 for (qindex i=0; i<numRows; i++)
550 ptrs[i] = out[i].data();
void setQuregToClone(Qureg outQureg, Qureg inQureg)
void initZeroState(Qureg qureg)
Qureg createDensityQureg(int numQubits)
Qureg createForcedQureg(int numQubits)
Qureg createForcedDensityQureg(int numQubits)
Qureg createCloneQureg(Qureg qureg)
Qureg createCustomQureg(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int useMultithread)
Qureg createQureg(int numQubits)
void destroyQureg(Qureg qureg)
qcomp getQuregAmp(Qureg qureg, qindex index)
void getDensityQuregAmps(qcomp **outAmps, Qureg qureg, qindex startRow, qindex startCol, qindex numRows, qindex numCols)
void getQuregAmps(qcomp *outAmps, Qureg qureg, qindex startInd, qindex numAmps)
qcomp getDensityQuregAmp(Qureg qureg, qindex row, qindex column)
void reportQureg(Qureg qureg)
void reportQuregParams(Qureg qureg)
void syncQuregFromGpu(Qureg qureg)
void syncSubQuregToGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps)
void syncSubQuregFromGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps)
void syncQuregToGpu(Qureg qureg)