8#include "quest/include/qureg.h"
9#include "quest/include/environment.h"
10#include "quest/include/initialisations.h"
12#include "quest/src/core/validation.hpp"
13#include "quest/src/core/autodeployer.hpp"
14#include "quest/src/core/printer.hpp"
15#include "quest/src/core/bitwise.hpp"
16#include "quest/src/core/memory.hpp"
17#include "quest/src/core/utilities.hpp"
18#include "quest/src/core/localiser.hpp"
19#include "quest/src/comm/comm_config.hpp"
20#include "quest/src/comm/comm_routines.hpp"
21#include "quest/src/cpu/cpu_config.hpp"
22#include "quest/src/gpu/gpu_config.hpp"
// Fills in the non-pointer fields of a Qureg from the requested qubit count
// and deployment flags, and sets all four amplitude/buffer pointers to nullptr.
// The declaration of `out`, the origin of `env`, and the return statement are
// not visible in this excerpt.
//   numQubits      - number of qubits; its double is used as the exponent for density matrices
//   isDensMatr     - non-zero selects density-matrix sizing
//   useDistrib     - non-zero takes rank and node count from env, otherwise 0 and 1
//   useGpuAccel    - stored in out.isGpuAccelerated
//   useMultithread - stored in out.isMultithreaded
37Qureg qureg_populateNonHeapFields(
int numQubits,
int isDensMatr,
int useDistrib,
int useGpuAccel,
int useMultithread) {
// log2 of the node count when distributed, otherwise 0
42 int logNumNodes = (useDistrib)?
43 logBase2(env.numNodes) : 0;
// exponent of the per-node amplitude count: the global exponent (numQubits,
// or 2*numQubits for a density matrix) minus logNumNodes
44 qindex logNumAmpsPerNode = (isDensMatr)?
45 (2*numQubits - logNumNodes) :
46 ( numQubits - logNumNodes);
// deployment flags are stored exactly as passed in
52 out.isMultithreaded = useMultithread;
53 out.isGpuAccelerated = useGpuAccel;
54 out.isDistributed = useDistrib;
// a non-distributed Qureg ignores env and describes a single node of rank 0
61 out.rank = (useDistrib)? env.rank : 0;
62 out.numNodes = (useDistrib)? env.numNodes : 1;
63 out.logNumNodes = (useDistrib)? logBase2(env.numNodes) : 0;
// global dimensions: a density matrix has 2^(2*numQubits) amplitudes
66 out.isDensityMatrix = isDensMatr;
67 out.numQubits = numQubits;
68 out.numAmps = (isDensMatr)? powerOf2(2*numQubits) : powerOf2(numQubits);
69 out.logNumAmps = (isDensMatr)? 2*numQubits : numQubits;
// per-node dimensions; logNumColsPerNode is only non-zero for density matrices
72 out.numAmpsPerNode = powerOf2(logNumAmpsPerNode);
73 out.logNumAmpsPerNode = logNumAmpsPerNode;
74 out.logNumColsPerNode = (isDensMatr)? numQubits - logNumNodes : 0;
// no memory is allocated here; every pointer starts as nullptr
77 out.cpuAmps =
nullptr;
78 out.gpuAmps =
nullptr;
79 out.cpuCommBuffer =
nullptr;
80 out.gpuCommBuffer =
nullptr;
// Tests, with mem_isAllocated, each array that this Qureg's deployment flags
// require. The statement guarded by each `if` and the final return are not
// visible in this excerpt; presumably each failed check reports true - confirm.
92bool didAnyLocalAllocsFail(
Qureg qureg) {
// cpuAmps is checked regardless of deployment
95 if (! mem_isAllocated(qureg.cpuAmps))
// the CPU communication buffer is only checked when distributed
99 if (qureg.isDistributed && ! mem_isAllocated(qureg.cpuCommBuffer))
// the GPU amplitudes are only checked when GPU-accelerated
103 if (qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuAmps))
// the GPU communication buffer is only checked when both flags are set
107 if (qureg.isDistributed && qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuCommBuffer))
// Combines this node's allocation-failure flag with the other nodes' flags
// through comm_isTrueOnAllNodes. Lines between and after the two statements
// (including the return) are not visible in this excerpt.
115bool didAnyAllocsFailOnAnyNode(
Qureg qureg) {
// start from the local result
117 bool anyFail = didAnyLocalAllocsFail(qureg);
// NOTE(review): this function's name says "any node", but the helper called
// here is named "...OnAllNodes". If that helper is a logical AND across nodes,
// a failure on only some nodes would be reported as success - confirm the
// helper's semantics against its definition.
119 anyFail = comm_isTrueOnAllNodes(anyFail);
// Releases the Qureg's CPU arrays, and its GPU arrays when GPU-accelerated,
// following a check of didAnyAllocsFailOnAnyNode.
125void freeAllMemoryIfAnyAllocsFailed(
Qureg qureg) {
// the statement guarded by this check is not visible in this excerpt;
// presumably an early return so the frees below only run after a failure - confirm
128 if (!didAnyAllocsFailOnAnyNode(qureg))
// both CPU pointers are passed unconditionally, so cpu_deallocArray may
// receive a pointer that was never allocated - TODO confirm it accepts nullptr
132 cpu_deallocArray(qureg.cpuAmps);
133 cpu_deallocArray(qureg.cpuCommBuffer);
// GPU frees are only attempted for GPU-accelerated Quregs; gpuCommBuffer is
// passed even when the Qureg is not distributed - TODO confirm nullptr is accepted
136 if (qureg.isGpuAccelerated) {
137 gpu_deallocArray(qureg.gpuAmps);
138 gpu_deallocArray(qureg.gpuCommBuffer);
// Validates the requested parameters, builds the Qureg's non-heap fields,
// allocates the arrays its deployment needs, and validates the allocations.
// The return statement is not visible in this excerpt.
//   numQubits, isDensMatr                - forwarded to validation and field population
//   useDistrib, useGpuAccel, useMultithread - deployment flags
//   caller                               - name passed to the first two validation calls
143Qureg validateAndCreateCustomQureg(
int numQubits,
int isDensMatr,
int useDistrib,
int useGpuAccel,
int useMultithread,
const char* caller) {
145 validate_envIsInit(caller);
149 validate_newQuregParams(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env, caller);
// presumably resolves automatic deployment flags in place before they are
// used as booleans below - confirm against the autodeployer's signature
152 autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env);
154 Qureg qureg = qureg_populateNonHeapFields(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread);
// the CPU amplitudes are allocated for every deployment
157 qureg.cpuAmps = cpu_allocArray(qureg.numAmpsPerNode);
// the remaining arrays are allocated only when their deployment flags are
// set, and are otherwise left as nullptr; all use the per-node amplitude count
162 qureg.gpuAmps = (useGpuAccel)? gpu_allocArray(qureg.numAmpsPerNode) :
nullptr;
163 qureg.cpuCommBuffer = (useDistrib)? cpu_allocArray(qureg.numAmpsPerNode) :
nullptr;
164 qureg.gpuCommBuffer = (useGpuAccel && useDistrib)? gpu_allocArray(qureg.numAmpsPerNode) :
nullptr;
// clean up before validating, so memory is released ahead of the validation call
167 freeAllMemoryIfAnyAllocsFailed(qureg);
// NOTE(review): the earlier validations receive `caller`, but this one is
// given __func__, which names this internal function - confirm that is intended
168 validate_newQuregAllocs(qureg, __func__);
// Pairs three labels with the Qureg's deployment flags. The call that consumes
// this brace-enclosed list is not visible in this excerpt.
183void printDeploymentInfo(
Qureg qureg) {
// label -> flag: distribution, GPU acceleration, multithreading
187 {
"isMpiEnabled", qureg.isDistributed},
188 {
"isGpuEnabled", qureg.isGpuAccelerated},
189 {
"isOmpEnabled", qureg.isMultithreaded},
// Builds display strings for the Qureg's global dimensions and pairs them with
// labels. The declarations of ampsStr and colsStr, the start of the colsStr
// assignment, and the call consuming the final list are not visible here.
193void printDimensionInfo(
Qureg qureg) {
// brings bt and eq into scope; presumably formatting substrings - confirm
195 using namespace printer_substrings;
// exponent of the amplitude count (numQubits, doubled for a density matrix),
// followed by the amplitude count itself
199 ampsStr = bt + printer_toStr(qureg.numQubits * (qureg.isDensityMatrix? 2 : 1));
200 ampsStr += eq + printer_toStr(qureg.numAmps);
// only density matrices get a column string: exponent numQubits and 2^numQubits
203 if (qureg.isDensityMatrix)
205 bt + printer_toStr(qureg.numQubits) +
206 eq + printer_toStr(powerOf2(qureg.numQubits)));
// label -> string pairs for the dimension fields
210 {
"isDensMatr", printer_toStr(qureg.isDensityMatrix)},
211 {
"numQubits", printer_toStr(qureg.numQubits)},
212 {
"numCols", colsStr},
213 {
"numAmps", ampsStr},
// Builds display strings for the Qureg's per-node partitioning and pairs them
// with labels. The declarations of ampsStr and colsStr and the call consuming
// the final list are not visible in this excerpt.
218void printDistributionInfo(
Qureg qureg) {
// brings na, bt, eq and pn into scope; presumably formatting substrings - confirm
220 using namespace printer_substrings;
// nodesStr starts as na and is only overwritten for distributed Quregs
223 string nodesStr = na;
228 if (qureg.isDistributed) {
// exponent and value of the node count
229 nodesStr = bt + printer_toStr(qureg.logNumNodes) + eq + printer_toStr(qureg.numNodes);
// exponent and value of the per-node amplitude count, with pn appended
230 ampsStr = bt + printer_toStr(qureg.logNumAmpsPerNode) + eq + printer_toStr(qureg.numAmpsPerNode) + pn;
// per-node column count is only shown for density matrices
231 if (qureg.isDensityMatrix)
232 colsStr = bt + printer_toStr(qureg.logNumColsPerNode) + eq + printer_toStr(powerOf2(qureg.logNumColsPerNode)) + pn;
// label -> string pairs for the distribution fields
237 {
"numNodes", nodesStr},
238 {
"numCols", colsStr},
239 {
"numAmps", ampsStr},
// Builds display strings for the Qureg's memory usage and pairs them with
// labels. The call consuming the final list is not visible in this excerpt.
244void printMemoryInfo(
Qureg qureg) {
// brings pn and na into scope; presumably formatting substrings - confirm
246 using namespace printer_substrings;
// memory for one array of numAmpsPerNode amplitudes; pn is appended only
// when the Qureg is distributed
248 size_t localArrayMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
249 string localMemStr = printer_getMemoryWithUnitStr(localArrayMem) + (qureg.isDistributed? pn :
"");
// a total of 0 is displayed as "overflowed"; presumably the helper returns 0
// when the total cannot be represented - confirm against its definition
253 qindex globalTotalMem = mem_getTotalGlobalMemoryUsed(qureg);
254 string globalMemStr = (globalTotalMem == 0)?
"overflowed" : printer_getMemoryWithUnitStr(globalTotalMem);
// every allocated array shows the same localMemStr; unallocated ones show na
258 {
"cpuAmps", mem_isAllocated(qureg.cpuAmps)? localMemStr : na},
259 {
"gpuAmps", mem_isAllocated(qureg.gpuAmps)? localMemStr : na},
260 {
"cpuCommBuffer", mem_isAllocated(qureg.cpuCommBuffer)? localMemStr : na},
261 {
"gpuCommBuffer", mem_isAllocated(qureg.gpuCommBuffer)? localMemStr : na},
262 {
"globalTotal", globalMemStr},
278 return validateAndCreateCustomQureg(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, __func__);
285 int autoMode = modeflag::USE_AUTO;
286 return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
293 int autoMode = modeflag::USE_AUTO;
294 return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
299 validate_envIsInit(__func__);
304 return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
309 validate_envIsInit(__func__);
314 return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
319 validate_quregFields(qureg, __func__);
322 Qureg clone = validateAndCreateCustomQureg(
323 qureg.numQubits, qureg.isDensityMatrix, qureg.isDistributed,
324 qureg.isGpuAccelerated, qureg.isMultithreaded, __func__);
334 validate_quregFields(qureg, __func__);
337 cpu_deallocArray(qureg.cpuAmps);
340 if (qureg.isDistributed)
341 cpu_deallocArray(qureg.cpuCommBuffer);
344 if (qureg.isGpuAccelerated)
345 gpu_deallocArray(qureg.gpuAmps);
348 if (qureg.isGpuAccelerated && qureg.isDistributed)
349 gpu_deallocArray(qureg.gpuCommBuffer);
357 validate_quregFields(qureg, __func__);
358 validate_numReportedNewlinesAboveZero(__func__);
363 print_label(
"Qureg");
364 printDeploymentInfo(qureg);
365 printDimensionInfo(qureg);
366 printDistributionInfo(qureg);
367 printMemoryInfo(qureg);
370 print_oneFewerNewlines();
375 validate_quregFields(qureg, __func__);
376 validate_numReportedNewlinesAboveZero(__func__);
380 size_t localMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
381 if (qureg.isDistributed)
385 localMem +=
sizeof(qureg);
387 print_header(qureg, localMem);
391 print_oneFewerNewlines();
396 validate_quregFields(qureg, __func__);
399 if (qureg.isGpuAccelerated)
400 gpu_copyCpuToGpu(qureg);
403 validate_quregFields(qureg, __func__);
406 if (qureg.isGpuAccelerated)
407 gpu_copyGpuToCpu(qureg);
412 validate_quregFields(qureg, __func__);
413 validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);
423 if (!qureg.isGpuAccelerated)
428 gpu_copyCpuToGpu(&qureg.cpuAmps[localStartInd], &qureg.gpuAmps[localStartInd], numLocalAmps);
431 validate_quregFields(qureg, __func__);
432 validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);
442 if (!qureg.isGpuAccelerated)
447 gpu_copyGpuToCpu(&qureg.gpuAmps[localStartInd], &qureg.cpuAmps[localStartInd], numLocalAmps);
452 validate_quregFields(qureg, __func__);
453 validate_quregIsStateVector(qureg, __func__);
454 validate_basisStateIndices(qureg, startInd, numAmps, __func__);
456 localiser_statevec_getAmps(outAmps, qureg, startInd, numAmps);
461 validate_quregFields(qureg, __func__);
462 validate_quregIsDensityMatrix(qureg, __func__);
463 validate_basisStateRowCols(qureg, startRow, startCol, numRows, numCols, __func__);
465 localiser_densmatr_getAmps(outAmps, qureg, startRow, startCol, numRows, numCols);
488 validate_quregFields(qureg, __func__);
489 validate_quregIsStateVector(qureg, __func__);
490 validate_basisStateIndex(qureg, index, __func__);
492 return localiser_statevec_getAmp(qureg, index);
494extern "C" void _wrap_getQuregAmp(qcomp* out,
Qureg qureg, qindex index) {
501 validate_quregFields(qureg, __func__);
502 validate_quregIsDensityMatrix(qureg, __func__);
503 validate_basisStateRowCol(qureg, row, column, __func__);
505 qindex ind = util_getGlobalFlatIndex(qureg, row, column);
506 qcomp amp = localiser_statevec_getAmp(qureg, ind);
509extern "C" void _wrap_getDensityQuregAmp(qcomp* out,
Qureg qureg, qindex row, qindex column) {
525 auto callback = [&]() { validate_tempAllocSucceeded(
false, numAmps,
sizeof(qcomp), __func__); };
526 util_tryAllocVector(out, numAmps, callback);
537 vector<vector<qcomp>> out;
538 qindex numElems = numRows * numCols;
539 auto callback1 = [&]() { validate_tempAllocSucceeded(
false, numElems,
sizeof(qcomp), __func__); };
540 util_tryAllocMatrix(out, numRows, numCols, callback1);
544 auto callback2 = [&]() { validate_tempAllocSucceeded(
false, numRows,
sizeof(qcomp*), __func__); };
545 util_tryAllocVector(ptrs, numRows, callback2);
548 for (qindex i=0; i<numRows; i++)
549 ptrs[i] = out[i].data();
void setQuregToClone(Qureg targetQureg, Qureg copyQureg)
void initZeroState(Qureg qureg)
Qureg createDensityQureg(int numQubits)
Qureg createForcedQureg(int numQubits)
Qureg createForcedDensityQureg(int numQubits)
Qureg createCloneQureg(Qureg qureg)
Qureg createCustomQureg(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int useMultithread)
Qureg createQureg(int numQubits)
void destroyQureg(Qureg qureg)
qcomp getQuregAmp(Qureg qureg, qindex index)
void getDensityQuregAmps(qcomp **outAmps, Qureg qureg, qindex startRow, qindex startCol, qindex numRows, qindex numCols)
void getQuregAmps(qcomp *outAmps, Qureg qureg, qindex startInd, qindex numAmps)
qcomp getDensityQuregAmp(Qureg qureg, qindex row, qindex column)
void reportQureg(Qureg qureg)
void reportQuregParams(Qureg qureg)
void syncQuregFromGpu(Qureg qureg)
void syncSubQuregToGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps)
void syncSubQuregFromGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps)
void syncQuregToGpu(Qureg qureg)