/** @file
 * Definitions of all dense and diagonal matrix structures, including their
 * creation, modification, synchronisation, destruction and reporting.
 */

#include "quest/include/matrices.h"
#include "quest/include/modes.h"
#include "quest/include/types.h"
#include "quest/include/environment.h"

#include "quest/src/core/validation.hpp"
#include "quest/src/core/autodeployer.hpp"
#include "quest/src/core/utilities.hpp"
#include "quest/src/core/localiser.hpp"
#include "quest/src/core/paulilogic.hpp"
#include "quest/src/core/printer.hpp"
#include "quest/src/core/bitwise.hpp"
#include "quest/src/core/fastmath.hpp"
#include "quest/src/core/memory.hpp"
#include "quest/src/comm/comm_config.hpp"
#include "quest/src/comm/comm_routines.hpp"
#include "quest/src/cpu/cpu_config.hpp"
#include "quest/src/gpu/gpu_config.hpp"
#include "quest/src/cpu/cpu_subroutines.hpp"

#include <vector>

using std::vector;

CompMatr1 getCompMatr1(vector<vector<qcomp>> in) {
    validate_matrixNumNewElems(1, in, __func__);

    qcomp* rowPtrs[] = {in[0].data(), in[1].data()};
    return getCompMatr1(rowPtrs);
}

CompMatr2 getCompMatr2(vector<vector<qcomp>> in) {
    validate_matrixNumNewElems(2, in, __func__);

    qcomp* rowPtrs[] = {in[0].data(), in[1].data(), in[2].data(), in[3].data()};
    return getCompMatr2(rowPtrs);
}

DiagMatr1 getDiagMatr1(vector<qcomp> in) {
    validate_matrixNumNewElems(1, in, __func__);

    return getDiagMatr1(in.data());
}

DiagMatr2 getDiagMatr2(vector<qcomp> in) {
    validate_matrixNumNewElems(2, in, __func__);

    return getDiagMatr2(in.data());
}

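/* Example usage (a sketch of the C++ list-literal overloads above):
 *
 *     CompMatr1 x = getCompMatr1({{0,1},{1,0}});   // Pauli-X
 *     DiagMatr1 z = getDiagMatr1({1,-1});          // Pauli-Z diagonal
 *
 * These fixed-size structs live on the stack, so they require no
 * destruction and no GPU synchronisation. */
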
// T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void freeHeapMatrix(T matr) {

    // dense matrices have a 2D row-pointer wrapper over flat CPU memory
    if constexpr (util_isDenseMatrixType<T>()) {
        cpu_deallocMatrixWrapper(matr.cpuElems);
        cpu_deallocArray(matr.cpuElemsFlat);
    } else {
        cpu_deallocArray(matr.cpuElems);
    }

    // free GPU memory only if it was ever allocated
    auto gpuPtr = util_getGpuMemPtr(matr);
    if (mem_isAllocated(gpuPtr))
        gpu_deallocArray(gpuPtr);

    // free the heap flags common to all matrix types
    util_deallocEpsilonSensitiveHeapFlag(matr.isApproxUnitary);
    util_deallocEpsilonSensitiveHeapFlag(matr.isApproxHermitian);
    cpu_deallocHeapFlag(matr.wasGpuSynced);

    // diagonal matrices additionally track non-zero-ness and non-negativity
    if constexpr (!util_isDenseMatrixType<T>()) {
        util_deallocEpsilonSensitiveHeapFlag(matr.isApproxNonZero);
        cpu_deallocHeapFlag(matr.isStrictlyNonNegative);
    }
}

template <class T>
bool didAnyLocalAllocsFail(T matr) {

    // all heap flags must have allocated
    if (!mem_isAllocated(matr.isApproxUnitary))   return true;
    if (!mem_isAllocated(matr.isApproxHermitian)) return true;
    if (!mem_isAllocated(matr.wasGpuSynced))      return true;

    if constexpr (!util_isDenseMatrixType<T>()) {
        if (!mem_isAllocated(matr.isApproxNonZero))       return true;
        if (!mem_isAllocated(matr.isStrictlyNonNegative)) return true;
    }

    // CPU memory must have allocated (dense matrices also have an outer row array)
    if constexpr (util_isDenseMatrixType<T>()) {
        if (!mem_isAllocated(matr.cpuElemsFlat))  return true;
        if (!mem_isOuterAllocated(matr.cpuElems)) return true;
    } else {
        if (!mem_isAllocated(matr.cpuElems)) return true;
    }

    // a GPU-accelerated environment implies GPU memory should have allocated...
    bool isGpuAlloc = mem_isAllocated(util_getGpuMemPtr(matr));
    if (getQuESTEnv().isGpuAccelerated && !isGpuAlloc) {

        // ...unless the matrix is a FullStateDiagMatr which opted out of GPU deployment
        if constexpr (util_isFullStateDiagMatr<T>()) {
            if (matr.isGpuAccelerated)
                return true;
        } else
            return true;
    }

    // otherwise, all attempted allocations succeeded
    return false;
}

template <class T>
void freeAllMemoryIfAnyAllocsFailed(T matr) {

    // determine whether any node failed to allocate
    bool anyFail = didAnyLocalAllocsFail(matr);
    if (comm_isInit())
        anyFail = comm_isTrueOnAllNodes(anyFail);

    // if so, free every successful allocation before validation throws
    if (anyFail)
        freeHeapMatrix(matr);
}

template <class T>
void validateMatrixAllocs(T matr, const char* caller) {

    // free all memory before throwing a validation error, to avoid leaks
    freeAllMemoryIfAnyAllocsFailed(matr);
    validate_newMatrixAllocs(matr, caller);
}

template <class T>
void setInitialHeapFlags(T matr) {

    // the new matrix's properties are simply not yet known
    util_setFlagToUnknown(matr.isApproxUnitary);
    util_setFlagToUnknown(matr.isApproxHermitian);

    if constexpr (!util_isDenseMatrixType<T>()) {
        util_setFlagToUnknown(matr.isApproxNonZero);
        util_setFlagToUnknown(matr.isStrictlyNonNegative);
    }

    // the user has not yet synchronised their elements to the GPU
    *(matr.wasGpuSynced) = 0;
}

CompMatr createCompMatr(int numQubits) {
    validate_envIsInit(__func__);
    validate_newCompMatrParams(numQubits, __func__);

    // validation ensures these sizes never overflow
    qindex numRows = powerOf2(numQubits);
    qindex numElems = numRows * numRows;

    // allocate flat CPU memory, and GPU memory only in GPU-accelerated envs
    qcomp* cpuMem = cpu_allocArray(numElems); // nullptr if failed
    qcomp* gpuMem = nullptr;
    if (getQuESTEnv().isGpuAccelerated)
        gpuMem = gpu_allocArray(numElems); // nullptr if failed

    CompMatr out;
    out.numQubits = numQubits;
    out.numRows = numRows;

    out.isApproxUnitary = util_allocEpsilonSensitiveHeapFlag();
    out.isApproxHermitian = util_allocEpsilonSensitiveHeapFlag();
    out.wasGpuSynced = cpu_allocHeapFlag();

    // expose the flat CPU memory through a 2D row-pointer wrapper
    out.cpuElems = cpu_allocAndInitMatrixWrapper(cpuMem, numRows);
    out.cpuElemsFlat = cpuMem;
    out.gpuElemsFlat = gpuMem;

    validateMatrixAllocs(out, __func__);
    setInitialHeapFlags(out);
    return out;
}

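/* Example usage (a sketch; assumes initQuESTEnv() was already called):
 *
 *     CompMatr m = createCompMatr(3);   // 8x8, heap + optional GPU memory
 *     m.cpuElems[0][0] = 1;             // direct CPU-element edits...
 *     syncCompMatr(m);                  // ...must be pushed to any GPU copy
 *     destroyCompMatr(m);               // heap matrices must be destroyed
 */
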
DiagMatr createDiagMatr(int numQubits) {
    validate_envIsInit(__func__);
    validate_newDiagMatrParams(numQubits, __func__);

    // validation ensures this never overflows
    qindex numElems = powerOf2(numQubits);

    DiagMatr out;
    out.numQubits = numQubits;
    out.numElems = numElems;

    out.isApproxUnitary = util_allocEpsilonSensitiveHeapFlag();
    out.isApproxHermitian = util_allocEpsilonSensitiveHeapFlag();
    out.isApproxNonZero = util_allocEpsilonSensitiveHeapFlag();
    out.isStrictlyNonNegative = cpu_allocHeapFlag();
    out.wasGpuSynced = cpu_allocHeapFlag();

    out.cpuElems = cpu_allocArray(numElems);
    out.gpuElems = (getQuESTEnv().isGpuAccelerated)? gpu_allocArray(numElems) : nullptr;

    validateMatrixAllocs(out, __func__);
    setInitialHeapFlags(out);
    return out;
}

FullStateDiagMatr validateAndCreateCustomFullStateDiagMatr(int numQubits, int useDistrib, int useGpuAccel, int useMultithread, const char* caller) {
    validate_envIsInit(caller);
    QuESTEnv env = getQuESTEnv();

    validate_newFullStateDiagMatrParams(numQubits, useDistrib, useGpuAccel, useMultithread, caller);

    // replace automatic deployment flags (modeflag::USE_AUTO) with explicit choices
    autodep_chooseFullStateDiagMatrDeployment(numQubits, useDistrib, useGpuAccel, useMultithread, env);

    // validation ensures these sizes never overflow
    qindex numElems = powerOf2(numQubits);
    qindex numElemsPerNode = numElems / (useDistrib? env.numNodes : 1);

    FullStateDiagMatr out;
    out.numQubits = numQubits;
    out.numElems = numElems;

    // distribution is disabled in single-node environments
    out.isGpuAccelerated = useGpuAccel;
    out.isMultithreaded = useMultithread;
    out.isDistributed = useDistrib && (env.numNodes > 1);
    out.numElemsPerNode = numElemsPerNode;

    out.isApproxUnitary = util_allocEpsilonSensitiveHeapFlag();
    out.isApproxHermitian = util_allocEpsilonSensitiveHeapFlag();
    out.isApproxNonZero = util_allocEpsilonSensitiveHeapFlag();
    out.isStrictlyNonNegative = cpu_allocHeapFlag();
    out.wasGpuSynced = cpu_allocHeapFlag();

    out.cpuElems = cpu_allocArray(numElemsPerNode);
    out.gpuElems = (useGpuAccel)? gpu_allocArray(numElemsPerNode) : nullptr;

    validateMatrixAllocs(out, __func__);
    setInitialHeapFlags(out);
    return out;
}

FullStateDiagMatr createCustomFullStateDiagMatr(int numQubits, int useDistrib, int useGpuAccel, int useMultithread) {
    return validateAndCreateCustomFullStateDiagMatr(numQubits, useDistrib, useGpuAccel, useMultithread, __func__);
}

FullStateDiagMatr createFullStateDiagMatr(int numQubits) {
    return validateAndCreateCustomFullStateDiagMatr(numQubits, modeflag::USE_AUTO, modeflag::USE_AUTO, modeflag::USE_AUTO, __func__);
}

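/* Sketch of deployment control: explicit flags (1=enable, 0=disable) versus
 * modeflag::USE_AUTO, which defers to the autodeployer above:
 *
 *     FullStateDiagMatr a = createFullStateDiagMatr(28);                 // auto-deployed
 *     FullStateDiagMatr b = createCustomFullStateDiagMatr(28, 1, 0, 1);  // distributed,
 *                                                                        // no GPU, multithreaded
 */
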
// T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void markMatrixAsSynced(T matr) {

    // indicate the matrix is now GPU-synchronised, even if GPU memory
    // does not exist (in which case the flag is never consulted)
    *(matr.wasGpuSynced) = 1;

    // the revised matrix properties are unknown; they are re-established
    // lazily, only when subsequently needed
    util_setFlagToUnknown(matr.isApproxUnitary);
    util_setFlagToUnknown(matr.isApproxHermitian);

    if constexpr (!util_isDenseMatrixType<T>()) {
        util_setFlagToUnknown(matr.isApproxNonZero);
        util_setFlagToUnknown(matr.isStrictlyNonNegative);
    }
}

template <class T>
void validateAndSyncMatrix(T matr, const char* caller) {
    validate_matrixFields(matr, caller);

    // overwrite GPU elements with user-modified CPU elements, if GPU memory exists
    if (mem_isAllocated(util_getGpuMemPtr(matr)))
        gpu_copyCpuToGpu(matr);

    markMatrixAsSynced(matr);
}

// the user-facing sync functions defer to the above template
void syncCompMatr(CompMatr matr) {
    validateAndSyncMatrix(matr, __func__);
}
void syncDiagMatr(DiagMatr matr) {
    validateAndSyncMatrix(matr, __func__);
}
void syncFullStateDiagMatr(FullStateDiagMatr matr) {
    validateAndSyncMatrix(matr, __func__);
}

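/* Sketch of the edit-then-sync workflow enabled by the above:
 *
 *     matr.cpuElems[2] = 0.5;   // user edits DiagMatr CPU memory directly
 *     syncDiagMatr(matr);       // pushes edits to GPU memory (if any) and
 *                               // resets the cached property flags
 */
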
template <class T>
void validateAndDestroyMatrix(T matrix, const char* caller) {
    validate_matrixFields(matrix, caller);

    freeHeapMatrix(matrix);
}

// the user-facing destroy functions defer to the above template
void destroyCompMatr(CompMatr matr) {
    validateAndDestroyMatrix(matr, __func__);
}
void destroyDiagMatr(DiagMatr matr) {
    validateAndDestroyMatrix(matr, __func__);
}
void destroyFullStateDiagMatr(FullStateDiagMatr matr) {
    validateAndDestroyMatrix(matr, __func__);
}

// T can be qcomp** or vector<vector<qcomp>>
template <typename T>
void setAndSyncDenseMatrElems(CompMatr out, T elems) {

    // copy elems into the matrix's CPU memory
    cpu_copyMatrix(out.cpuElems, elems, out.numRows);

    // overwrite any GPU memory and mark the matrix as synced
    syncCompMatr(out);
}

void setCompMatr(CompMatr out, qcomp** in) {
    validate_matrixFields(out, __func__);
    validate_matrixNewElemsPtrNotNull(in, out.numRows, __func__);

    setAndSyncDenseMatrElems(out, in);
}

void setDiagMatr(DiagMatr out, qcomp* in) {
    validate_matrixFields(out, __func__);
    validate_matrixNewElemsPtrNotNull(in, __func__);

    cpu_copyArray(out.cpuElems, in, out.numElems);
    syncDiagMatr(out);
}

void setFullStateDiagMatr(FullStateDiagMatr out, qindex startInd, qcomp* in, qindex numElems) {
    validate_matrixFields(out, __func__);
    validate_fullStateDiagMatrNewElems(out, startInd, numElems, __func__);
    validate_matrixNewElemsPtrNotNull(in, __func__);

    // the localiser handles the case that 'out' is distributed and/or GPU-accelerated
    localiser_fullstatediagmatr_setElems(out, startInd, in, numElems);

    // the localiser has already updated GPU memory, so merely mark as synced
    markMatrixAsSynced(out);
}

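/* Sketch: startInd is a global index, so callers pass identical arguments on
 * every node even when 'out' is distributed:
 *
 *     qcomp elems[] = {1, 2, 3};
 *     setFullStateDiagMatr(out, 5, elems, 3);   // overwrites global elements 5..7
 */
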
void setCompMatr(CompMatr out, vector<vector<qcomp>> in) {
    validate_matrixFields(out, __func__);
    validate_matrixNumNewElems(out.numQubits, in, __func__);

    setAndSyncDenseMatrElems(out, in);
}

void setDiagMatr(DiagMatr out, vector<qcomp> in) {
    validate_matrixFields(out, __func__);
    validate_matrixNumNewElems(out.numQubits, in, __func__);

    setDiagMatr(out, in.data()); // harmlessly re-validates
}

void setInlineCompMatr(CompMatr matr, int numQb, vector<vector<qcomp>> in) {
    validate_matrixFields(matr, __func__);
    validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, __func__);
    validate_matrixNumNewElems(matr.numQubits, in, __func__);

    setAndSyncDenseMatrElems(matr, in);
}

void setInlineDiagMatr(DiagMatr matr, int numQb, vector<qcomp> in) {
    validate_matrixFields(matr, __func__);
    validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, __func__);
    validate_matrixNumNewElems(matr.numQubits, in, __func__);

    setDiagMatr(matr, in.data()); // harmlessly re-validates
}

void setInlineFullStateDiagMatr(FullStateDiagMatr matr, qindex startInd, qindex numElems, vector<qcomp> in) {
    validate_matrixFields(matr, __func__);
    validate_declaredNumElemsMatchesVectorLength(numElems, in.size(), __func__);
    validate_fullStateDiagMatrNewElems(matr, startInd, numElems, __func__);

    setFullStateDiagMatr(matr, startInd, in.data(), numElems); // harmlessly re-validates
}

CompMatr createInlineCompMatr(int numQb, vector<vector<qcomp>> elems) {
    validate_envIsInit(__func__);
    validate_newCompMatrParams(numQb, __func__);
    validate_matrixNumNewElems(numQb, elems, __func__);

    // subsequent validation is guaranteed to pass
    CompMatr matr = createCompMatr(numQb);
    setAndSyncDenseMatrElems(matr, elems);
    return matr;
}

DiagMatr createInlineDiagMatr(int numQb, vector<qcomp> elems) {
    validate_envIsInit(__func__);
    validate_newDiagMatrParams(numQb, __func__);
    validate_matrixNumNewElems(numQb, elems, __func__);

    // subsequent validation is guaranteed to pass
    DiagMatr matr = createDiagMatr(numQb);
    setDiagMatr(matr, elems.data());
    return matr;
}

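/* Sketch (C++ only): create and initialise in a single call:
 *
 *     DiagMatr z = createInlineDiagMatr(1, {1, -1});   // Pauli-Z diagonal
 */
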
void _validateNewNestedElemsPtrNotNull(qcomp** ptrs, int numQubits, const char* caller) {
    validate_matrixNewElemsPtrNotNull(ptrs, powerOf2(numQubits), caller);
}

void _validateNewElemsPtrNotNull(qcomp* ptr, const char* caller) {
    validate_matrixNewElemsPtrNotNull(ptr, caller);
}

void _validateParamsToSetCompMatrFromArr(CompMatr matr) {
    validate_matrixFields(matr, "setCompMatr");
}

void _validateParamsToSetInlineCompMatr(CompMatr matr, int numQb) {
    const char* caller = "setInlineCompMatr";
    validate_matrixFields(matr, caller);
    validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, caller);
}

void _validateParamsToSetInlineDiagMatr(DiagMatr matr, int numQb) {
    const char* caller = "setInlineDiagMatr";
    validate_matrixFields(matr, caller);
    validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, caller);
}

void _validateParamsToSetInlineFullStateDiagMatr(FullStateDiagMatr matr, qindex startInd, qindex numElems) {
    const char* caller = "setInlineFullStateDiagMatr";
    validate_matrixFields(matr, caller);
    validate_fullStateDiagMatrNewElems(matr, startInd, numElems, caller);
}

void _validateParamsToCreateInlineCompMatr(int numQb) {
    const char* caller = "createInlineCompMatr";
    validate_envIsInit(caller);
    validate_newCompMatrParams(numQb, caller);
}

void _validateParamsToCreateInlineDiagMatr(int numQb) {
    const char* caller = "createInlineDiagMatr";
    validate_envIsInit(caller);
    validate_newDiagMatrParams(numQb, caller);
}

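/* Context (an assumption of this sketch): the C API exposes setInlineCompMatr
 * et al. as macros accepting brace-enclosed temporary arrays; those macros
 * invoke the "_" helpers above before any copying, so that validation errors
 * cite the user-facing name (hence the hardcoded string literals in place of
 * __func__). */
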
void setFullStateDiagMatrFromPauliStrSum(FullStateDiagMatr out, PauliStrSum in) {
    validate_matrixFields(out, __func__);
    validate_pauliStrSumFields(in, __func__);
    validate_pauliStrSumCanInitMatrix(out, in, __func__);

    // the localiser handles distribution and GPU-acceleration of 'out'
    localiser_fullstatediagmatr_setElemsToPauliStrSum(out, in);

    markMatrixAsSynced(out);
}

FullStateDiagMatr createFullStateDiagMatrFromPauliStrSum(PauliStrSum in) {
    validate_pauliStrSumFields(in, __func__);

    // deduce the smallest matrix large enough to contain the sum's Paulis
    int numQubits = 1 + paulis_getIndOfLefmostNonIdentityPauli(in);
    validate_newFullStateDiagMatrParams(numQubits, modeflag::USE_AUTO, modeflag::USE_AUTO, modeflag::USE_AUTO, __func__);

    FullStateDiagMatr out = createFullStateDiagMatr(numQubits);
    localiser_fullstatediagmatr_setElemsToPauliStrSum(out, in);
    markMatrixAsSynced(out);
    return out;
}

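/* Sketch: a PauliStrSum containing only I and Z Paulis is diagonal, so it can
 * populate a FullStateDiagMatr directly. Assuming 'sum' was prepared elsewhere
 * via the PauliStrSum creation API:
 *
 *     FullStateDiagMatr m = createFullStateDiagMatrFromPauliStrSum(sum);
 *     // m.numQubits is deduced from the sum's leftmost non-identity Pauli
 */
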
void setDiagMatrFromMultiVarFunc(DiagMatr out, qcomp (*callbackFunc)(qindex*), int* numQubitsPerVar, int numVars, int areSigned) {
    validate_matrixFields(out, __func__);
    validate_multiVarFuncQubits(out.numQubits, numQubitsPerVar, numVars, __func__);
    validate_funcVarSignedFlag(areSigned, __func__);

    vector<qindex> varValues(numVars);

    // serially set each element, since the user's callback may not be thread-safe
    for (qindex elemInd=0; elemInd<out.numElems; elemInd++) {

        // decode the element index into the value of each variable
        fast_getSubQuregValues(elemInd, numQubitsPerVar, numVars, areSigned, varValues.data());

        // update only the CPU elements
        out.cpuElems[elemInd] = callbackFunc(varValues.data());
    }

    // overwrite the GPU elements
    syncDiagMatr(out);
}

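/* Sketch: the callback receives one value per declared variable. For two
 * unsigned 2-qubit variables x and y, this sets the (x,y)-th diagonal element
 * to x + 2*y:
 *
 *     qcomp f(qindex* v) { return qcomp(v[0] + 2*v[1], 0); }
 *     int qubitsPerVar[] = {2, 2};
 *     setDiagMatrFromMultiVarFunc(out, f, qubitsPerVar, 2, 0);   // out has 4 qubits
 */
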
void setFullStateDiagMatrFromMultiVarFunc(FullStateDiagMatr out, qcomp (*callbackFunc)(qindex*), int* numQubitsPerVar, int numVars, int areSigned) {
    validate_matrixFields(out, __func__);
    validate_multiVarFuncQubits(out.numQubits, numQubitsPerVar, numVars, __func__);
    validate_funcVarSignedFlag(areSigned, __func__);

    // defer to the CPU backend, which evaluates only this node's elements
    cpu_fullstatediagmatr_setElemsFromMultiVarFunc(out, callbackFunc, numQubitsPerVar, numVars, areSigned);

    // overwrite the GPU elements
    syncFullStateDiagMatr(out);
}

void setDiagMatrFromMultiDimLists(DiagMatr out, void* lists, int* numQubitsPerDim, int numDims) {
    validate_matrixFields(out, __func__);
    validate_multiVarFuncQubits(out.numQubits, numQubitsPerDim, numDims, __func__);

    vector<qindex> listInds(numDims);

    for (qindex elemInd=0; elemInd<out.numElems; elemInd++) {

        // decode the element index into a (necessarily unsigned) index per dimension
        fast_getSubQuregValues(elemInd, numQubitsPerDim, numDims, false, listInds.data());

        // update only the CPU elements with the nested list's element
        out.cpuElems[elemInd] = util_getElemFromNestedPtrs(lists, listInds.data(), numDims);
    }

    // overwrite the GPU elements
    syncDiagMatr(out);
}

void setFullStateDiagMatrFromMultiDimLists(FullStateDiagMatr out, void* lists, int* numQubitsPerDim, int numDims) {
    validate_matrixFields(out, __func__);
    validate_multiVarFuncQubits(out.numQubits, numQubitsPerDim, numDims, __func__);

    // defer to the CPU backend, which evaluates only this node's elements
    cpu_fullstatediagmatr_setElemsFromMultiDimLists(out, lists, numQubitsPerDim, numDims);

    // overwrite the GPU elements
    syncFullStateDiagMatr(out);
}

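/* Sketch: 'lists' must be a nested pointer structure (not a contiguous 2D
 * array), since elements are retrieved by repeated dereferencing. For two
 * 1-qubit dimensions:
 *
 *     qcomp row0[] = {0, 1}, row1[] = {2, 3};
 *     qcomp* rows[] = {row0, row1};
 *     int qubitsPerDim[] = {1, 1};
 *     setDiagMatrFromMultiDimLists(out, rows, qubitsPerDim, 2);  // out has 2 qubits
 */
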
template <class T>
void validateAndPrintMatrix(T matr, const char* caller) {
    validate_matrixFields(matr, caller);
    validate_numReportedNewlinesAboveZero(__func__);

    // heap matrices must have been synced before reporting
    if constexpr (util_isHeapMatrixType<T>())
        validate_matrixIsSynced(matr, caller);

    // determine the memory consumed per node, including any GPU copy
    int numNodes = (util_isDistributedMatrix(matr))? comm_getNumNodes() : 1;
    size_t elemMem = mem_getLocalMatrixMemoryRequired(matr.numQubits, util_isDenseMatrixType<T>(), numNodes);
    size_t structMem = sizeof(matr);

    // fixed-size matrices embed their elements in the struct itself,
    // so avoid double-counting them in the total below
    if (util_isFixedSizeMatrixType<T>())
        structMem -= elemMem;

    size_t numBytesPerNode = elemMem + structMem;
    print_header(matr, numBytesPerNode);

    // ... (element-printing routine elided from this extract) ...

    print_oneFewerNewlines();
}

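/* The user-facing report functions are presumably thin wrappers over the
 * template above, mirroring the sync/destroy pattern (a sketch; their bodies
 * were elided from this extract): */

void reportCompMatr1(CompMatr1 matr) { validateAndPrintMatrix(matr, __func__); }
void reportCompMatr2(CompMatr2 matr) { validateAndPrintMatrix(matr, __func__); }
void reportCompMatr (CompMatr  matr) { validateAndPrintMatrix(matr, __func__); }
void reportDiagMatr1(DiagMatr1 matr) { validateAndPrintMatrix(matr, __func__); }
void reportDiagMatr2(DiagMatr2 matr) { validateAndPrintMatrix(matr, __func__); }
void reportDiagMatr (DiagMatr  matr) { validateAndPrintMatrix(matr, __func__); }
void reportFullStateDiagMatr(FullStateDiagMatr matr) { validateAndPrintMatrix(matr, __func__); }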