11#include "quest/include/matrices.h"
12#include "quest/include/environment.h"
13#include "quest/include/types.h"
15#include "quest/src/core/validation.hpp"
16#include "quest/src/core/autodeployer.hpp"
17#include "quest/src/core/utilities.hpp"
18#include "quest/src/core/localiser.hpp"
19#include "quest/src/core/printer.hpp"
20#include "quest/src/core/bitwise.hpp"
21#include "quest/src/core/fastmath.hpp"
22#include "quest/src/core/memory.hpp"
23#include "quest/src/comm/comm_config.hpp"
24#include "quest/src/comm/comm_routines.hpp"
25#include "quest/src/cpu/cpu_config.hpp"
26#include "quest/src/gpu/gpu_config.hpp"
27#include "quest/src/cpu/cpu_subroutines.hpp"
// NOTE(review): fragments of the fixed-size matrix getter bodies (the
// vector overloads of getCompMatr1/getCompMatr2/getDiagMatr1/getDiagMatr2,
// judging by the validation arity); the function headers and intervening
// lines were dropped by extraction — the fused leading numbers are the
// original file's line numbers. Each overload validates that the caller
// passed the right number of rows/elements for a 1- or 2-qubit matrix,
// and the dense overloads collect per-row pointers so the qcomp** based
// getter can be reused. Consult the full file before editing.
49    validate_matrixNumNewElems(1, in, __func__);
51    qcomp* rowPtrs[] = {in[0].data(), in[1].data()};
56    validate_matrixNumNewElems(2, in, __func__);
58    qcomp* rowPtrs[] = {in[0].data(), in[1].data(), in[2].data(), in[3].data()};
64    validate_matrixNumNewElems(1, in, __func__);
70    validate_matrixNumNewElems(2, in, __func__);
// Frees every heap-allocated field of a heap matrix struct (CPU element
// memory, optional GPU memory, and the heap-allocated property flags).
// Templated over the matrix type T: dense types free both the 2D row-pointer
// wrapper (cpuElems) and the flat backing array (cpuElemsFlat); the visible
// line 97 appears to be the non-dense branch freeing a flat cpuElems array.
// GPU memory is freed only when mem_isAllocated() confirms it was allocated.
// NOTE(review): extraction dropped interleaving lines (fused original line
// numbers show gaps), including else-branches and closing braces.
84void freeHeapMatrix(T matr) {
93    if constexpr (util_isDenseMatrixType<T>()) {
94        cpu_deallocMatrixWrapper(matr.cpuElems);
95        cpu_deallocArray(matr.cpuElemsFlat);
97        cpu_deallocArray(matr.cpuElems);
// free GPU memory only if it exists (safe when env is not GPU-accelerated)
100    auto gpuPtr = util_getGpuMemPtr(matr);
101    if (mem_isAllocated(gpuPtr))
102        gpu_deallocArray(gpuPtr);
// heap flags common to all matrix types
105    util_deallocEpsilonSensitiveHeapFlag(matr.isApproxUnitary);
106    util_deallocEpsilonSensitiveHeapFlag(matr.isApproxHermitian);
107    cpu_deallocHeapFlag(matr.wasGpuSynced);
// diagonal-only flags (dense matrices do not carry these fields)
111    if constexpr (!util_isDenseMatrixType<T>()) {
112        util_deallocEpsilonSensitiveHeapFlag(matr.isApproxNonZero);
113        cpu_deallocHeapFlag(matr.isStrictlyNonNegative);
// Returns true if any of this node's heap allocations for the given matrix
// failed (checked via mem_isAllocated / mem_isOuterAllocated). Checks the
// shared heap flags, then the type-specific flags and CPU element memory,
// then GPU memory: a missing GPU allocation counts as failure when the env
// is GPU-accelerated — and, for FullStateDiagMatr, apparently only when the
// matrix itself opted into GPU acceleration (line 147; the consequent lines
// were dropped by extraction). NOTE(review): original line numbers are
// fused to each line and show gaps — closing braces and the final return
// are not visible here.
120bool didAnyLocalAllocsFail(T matr) {
// flags present on every heap matrix type
123    if (!mem_isAllocated(matr.isApproxUnitary))
        return true;
124    if (!mem_isAllocated(matr.isApproxHermitian))
        return true;
125    if (!mem_isAllocated(matr.wasGpuSynced))
        return true;
// diagonal-only flags
129    if constexpr (!util_isDenseMatrixType<T>()) {
130        if (!mem_isAllocated(matr.isApproxNonZero))
            return true;
131        if (!mem_isAllocated(matr.isStrictlyNonNegative))
            return true;
// dense matrices: flat array plus the outer row-pointer wrapper;
// line 139 appears to be the non-dense branch checking flat cpuElems
135    if constexpr (util_isDenseMatrixType<T>()) {
136        if (!mem_isAllocated(matr.cpuElemsFlat))
            return true;
137        if (!mem_isOuterAllocated(matr.cpuElems))
            return true;
139        if (!mem_isAllocated(matr.cpuElems))
            return true;
// GPU memory must exist whenever the environment is GPU-accelerated...
142    bool isGpuAlloc = mem_isAllocated(util_getGpuMemPtr(matr));
143    if (
        getQuESTEnv().isGpuAccelerated && !isGpuAlloc) {
// ...except FullStateDiagMatr may individually opt out of GPU deployment
146        if constexpr (util_isFullStateDiagMatr<T>()) {
147            if (matr.isGpuAccelerated)
// freeAllMemoryIfAnyAllocsFailed: if allocation failed on ANY node (the
// local result is reduced across nodes via comm_isTrueOnAllNodes, so every
// node agrees), free all partial allocations so subsequent validation can
// report the failure without leaking. NOTE(review): the guard around
// freeHeapMatrix (original lines 168-170) was dropped by extraction.
162void freeAllMemoryIfAnyAllocsFailed(T matr) {
165    bool anyFail = didAnyLocalAllocsFail(matr);
167    anyFail = comm_isTrueOnAllNodes(anyFail);
171        freeHeapMatrix(matr);
// validateMatrixAllocs: frees everything on any failure, then invokes the
// validator which (presumably) throws/reports using the caller's name.
177void validateMatrixAllocs(T matr,
        const char* caller) {
180    freeAllMemoryIfAnyAllocsFailed(matr);
181    validate_newMatrixAllocs(matr, caller);
// Initialises the heap-allocated property flags of a freshly created matrix:
// the epsilon-sensitive properties (unitarity, Hermiticity, and for diagonal
// types non-zero-ness / non-negativity) start as "unknown", and the matrix
// is marked as not yet synchronised to GPU memory (*wasGpuSynced = 0).
187void setInitialHeapFlags(T matr) {
190    util_setFlagToUnknown(matr.isApproxUnitary);
191    util_setFlagToUnknown(matr.isApproxHermitian);
// diagonal-only flags
195    if constexpr (!util_isDenseMatrixType<T>()) {
196        util_setFlagToUnknown(matr.isApproxNonZero);
197        util_setFlagToUnknown(matr.isStrictlyNonNegative);
// GPU copy (if any) has not yet received the CPU elements
201    *(matr.wasGpuSynced) = 0;
// NOTE(review): body fragment of createCompMatr(int numQubits) — the header
// (original lines ~210-211) was dropped by extraction. Validates env/params,
// allocates a numRows x numRows dense matrix (flat CPU array + 2D wrapper,
// plus a GPU array when the env is GPU-accelerated, judging by line 222's
// indentation under the elided condition), populates the struct via
// designated initializers, then validates allocations and initialises flags.
212    validate_envIsInit(__func__);
213    validate_newCompMatrParams(numQubits, __func__);
// dense: 2^numQubits rows, so 2^(2*numQubits) total elements
216    qindex numRows = powerOf2(numQubits);
217    qindex numElems = numRows * numRows;
219    qcomp* cpuMem = cpu_allocArray(numElems);
220    qcomp* gpuMem =
        nullptr;
222        gpuMem = gpu_allocArray(numElems);
226        .numQubits = numQubits,
230        .isApproxUnitary = util_allocEpsilonSensitiveHeapFlag(),
231        .isApproxHermitian = util_allocEpsilonSensitiveHeapFlag(),
233        .wasGpuSynced = cpu_allocHeapFlag(),
// cpuElems is a row-pointer wrapper over the same flat buffer
235        .cpuElems = cpu_allocAndInitMatrixWrapper(cpuMem, numRows),
236        .cpuElemsFlat = cpuMem,
237        .gpuElemsFlat = gpuMem
// frees everything and reports if any node's allocation failed
240    validateMatrixAllocs(out, __func__);
241    setInitialHeapFlags(out);
// NOTE(review): body fragment of createDiagMatr(int numQubits) — header
// dropped by extraction. Diagonal matrices store only the 2^numQubits
// diagonal elements; GPU memory is allocated inline in the initializer,
// conditional on env GPU acceleration.
247    validate_envIsInit(__func__);
248    validate_newDiagMatrParams(numQubits, __func__);
// only the diagonal is stored: 2^numQubits elements
251    qindex numElems = powerOf2(numQubits);
255        .numQubits = numQubits,
256        .numElems = numElems,
259        .isApproxUnitary = util_allocEpsilonSensitiveHeapFlag(),
260        .isApproxHermitian = util_allocEpsilonSensitiveHeapFlag(),
261        .isApproxNonZero = util_allocEpsilonSensitiveHeapFlag(),
262        .isStrictlyNonNegative = cpu_allocHeapFlag(),
263        .wasGpuSynced = cpu_allocHeapFlag(),
266        .cpuElems = cpu_allocArray(numElems),
// GPU copy only when the environment is GPU-accelerated
269        .gpuElems = (
            getQuESTEnv().isGpuAccelerated)? gpu_allocArray(numElems) :
            nullptr
272    validateMatrixAllocs(out, __func__);
273    setInitialHeapFlags(out);
// Creates a FullStateDiagMatr with caller-chosen (or auto-chosen) deployment.
// The useDistrib/useGpuAccel/useMultithread flags may be modeflag::USE_AUTO,
// in which case autodep_chooseFullStateDiagMatrDeployment overwrites them
// with concrete choices. Elements are partitioned evenly across nodes when
// distributed (numElemsPerNode); note isDistributed is additionally false on
// a single-node env even when useDistrib was requested (line 300).
// NOTE(review): extraction dropped interleaving lines (env acquisition,
// struct opening, closing braces) — fused numbers are original line numbers.
278FullStateDiagMatr validateAndCreateCustomFullStateDiagMatr(
        int numQubits,
        int useDistrib,
        int useGpuAccel,
        int useMultithread,
        const char* caller) {
279    validate_envIsInit(caller);
283    validate_newFullStateDiagMatrParams(numQubits, useDistrib, useGpuAccel, useMultithread, caller);
// may overwrite the USE_AUTO flags with concrete deployments
286    autodep_chooseFullStateDiagMatrDeployment(numQubits, useDistrib, useGpuAccel, useMultithread, env);
289    qindex numElems = powerOf2(numQubits);
290    qindex numElemsPerNode = numElems / (useDistrib? env.numNodes : 1);
294        .numQubits = numQubits,
295        .numElems = numElems,
298        .isGpuAccelerated = useGpuAccel,
299        .isMultithreaded = useMultithread,
// distribution is meaningless on a single node, regardless of the request
300        .isDistributed = useDistrib && (env.numNodes > 1),
301        .numElemsPerNode = numElemsPerNode,
304        .isApproxUnitary = util_allocEpsilonSensitiveHeapFlag(),
305        .isApproxHermitian = util_allocEpsilonSensitiveHeapFlag(),
306        .isApproxNonZero = util_allocEpsilonSensitiveHeapFlag(),
307        .isStrictlyNonNegative = cpu_allocHeapFlag(),
308        .wasGpuSynced = cpu_allocHeapFlag(),
// each node allocates only its own partition
311        .cpuElems = cpu_allocArray(numElemsPerNode),
314        .gpuElems = (useGpuAccel)? gpu_allocArray(numElemsPerNode) :
            nullptr,
317    validateMatrixAllocs(out, __func__);
318    setInitialHeapFlags(out);
// one-line bodies of the two public wrappers (headers dropped by extraction):
// createCustomFullStateDiagMatr forwards the caller's flags;
// createFullStateDiagMatr requests fully automatic deployment.
324    return validateAndCreateCustomFullStateDiagMatr(numQubits, useDistrib, useGpuAccel, useMultithread, __func__);
329    return validateAndCreateCustomFullStateDiagMatr(numQubits, modeflag::USE_AUTO, modeflag::USE_AUTO, modeflag::USE_AUTO, __func__);
// Records that the matrix's CPU elements are now reflected in GPU memory
// (*wasGpuSynced = 1), and — since the elements may have just changed —
// resets all cached element-dependent properties back to "unknown" so they
// are re-evaluated on next query.
341void markMatrixAsSynced(T matr) {
345    *(matr.wasGpuSynced) = 1;
// element values may have changed; invalidate cached properties
349    util_setFlagToUnknown(matr.isApproxUnitary);
350    util_setFlagToUnknown(matr.isApproxHermitian);
// diagonal-only cached properties
354    if constexpr (!util_isDenseMatrixType<T>()) {
355        util_setFlagToUnknown(matr.isApproxNonZero);
356        util_setFlagToUnknown(matr.isStrictlyNonNegative);
// validateAndSyncMatrix: validates the matrix fields then pushes the CPU
// elements to GPU memory — but only when a GPU allocation actually exists
// (so CPU-only deployments are a no-op copy) — and finally marks the matrix
// synced (which also invalidates cached properties).
363void validateAndSyncMatrix(T matr,
        const char* caller) {
364    validate_matrixFields(matr, caller);
// skip the copy when this matrix has no GPU memory
367    if (mem_isAllocated(util_getGpuMemPtr(matr)))
368        gpu_copyCpuToGpu(matr);
370    markMatrixAsSynced(matr);
// validateAndDestroyMatrix: validates then frees all heap fields.
392void validateAndDestroyMatrix(T matrix,
        const char* caller) {
393    validate_matrixFields(matrix, caller);
395    freeHeapMatrix(matrix);
// setAndSyncDenseMatrElems: copies caller elements into the CPU wrapper;
// the subsequent GPU sync (original lines after 420) was dropped by
// extraction — see the full file.
417void setAndSyncDenseMatrElems(
        CompMatr out, T elems) {
420    cpu_copyMatrix(out.cpuElems, elems, out.numRows);
// NOTE(review): body fragments of the public setters (setCompMatr,
// setDiagMatr, setFullStateDiagMatr, the vector overloads, the inline
// setters, and the inline creators — matched against the signature index at
// the end of this extraction). All function headers and many body lines
// were dropped; fused numbers are original file line numbers. The common
// shape visible here: validate fields/pointers/element-counts, then copy
// elements and sync (via setAndSyncDenseMatrElems, cpu_copyArray, or the
// distributed localiser_fullstatediagmatr_setElems + markMatrixAsSynced).
428    validate_matrixFields(out, __func__);
429    validate_matrixNewElemsPtrNotNull(in, out.numRows, __func__);
431    setAndSyncDenseMatrElems(out, in);
436    validate_matrixFields(out, __func__);
437    validate_matrixNewElemsPtrNotNull(in, __func__);
440    cpu_copyArray(out.cpuElems, in, out.numElems);
// setFullStateDiagMatr: writes a [startInd, startInd+numElems) window,
// routed through the localiser to handle distributed partitions
448    validate_matrixFields(out, __func__);
449    validate_fullStateDiagMatrNewElems(out, startInd, numElems, __func__);
450    validate_matrixNewElemsPtrNotNull(in, __func__);
455    localiser_fullstatediagmatr_setElems(out, startInd, in, numElems);
459    markMatrixAsSynced(out);
// vector overloads: validate element counts against the matrix dimension
470    validate_matrixFields(out, __func__);
471    validate_matrixNumNewElems(out.numQubits, in, __func__);
473    setAndSyncDenseMatrElems(out, in);
478    validate_matrixFields(out, __func__);
479    validate_matrixNumNewElems(out.numQubits, in, __func__);
// inline setters: additionally check the caller-declared qubit count
508    validate_matrixFields(matr, __func__);
509    validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, __func__);
510    validate_matrixNumNewElems(matr.numQubits, in, __func__);
512    setAndSyncDenseMatrElems(matr, in);
516    validate_matrixFields(matr, __func__);
517    validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, __func__);
518    validate_matrixNumNewElems(matr.numQubits, in, __func__);
524    validate_matrixFields(matr, __func__);
525    validate_declaredNumElemsMatchesVectorLength(numElems, in.size(), __func__);
526    validate_fullStateDiagMatrNewElems(matr, startInd, numElems, __func__);
// inline creators: create-then-set in one call
541    validate_envIsInit(__func__);
542    validate_newCompMatrParams(numQb, __func__);
543    validate_matrixNumNewElems(numQb, elems, __func__);
548    setAndSyncDenseMatrElems(matr, elems);
554    validate_envIsInit(__func__);
555    validate_newDiagMatrParams(numQb, __func__);
556    validate_matrixNumNewElems(numQb, elems, __func__);
// Internal validators exposed (presumably via an extern "C" block whose
// opening was dropped by extraction) so that C macros / inline headers can
// perform the same checks as the C++ setters, reporting the PUBLIC caller
// name (e.g. "setCompMatr") rather than these underscore-prefixed helpers.
// NOTE(review): closing braces were dropped by extraction; fused leading
// numbers are original file line numbers.

// checks a nested (row-pointer) element array and each of its 2^numQubits rows
580    void _validateNewNestedElemsPtrNotNull(qcomp** ptrs,
        int numQubits,
        const char* caller) {
582        validate_matrixNewElemsPtrNotNull(ptrs, powerOf2(numQubits), caller);
// checks a flat element pointer
584    void _validateNewElemsPtrNotNull(qcomp* ptr,
        const char* caller) {
586        validate_matrixNewElemsPtrNotNull(ptr, caller);
// validation for setCompMatr when invoked from the array-based C path
589    void _validateParamsToSetCompMatrFromArr(
        CompMatr matr) {
591        validate_matrixFields(matr,
            "setCompMatr");
// validation for the inline (macro) variant of setCompMatr
594    void _validateParamsToSetInlineCompMatr(
        CompMatr matr,
        int numQb) {
596        const char* caller =
            "setInlineCompMatr";
597        validate_matrixFields(matr, caller);
598        validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, caller);
// validation for the inline (macro) variant of setDiagMatr
601    void _validateParamsToSetInlineDiagMatr(
        DiagMatr matr,
        int numQb) {
603        const char* caller =
            "setInlineDiagMatr";
604        validate_matrixFields(matr, caller);
605        validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, caller);
// validation for the inline (macro) variant of setFullStateDiagMatr
608    void _validateParamsToSetInlineFullStateDiagMatr(
        FullStateDiagMatr matr, qindex startInd, qindex numElems) {
610        const char* caller =
            "setInlineFullStateDiagMatr";
611        validate_matrixFields(matr, caller);
612        validate_fullStateDiagMatrNewElems(matr, startInd, numElems, caller);
// validation for the inline (macro) variant of createCompMatr
615    void _validateParamsToCreateInlineCompMatr(
        int numQb) {
617        const char* caller =
            "createInlineCompMatr";
618        validate_envIsInit(caller);
619        validate_newCompMatrParams(numQb, caller);
// validation for the inline (macro) variant of createDiagMatr
622    void _validateParamsToCreateInlineDiagMatr(
        int numQb) {
624        const char* caller =
            "createInlineDiagMatr";
625        validate_envIsInit(caller);
626        validate_newDiagMatrParams(numQb, caller);
// Forward declaration of a helper defined in the paulis module (note the
// identifier's existing spelling "Lefmost" — do not rename here, callers
// below and elsewhere use it as-is).
638extern int paulis_getIndOfLefmostNonIdentityPauli(
        PauliStrSum sum);
// NOTE(review): fragment of setFullStateDiagMatrFromPauliStrSum(out, in) —
// header dropped by extraction. Validates both operands and compatibility,
// populates the (distributed) diagonal via the localiser, then marks synced.
642    validate_matrixFields(out, __func__);
643    validate_pauliStrSumFields(in, __func__);
644    validate_pauliStrSumCanInitMatrix(out, in, __func__);
652    localiser_fullstatediagmatr_setElemsToPauliStrSum(out, in);
654    markMatrixAsSynced(out);
// NOTE(review): fragment of createFullStateDiagMatrFromPauliStrSum(in) —
// sizes the new matrix from the left-most non-identity Pauli, then creates
// (auto deployment, elided lines) and populates it as above.
659    validate_pauliStrSumFields(in, __func__);
662    int numQubits = 1 + paulis_getIndOfLefmostNonIdentityPauli(in);
663    validate_newFullStateDiagMatrParams(numQubits, modeflag::USE_AUTO, modeflag::USE_AUTO, modeflag::USE_AUTO, __func__);
668    localiser_fullstatediagmatr_setElemsToPauliStrSum(out, in);
669    markMatrixAsSynced(out);
// NOTE(review): body fragments of setDiagMatrFromMultiVarFunc /
// setFullStateDiagMatrFromMultiVarFunc and setDiagMatrFromMultiDimLists /
// setFullStateDiagMatrFromMultiDimLists (headers dropped by extraction;
// names matched against the signature index at the end of this extraction).
// The DiagMatr variants loop locally over every element, decomposing the
// element index into per-variable values (fast_getSubQuregValues) before
// invoking the user callback or indexing the nested lists; the
// FullStateDiagMatr variants delegate to cpu_ subroutines which handle the
// distributed partitioning.
675    validate_matrixFields(out, __func__);
676    validate_multiVarFuncQubits(out.numQubits, numQubitsPerVar, numVars, __func__);
677    validate_funcVarSignedFlag(areSigned, __func__);
679    vector<qindex> varValues(numVars);
682    for (qindex elemInd=0; elemInd<out.numElems; elemInd++) {
// decompose elemInd into the numVars variable values (signed if requested)
683        fast_getSubQuregValues(elemInd, numQubitsPerVar, numVars, areSigned, varValues.data());
686        out.cpuElems[elemInd] = callbackFunc(varValues.data());
// FullStateDiagMatr variant: validate then delegate to the CPU subroutine
695    validate_matrixFields(out, __func__);
696    validate_multiVarFuncQubits(out.numQubits, numQubitsPerVar, numVars, __func__);
697    validate_funcVarSignedFlag(areSigned, __func__);
701    cpu_fullstatediagmatr_setElemsFromMultiVarFunc(out, callbackFunc, numQubitsPerVar, numVars, areSigned);
// multi-dim-list variant: indices are always unsigned (false flag below)
709    validate_matrixFields(out, __func__);
710    validate_multiVarFuncQubits(out.numQubits, numQubitsPerDim, numDims, __func__);
712    vector<qindex> listInds(numDims);
716    for (qindex elemInd=0; elemInd<out.numElems; elemInd++) {
719        fast_getSubQuregValues(elemInd, numQubitsPerDim, numDims,
            false, listInds.data());
// walk the nested pointer lists down numDims levels to the scalar element
722        out.cpuElems[elemInd] = util_getElemFromNestedPtrs(lists, listInds.data(), numDims);
// FullStateDiagMatr variant: validate then delegate to the CPU subroutine
731    validate_matrixFields(out, __func__);
732    validate_multiVarFuncQubits(out.numQubits, numQubitsPerDim, numDims, __func__);
736    cpu_fullstatediagmatr_setElemsFromMultiDimLists(out, lists, numQubitsPerDim, numDims);
// Shared implementation behind the public report*Matr functions: validates
// the matrix (heap types must additionally be GPU-synced before reporting),
// computes the per-node memory footprint (element memory + struct size,
// where fixed-size stack matrices subtract the element memory already
// counted inside sizeof), prints a header, then the elements (elided lines).
// NOTE(review): extraction dropped interleaving lines; fused numbers are
// original file line numbers.
751void validateAndPrintMatrix(T matr,
        const char* caller) {
752    validate_matrixFields(matr, caller);
753    validate_numReportedNewlinesAboveZero(__func__);
// only heap matrices carry a GPU-sync flag worth checking
756    if constexpr (util_isHeapMatrixType<T>())
757        validate_matrixIsSynced(matr, caller);
// memory footprint: element storage (per node when distributed) + struct
764    int numNodes = (util_isDistributedMatrix(matr))? comm_getNumNodes() : 1;
765    size_t elemMem = mem_getLocalMatrixMemoryRequired(matr.numQubits, util_isDenseMatrixType<T>(), numNodes);
766    size_t structMem =
        sizeof(matr);
// fixed-size matrices embed their elements, already counted in elemMem
769    if (util_isFixedSizeMatrixType<T>())
770        structMem -= elemMem;
772    size_t numBytesPerNode = elemMem + structMem;
773    print_header(matr, numBytesPerNode);
777    print_oneFewerNewlines();
FullStateDiagMatr createCustomFullStateDiagMatr(int numQubits, int useDistrib, int useGpuAccel, int useMultithread)
DiagMatr createInlineDiagMatr(int numQb, std::vector< qcomp > elems)
FullStateDiagMatr createFullStateDiagMatr(int numQubits)
CompMatr createInlineCompMatr(int numQb, std::vector< std::vector< qcomp > > elems)
CompMatr createCompMatr(int numQubits)
DiagMatr createDiagMatr(int numQubits)
FullStateDiagMatr createFullStateDiagMatrFromPauliStrSum(PauliStrSum in)
void destroyDiagMatr(DiagMatr matr)
void destroyFullStateDiagMatr(FullStateDiagMatr matr)
void destroyCompMatr(CompMatr matr)
DiagMatr1 getDiagMatr1(std::vector< qcomp > in)
CompMatr1 getCompMatr1(std::vector< std::vector< qcomp > > in)
CompMatr2 getCompMatr2(std::vector< std::vector< qcomp > > in)
DiagMatr2 getDiagMatr2(std::vector< qcomp > in)
void reportCompMatr2(CompMatr2 matr)
void reportDiagMatr1(DiagMatr1 matr)
void reportCompMatr(CompMatr matr)
void reportDiagMatr2(DiagMatr2 matr)
void reportCompMatr1(CompMatr1 matr)
void reportDiagMatr(DiagMatr matr)
void reportFullStateDiagMatr(FullStateDiagMatr matr)
void setInlineDiagMatr(DiagMatr matr, int numQb, std::vector< qcomp > in)
void setFullStateDiagMatrFromMultiDimLists(FullStateDiagMatr out, void *lists, int *numQubitsPerDim, int numDims)
void setDiagMatrFromMultiDimLists(DiagMatr out, void *lists, int *numQubitsPerDim, int numDims)
void setFullStateDiagMatrFromMultiVarFunc(FullStateDiagMatr out, qcomp(*callbackFunc)(qindex *), int *numQubitsPerVar, int numVars, int areSigned)
void setInlineCompMatr(CompMatr matr, int numQb, std::vector< std::vector< qcomp > > in)
void setDiagMatr(DiagMatr out, qcomp *in)
void setCompMatr(CompMatr out, qcomp **in)
void setFullStateDiagMatr(FullStateDiagMatr out, qindex startInd, qcomp *in, qindex numElems)
void setDiagMatrFromMultiVarFunc(DiagMatr out, qcomp(*callbackFunc)(qindex *), int *numQubitsPerVar, int numVars, int areSigned)
void setFullStateDiagMatrFromPauliStrSum(FullStateDiagMatr out, PauliStrSum in)
void setInlineFullStateDiagMatr(FullStateDiagMatr matr, qindex startInd, qindex numElems, std::vector< qcomp > in)
void syncFullStateDiagMatr(FullStateDiagMatr matr)
void syncDiagMatr(DiagMatr matr)
void syncCompMatr(CompMatr matr)