The Quantum Exact Simulation Toolkit v4.2.0
matrices.cpp
/** @file
 * Definitions of API matrix data structures, and their getters
 * and setters, as well as reporting utilities.
 *
 * This file defines several layers of initialisation of complex
 * matrices, as explained in the header file.
 *
 * @author Tyson Jones
 */

#include "quest/include/matrices.h"
#include "quest/include/modes.h"
#include "quest/include/types.h"
#include "quest/include/environment.h"

#include "quest/src/core/validation.hpp"
#include "quest/src/core/autodeployer.hpp"
#include "quest/src/core/utilities.hpp"
#include "quest/src/core/localiser.hpp"
#include "quest/src/core/paulilogic.hpp"
#include "quest/src/core/printer.hpp"
#include "quest/src/core/bitwise.hpp"
#include "quest/src/core/fastmath.hpp"
#include "quest/src/core/memory.hpp"
#include "quest/src/comm/comm_config.hpp"
#include "quest/src/comm/comm_routines.hpp"
#include "quest/src/cpu/cpu_config.hpp"
#include "quest/src/gpu/gpu_config.hpp"
#include "quest/src/cpu/cpu_subroutines.hpp"

#include <iostream>
#include <cstdlib>
#include <string>
#include <vector>

using std::vector;


/*
 * FIXED-SIZE MATRIX VECTOR GETTERS
 *
 * enabling getCompMatr1 (etc) to receive vectors, in addition
 * to the existing pointer and array overloads in the header.
 * These are necessarily defined here in the source, unlike the
 * other header functions, so that they may use private validation.
 */


CompMatr1 getCompMatr1(vector<vector<qcomp>> in) {
    validate_matrixNumNewElems(1, in, __func__);

    qcomp* rowPtrs[] = {in[0].data(), in[1].data()};
    return getCompMatr1(rowPtrs);
}

CompMatr2 getCompMatr2(vector<vector<qcomp>> in) {
    validate_matrixNumNewElems(2, in, __func__);

    qcomp* rowPtrs[] = {in[0].data(), in[1].data(), in[2].data(), in[3].data()};
    return getCompMatr2(rowPtrs);
}


DiagMatr1 getDiagMatr1(vector<qcomp> in) {
    validate_matrixNumNewElems(1, in, __func__);

    return getDiagMatr1(in.data());
}

DiagMatr2 getDiagMatr2(vector<qcomp> in) {
    validate_matrixNumNewElems(2, in, __func__);

    return getDiagMatr2(in.data());
}
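
// For illustration only: a hypothetical caller (outside this file, with the QuEST
// header included) could pass brace-initialised vectors to these overloads, e.g.
//
//     CompMatr1 z  = getCompMatr1({{1, 0}, {0, -1}});   // dense 2x2 Pauli-Z
//     DiagMatr2 cz = getDiagMatr2({1, 1, 1, -1});       // diagonal of the two-qubit CZ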



/*
 * PRIVATE VARIABLE-SIZE MATRIX UTILITIES
 */


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void freeHeapMatrix(T matr) {

    // WARNING: this is not overwriting any freed pointers with null,
    // since the caller's struct is not changed. This is fine here,
    // but would be an issue if the struct contained nested pointers,
    // since caller would not know an outer pointer was freed and
    // ergo that it should not be enumerated (to check/free inner ptr)

    // free the 1D or 2D matrix - safe even if nullptr
    if constexpr (util_isDenseMatrixType<T>()) {
        cpu_deallocMatrixWrapper(matr.cpuElems);
        cpu_deallocArray(matr.cpuElemsFlat);
    } else
        cpu_deallocArray(matr.cpuElems);

    // we avoid invoking a GPU function in non-GPU mode
    auto gpuPtr = util_getGpuMemPtr(matr);
    if (mem_isAllocated(gpuPtr))
        gpu_deallocArray(gpuPtr);

    // free the teeny tiny heap flags
    util_deallocEpsilonSensitiveHeapFlag(matr.isApproxUnitary);
    util_deallocEpsilonSensitiveHeapFlag(matr.isApproxHermitian);
    cpu_deallocHeapFlag(matr.wasGpuSynced);

    // only diagonal matrices (which can be raised to
    // exponents) need their negativity/zeroness checked
    if constexpr (!util_isDenseMatrixType<T>()) {
        util_deallocEpsilonSensitiveHeapFlag(matr.isApproxNonZero);
        cpu_deallocHeapFlag(matr.isStrictlyNonNegative);
    }
}


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
bool didAnyLocalAllocsFail(T matr) {

    // god help us if these single-integer mallocs failed
    if (!mem_isAllocated(matr.isApproxUnitary)) return true;
    if (!mem_isAllocated(matr.isApproxHermitian)) return true;
    if (!mem_isAllocated(matr.wasGpuSynced)) return true;

    // only diagonal matrices (which can be raised to
    // exponents) have these additional fields
    if constexpr (!util_isDenseMatrixType<T>()) {
        if (!mem_isAllocated(matr.isApproxNonZero)) return true;
        if (!mem_isAllocated(matr.isStrictlyNonNegative)) return true;
    }

    // outer CPU memory should always be allocated
    if constexpr (util_isDenseMatrixType<T>()) {
        if (!mem_isAllocated(matr.cpuElemsFlat)) return true;
        if (!mem_isOuterAllocated(matr.cpuElems)) return true;
    } else
        if (!mem_isAllocated(matr.cpuElems)) return true;

    // if GPU memory is not allocated in a GPU environment...
    bool isGpuAlloc = mem_isAllocated(util_getGpuMemPtr(matr));
    if (getQuESTEnv().isGpuAccelerated && !isGpuAlloc) {

        // then FullStateDiagMatr GPU alloc failed only if it tried...
        if constexpr (util_isFullStateDiagMatr<T>()) {
            if (matr.isGpuAccelerated)
                return true;

        // but all other matrices always try to alloc, so must have failed
        } else
            return true;
    }

    // otherwise, all pointers were non-NULL and ergo all allocs were successful
    return false;
}


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void freeAllMemoryIfAnyAllocsFailed(T matr) {

    // ascertain whether any allocs failed on any node
    bool anyFail = didAnyLocalAllocsFail(matr);
    if (comm_isInit())
        anyFail = comm_isTrueOnAllNodes(anyFail);

    // if so, free all heap fields
    if (anyFail)
        freeHeapMatrix(matr);
}


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void validateMatrixAllocs(T matr, const char* caller) {

    // free memory before throwing validation error to avoid memory leaks
    freeAllMemoryIfAnyAllocsFailed(matr);
    validate_newMatrixAllocs(matr, caller);
}


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void setInitialHeapFlags(T matr) {

    // set initial properties of the newly created matrix to unknown
    util_setFlagToUnknown(matr.isApproxUnitary);
    util_setFlagToUnknown(matr.isApproxHermitian);

    // only diagonal matrices (which can be exponentiated)
    // have these additional fields
    if constexpr (!util_isDenseMatrixType<T>()) {
        util_setFlagToUnknown(matr.isApproxNonZero);
        util_setFlagToUnknown(matr.isStrictlyNonNegative);
    }

    // indicate that GPU memory has not yet been synchronised
    *(matr.wasGpuSynced) = 0;
}



/*
 * VARIABLE-SIZE MATRIX CONSTRUCTORS
 */


extern "C" CompMatr createCompMatr(int numQubits) {
    validate_envIsInit(__func__);
    validate_newCompMatrParams(numQubits, __func__);

    // validation ensures these never overflow
    qindex numRows = powerOf2(numQubits);
    qindex numElems = numRows * numRows;

    // attempt to allocate 1D memory
    qcomp* cpuMem = cpu_allocArray(numElems); // nullptr if failed
    qcomp* gpuMem = nullptr;
    if (getQuESTEnv().isGpuAccelerated)
        gpuMem = gpu_allocArray(numElems); // nullptr if failed

    // prepare output CompMatr (avoiding C++20 designated initialiser)
    CompMatr out;
    out.numQubits = numQubits;
    out.numRows = numRows;

    // attempt to allocate (uninitialised) flags in the heap so that struct copies are mutable
    out.isApproxUnitary = util_allocEpsilonSensitiveHeapFlag(); // nullptr if failed
    out.isApproxHermitian = util_allocEpsilonSensitiveHeapFlag();
    out.wasGpuSynced = cpu_allocHeapFlag(); // nullptr if failed

    // attempt to allocate a 2D alias for the 1D CPU memory
    out.cpuElems = cpu_allocAndInitMatrixWrapper(cpuMem, numRows); // nullptr if failed
    out.cpuElemsFlat = cpuMem;
    out.gpuElemsFlat = gpuMem;

    validateMatrixAllocs(out, __func__);
    setInitialHeapFlags(out);
    return out;
}


extern "C" DiagMatr createDiagMatr(int numQubits) {
    validate_envIsInit(__func__);
    validate_newDiagMatrParams(numQubits, __func__);

    // validation ensures this never overflows
    qindex numElems = powerOf2(numQubits);

    // prepare output DiagMatr (avoiding C++20 designated initialiser)
    DiagMatr out;
    out.numQubits = numQubits;
    out.numElems = numElems;

    // attempt to allocate (uninitialised) flags in the heap so that struct copies are mutable
    out.isApproxUnitary = util_allocEpsilonSensitiveHeapFlag(); // nullptr if failed
    out.isApproxHermitian = util_allocEpsilonSensitiveHeapFlag();
    out.isApproxNonZero = util_allocEpsilonSensitiveHeapFlag();
    out.isStrictlyNonNegative = cpu_allocHeapFlag(); // nullptr if failed
    out.wasGpuSynced = cpu_allocHeapFlag();

    // attempt to allocate 1D memory (nullptr if failed or not allocated)
    out.cpuElems = cpu_allocArray(numElems);
    out.gpuElems = (getQuESTEnv().isGpuAccelerated)? gpu_allocArray(numElems) : nullptr;

    validateMatrixAllocs(out, __func__);
    setInitialHeapFlags(out);
    return out;
}


FullStateDiagMatr validateAndCreateCustomFullStateDiagMatr(int numQubits, int useDistrib, int useGpuAccel, int useMultithread, const char* caller) {
    validate_envIsInit(caller);
    QuESTEnv env = getQuESTEnv();

    // validate parameters before passing them to autodeployer
    validate_newFullStateDiagMatrParams(numQubits, useDistrib, useGpuAccel, useMultithread, caller);

    // overwrite all args left as AUTO_FLAG
    autodep_chooseFullStateDiagMatrDeployment(numQubits, useDistrib, useGpuAccel, useMultithread, env);

    // validation ensures this never overflows
    qindex numElems = powerOf2(numQubits);
    qindex numElemsPerNode = numElems / (useDistrib? env.numNodes : 1); // divides evenly

    // prepare output FullStateDiagMatr (avoiding C++20 designated initialiser)
    FullStateDiagMatr out;
    out.numQubits = numQubits;
    out.numElems = numElems;

    // bind deployments, disabling distribution if using a single MPI node
    out.isGpuAccelerated = useGpuAccel;
    out.isMultithreaded = useMultithread;
    out.isDistributed = useDistrib && (env.numNodes > 1);
    out.numElemsPerNode = numElemsPerNode;

    // allocate (uninitialised) flags in the heap so that struct copies are mutable
    out.isApproxUnitary = util_allocEpsilonSensitiveHeapFlag(); // nullptr if failed
    out.isApproxHermitian = util_allocEpsilonSensitiveHeapFlag();
    out.isApproxNonZero = util_allocEpsilonSensitiveHeapFlag();
    out.isStrictlyNonNegative = cpu_allocHeapFlag(); // nullptr if failed
    out.wasGpuSynced = cpu_allocHeapFlag();

    // allocate 1D memory (nullptr if failed or not allocated)
    out.cpuElems = cpu_allocArray(numElemsPerNode);
    out.gpuElems = (useGpuAccel)? gpu_allocArray(numElemsPerNode) : nullptr;

    validateMatrixAllocs(out, __func__);
    setInitialHeapFlags(out);
    return out;
}

extern "C" FullStateDiagMatr createCustomFullStateDiagMatr(int numQubits, int useDistrib, int useGpuAccel, int useMultithread) {

    return validateAndCreateCustomFullStateDiagMatr(numQubits, useDistrib, useGpuAccel, useMultithread, __func__);
}

extern "C" FullStateDiagMatr createFullStateDiagMatr(int numQubits) {

    return validateAndCreateCustomFullStateDiagMatr(numQubits, modeflag::USE_AUTO, modeflag::USE_AUTO, modeflag::USE_AUTO, __func__);
}
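
// A minimal usage sketch (hypothetical caller code, not part of this file): the
// create* constructors heap-allocate a matrix which must later be freed, e.g.
//
//     CompMatr m = createCompMatr(4);                       // uninitialised 16x16 dense matrix
//     FullStateDiagMatr d = createFullStateDiagMatr(20);    // auto-deployed 2^20-element diagonal
//     ...
//     destroyCompMatr(m);
//     destroyFullStateDiagMatr(d);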



/*
 * VARIABLE-SIZE MATRIX SYNC
 */


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void markMatrixAsSynced(T matr) {

    // indicate that the matrix is now permanently GPU synchronised, even
    // if we are not in GPU-accelerated mode (in which case it's never consulted)
    *(matr.wasGpuSynced) = 1;

    // indicate that we do not know the revised matrix properties;
    // we defer establishing that until validation needs to check them
    util_setFlagToUnknown(matr.isApproxUnitary);
    util_setFlagToUnknown(matr.isApproxHermitian);

    // only diagonal matrices (which can be exponentiated)
    // have these additional fields
    if constexpr (!util_isDenseMatrixType<T>()) {
        util_setFlagToUnknown(matr.isApproxNonZero);
        util_setFlagToUnknown(matr.isStrictlyNonNegative);
    }
}


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void validateAndSyncMatrix(T matr, const char* caller) {
    validate_matrixFields(matr, caller);

    // optionally overwrite GPU elements with user-modified CPU elements
    if (mem_isAllocated(util_getGpuMemPtr(matr)))
        gpu_copyCpuToGpu(matr);

    markMatrixAsSynced(matr);
}


// de-mangled for both C++ and C compatibility
extern "C" {

    void syncCompMatr(CompMatr matr) { validateAndSyncMatrix(matr, __func__); }
    void syncDiagMatr(DiagMatr matr) { validateAndSyncMatrix(matr, __func__); }
    void syncFullStateDiagMatr(FullStateDiagMatr matr) { validateAndSyncMatrix(matr, __func__); }

}
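
// Sketch of the intended user workflow (hypothetical caller code): CPU elements may
// be modified directly, after which the matrix must be re-synced so that any GPU
// copy is updated before the matrix is next applied.
//
//     CompMatr m = createCompMatr(2);
//     m.cpuElems[0][0] = 1;              // (remaining elements would also be set in real code)
//     m.cpuElems[1][1] = -1;
//     syncCompMatr(m);                   // copies CPU elements to GPU memory, if it exists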



/*
 * VARIABLE-SIZE MATRIX DESTRUCTION
 */


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void validateAndDestroyMatrix(T matrix, const char* caller) {
    validate_matrixFields(matrix, caller);

    freeHeapMatrix(matrix);
}


// de-mangled for C++ and C compatibility
extern "C" {

    void destroyCompMatr(CompMatr matr) { validateAndDestroyMatrix(matr, __func__); }
    void destroyDiagMatr(DiagMatr matr) { validateAndDestroyMatrix(matr, __func__); }
    void destroyFullStateDiagMatr(FullStateDiagMatr matr) { validateAndDestroyMatrix(matr, __func__); }

}



/*
 * VARIABLE-SIZE MATRIX SETTERS VIA POINTERS
 */


// type T can be qcomp** or vector<vector<qcomp>>, but qcomp(*)[] is handled by header
template <typename T>
void setAndSyncDenseMatrElems(CompMatr out, T elems) {

    // copy elems into matrix's CPU memory
    cpu_copyMatrix(out.cpuElems, elems, out.numRows);

    // overwrite GPU elements; validation guaranteed to pass
    syncCompMatr(out);
}


extern "C" void setCompMatr(CompMatr out, qcomp** in) {
    validate_matrixFields(out, __func__);
    validate_matrixNewElemsPtrNotNull(in, out.numRows, __func__);

    setAndSyncDenseMatrElems(out, in);
}


extern "C" void setDiagMatr(DiagMatr out, qcomp* in) {
    validate_matrixFields(out, __func__);
    validate_matrixNewElemsPtrNotNull(in, __func__);

    // overwrite CPU memory
    cpu_copyArray(out.cpuElems, in, out.numElems);

    // overwrite GPU elements; validation guaranteed to pass
    syncDiagMatr(out);
}


extern "C" void setFullStateDiagMatr(FullStateDiagMatr out, qindex startInd, qcomp* in, qindex numElems) {
    validate_matrixFields(out, __func__);
    validate_fullStateDiagMatrNewElems(out, startInd, numElems, __func__);
    validate_matrixNewElemsPtrNotNull(in, __func__);

    // overwrites both the CPU and GPU memory (if it exists), maintaining consistency.
    // note that cpu_copyArray() isn't called directly here like in setDiagMatr() above,
    // because we must handle when 'out' is and isn't distributed
    localiser_fullstatediagmatr_setElems(out, startInd, in, numElems);

    // even though we have not necessarily overwritten every element, we must mark
    // the matrix as synced so that it can be subsequently used without error
    markMatrixAsSynced(out);
}
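
// A minimal sketch (hypothetical caller code): setFullStateDiagMatr() overwrites only
// a contiguous sub-range of the diagonal, beginning at startInd, and works whether or
// not 'out' is distributed.
//
//     qcomp amps[] = {0.5, 0.5};
//     setFullStateDiagMatr(matr, 6, amps, 2);   // overwrites (global) elements 6 and 7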



/*
 * VARIABLE-SIZE MATRIX SETTERS VIA VECTORS
 */


void setCompMatr(CompMatr out, vector<vector<qcomp>> in) {
    validate_matrixFields(out, __func__);
    validate_matrixNumNewElems(out.numQubits, in, __func__);

    setAndSyncDenseMatrElems(out, in);
}


void setDiagMatr(DiagMatr out, vector<qcomp> in) {
    validate_matrixFields(out, __func__);
    validate_matrixNumNewElems(out.numQubits, in, __func__); // validates 'in' dim

    setDiagMatr(out, in.data()); // harmlessly re-validates
}


void setFullStateDiagMatr(FullStateDiagMatr out, qindex startInd, vector<qcomp> in) {

    setFullStateDiagMatr(out, startInd, in.data(), in.size());
}


// no bespoke array functions are necessary for diagonal matrix initialisation,
// since passed arrays automatically decay to pointers



/*
 * VARIABLE-SIZE MATRIX SETTERS VIA LITERALS
 *
 * Only the C++ versions are defined here, while the C versions are macros
 * defined in the header. Note the C++ versions are strictly superfluous,
 * merely calling the above vector setters, but we define them anyway for
 * API consistency, and additionally validate the extra parameters they pass.
 */


void setInlineCompMatr(CompMatr matr, int numQb, vector<vector<qcomp>> in) {
    validate_matrixFields(matr, __func__);
    validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, __func__);
    validate_matrixNumNewElems(matr.numQubits, in, __func__);

    setAndSyncDenseMatrElems(matr, in);
}

void setInlineDiagMatr(DiagMatr matr, int numQb, vector<qcomp> in) {
    validate_matrixFields(matr, __func__);
    validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, __func__);
    validate_matrixNumNewElems(matr.numQubits, in, __func__);

    setDiagMatr(matr, in.data()); // validation guaranteed to pass
}

void setInlineFullStateDiagMatr(FullStateDiagMatr matr, qindex startInd, qindex numElems, vector<qcomp> in) {
    validate_matrixFields(matr, __func__);
    validate_declaredNumElemsMatchesVectorLength(numElems, in.size(), __func__);
    validate_fullStateDiagMatrNewElems(matr, startInd, numElems, __func__);

    setFullStateDiagMatr(matr, startInd, in); // validation guaranteed to pass
}
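
// For illustration, these C++ overloads permit brace-enclosed literals directly
// (hypothetical caller code, assuming a 1-qubit CompMatr 'cm' and DiagMatr 'dm' exist):
//
//     setInlineCompMatr(cm, 1, {{0, 1}, {1, 0}});   // dense Pauli-X
//     setInlineDiagMatr(dm, 1, {1, -1});            // diagonal Pauli-Z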



/*
 * VARIABLE-SIZE MATRIX INLINE-SETTER CONSTRUCTORS
 *
 * Only the C++ versions are defined here; the C versions are header macros
 */


CompMatr createInlineCompMatr(int numQb, vector<vector<qcomp>> elems) {
    validate_envIsInit(__func__);
    validate_newCompMatrParams(numQb, __func__);
    validate_matrixNumNewElems(numQb, elems, __func__);

    // pre-validation guaranteed to pass, but malloc failures will trigger an error
    // message specific to 'createCompMatr', rather than this 'inline' version. Alas!
    CompMatr matr = createCompMatr(numQb);
    setAndSyncDenseMatrElems(matr, elems);
    return matr;
}


DiagMatr createInlineDiagMatr(int numQb, vector<qcomp> elems) {
    validate_envIsInit(__func__);
    validate_newDiagMatrParams(numQb, __func__);
    validate_matrixNumNewElems(numQb, elems, __func__);

    // pre-validation guaranteed to pass, but malloc failures will trigger an error
    // message specific to 'createDiagMatr', rather than this 'inline' version. Alas!
    DiagMatr matr = createDiagMatr(numQb);
    setDiagMatr(matr, elems.data()); // validation guaranteed to pass
    return matr;
}
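
// A brief usage sketch (hypothetical caller code): these constructors combine
// creation and element-setting into a single call, e.g.
//
//     CompMatr x  = createInlineCompMatr(1, {{0, 1}, {1, 0}});
//     DiagMatr cz = createInlineDiagMatr(2, {1, 1, 1, -1});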



/*
 * EXPOSING SOME SETTER VALIDATION TO HEADER
 *
 * Some setters are necessarily defined in the header, because they accept
 * C-only VLAs which need to be cast into pointers before being passed to
 * this C++ backend (which does not support VLA). These setters need their
 * validators exposed, though we cannot expose the entirety of validation.hpp
 * because it cannot be parsed by C; so we here wrap specific functions.
 */


extern "C" {

    void _validateNewNestedElemsPtrNotNull(qcomp** ptrs, int numQubits, const char* caller) {

        validate_matrixNewElemsPtrNotNull(ptrs, powerOf2(numQubits), caller);
    }
    void _validateNewElemsPtrNotNull(qcomp* ptr, const char* caller) {

        validate_matrixNewElemsPtrNotNull(ptr, caller);
    }

    void _validateParamsToSetCompMatrFromArr(CompMatr matr) {

        validate_matrixFields(matr, "setCompMatr");
    }

    void _validateParamsToSetInlineCompMatr(CompMatr matr, int numQb) {

        const char* caller = "setInlineCompMatr";
        validate_matrixFields(matr, caller);
        validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, caller);
    }

    void _validateParamsToSetInlineDiagMatr(DiagMatr matr, int numQb) {

        const char* caller = "setInlineDiagMatr";
        validate_matrixFields(matr, caller);
        validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, caller);
    }

    void _validateParamsToSetInlineFullStateDiagMatr(FullStateDiagMatr matr, qindex startInd, qindex numElems) {

        const char* caller = "setInlineFullStateDiagMatr";
        validate_matrixFields(matr, caller);
        validate_fullStateDiagMatrNewElems(matr, startInd, numElems, caller);
    }

    void _validateParamsToCreateInlineCompMatr(int numQb) {

        const char* caller = "createInlineCompMatr";
        validate_envIsInit(caller);
        validate_newCompMatrParams(numQb, caller);
    }

    void _validateParamsToCreateInlineDiagMatr(int numQb) {

        const char* caller = "createInlineDiagMatr";
        validate_envIsInit(caller);
        validate_newDiagMatrParams(numQb, caller);
    }

}



/*
 * SPECIAL CREATORS AND SETTERS
 */


extern "C" void setFullStateDiagMatrFromPauliStrSum(FullStateDiagMatr out, PauliStrSum in) {
    validate_matrixFields(out, __func__);
    validate_pauliStrSumFields(in, __func__);
    validate_pauliStrSumCanInitMatrix(out, in, __func__);

    // permit 'in' to be non-Hermitian since it does not determine 'out' unitarity

    // unlike other FullStateDiagMatr initialisers, we employ an accelerated
    // backend since the input data 'in' is expectedly significantly smaller
    // than the created data in 'out', making parallelisation worthwhile as
    // the memory-movement costs of copying 'in' to a GPU are small
    localiser_fullstatediagmatr_setElemsToPauliStrSum(out, in);

    markMatrixAsSynced(out);
}


extern "C" FullStateDiagMatr createFullStateDiagMatrFromPauliStrSum(PauliStrSum in) {
    validate_pauliStrSumFields(in, __func__);

    // ensure createFullStateDiagMatr() below succeeds (so if not, that thrower name is correct)
    int numQubits = 1 + paulis_getIndOfLefmostNonIdentityPauli(in);
    validate_newFullStateDiagMatrParams(numQubits, modeflag::USE_AUTO, modeflag::USE_AUTO, modeflag::USE_AUTO, __func__);

    // permit 'in' to be non-Hermitian since it does not determine 'out' unitarity

    FullStateDiagMatr out = createFullStateDiagMatr(numQubits);
    localiser_fullstatediagmatr_setElemsToPauliStrSum(out, in);
    markMatrixAsSynced(out);
    return out;
}


extern "C" void setDiagMatrFromMultiVarFunc(DiagMatr out, qcomp (*callbackFunc)(qindex*), int* numQubitsPerVar, int numVars, int areSigned) {
    validate_matrixFields(out, __func__);
    validate_multiVarFuncQubits(out.numQubits, numQubitsPerVar, numVars, __func__);
    validate_funcVarSignedFlag(areSigned, __func__);

    vector<qindex> varValues(numVars);

    // set each element of the diagonal in turn; the user's callback might not be thread-safe
    for (qindex elemInd=0; elemInd<out.numElems; elemInd++) {
        fast_getSubQuregValues(elemInd, numQubitsPerVar, numVars, areSigned, varValues.data());

        // call user function, and update only the CPU elems
        out.cpuElems[elemInd] = callbackFunc(varValues.data());
    }

    // overwrite all GPU elems
    syncDiagMatr(out);
}
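
// A sketch of a matching callback (hypothetical caller code): with two variables of
// 2 and 3 qubits respectively, the diagonal of a 5-qubit DiagMatr could be populated as
//
//     qcomp prodFunc(qindex* vals) { return qcomp(vals[0] * vals[1], 0); }
//     ...
//     int qubitsPerVar[] = {2, 3};
//     setDiagMatrFromMultiVarFunc(matr, prodFunc, qubitsPerVar, 2, 0);   // unsigned variables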


extern "C" void setFullStateDiagMatrFromMultiVarFunc(FullStateDiagMatr out, qcomp (*callbackFunc)(qindex*), int* numQubitsPerVar, int numVars, int areSigned) {
    validate_matrixFields(out, __func__);
    validate_multiVarFuncQubits(out.numQubits, numQubitsPerVar, numVars, __func__);
    validate_funcVarSignedFlag(areSigned, __func__);

    // we assume callbackFunc is thread-safe (!!!!) and possibly use multithreading, but never
    // GPU acceleration, since we cannot invoke user callback functions from GPU kernels
    cpu_fullstatediagmatr_setElemsFromMultiVarFunc(out, callbackFunc, numQubitsPerVar, numVars, areSigned);

    // overwrite all GPU elems
    syncFullStateDiagMatr(out);
}


extern "C" void setDiagMatrFromMultiDimLists(DiagMatr out, void* lists, int* numQubitsPerDim, int numDims) {
    validate_matrixFields(out, __func__);
    validate_multiVarFuncQubits(out.numQubits, numQubitsPerDim, numDims, __func__);

    vector<qindex> listInds(numDims);

    // set each element of the diagonal in turn, which is embarrassingly parallel,
    // although we do not parallelise - the DiagMatr is intended to be small
    for (qindex elemInd=0; elemInd<out.numElems; elemInd++) {

        // nested list indices = unsigned integer values of variables
        fast_getSubQuregValues(elemInd, numQubitsPerDim, numDims, false, listInds.data());

        // update only the CPU elems
        out.cpuElems[elemInd] = util_getElemFromNestedPtrs(lists, listInds.data(), numDims);
    }

    // overwrite all GPU elems
    syncDiagMatr(out);
}
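
// A rough sketch (hypothetical caller code, assuming the nested-pointer convention
// implied by util_getElemFromNestedPtrs): two dimensions of one qubit each form a 2x2
// nested list whose entries populate the 4-element diagonal of a 2-qubit DiagMatr.
//
//     qcomp row0[] = {1, 2}, row1[] = {3, 4};
//     qcomp* rows[] = {row0, row1};
//     int qubitsPerDim[] = {1, 1};
//     setDiagMatrFromMultiDimLists(matr, rows, qubitsPerDim, 2);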


extern "C" void setFullStateDiagMatrFromMultiDimLists(FullStateDiagMatr out, void* lists, int* numQubitsPerDim, int numDims) {
    validate_matrixFields(out, __func__);
    validate_multiVarFuncQubits(out.numQubits, numQubitsPerDim, numDims, __func__);

    // possibly use multithreading, but never GPU acceleration, due to the
    // arbitrarily nested nature of the input lists
    cpu_fullstatediagmatr_setElemsFromMultiDimLists(out, lists, numQubitsPerDim, numDims);

    // overwrite all GPU elems
    syncFullStateDiagMatr(out);
}



/*
 * MATRIX REPORTERS
 */


// type T can be CompMatr1, CompMatr2, CompMatr, DiagMatr1, DiagMatr2, DiagMatr, FullStateDiagMatr
template<class T>
void validateAndPrintMatrix(T matr, const char* caller) {
    validate_matrixFields(matr, caller);
    validate_numReportedNewlinesAboveZero(__func__); // because a trailing newline is mandatory

    // syncable matrices must be synced before reporting (though only CPU elems are printed)
    if constexpr (util_isHeapMatrixType<T>())
        validate_matrixIsSynced(matr, caller);

    // calculate the total memory (in bytes) consumed by the matrix on each
    // node, which will depend on whether the matrix is distributed, and
    // includes the size of the matrix struct itself. Note that GPU memory
    // is not included since it is less than or equal to the CPU memory, and
    // occupies a different memory space, which would confuse capacity accounting
    int numNodes = (util_isDistributedMatrix(matr))? comm_getNumNodes() : 1;
    size_t elemMem = mem_getLocalMatrixMemoryRequired(matr.numQubits, util_isDenseMatrixType<T>(), numNodes);
    size_t structMem = sizeof(matr);

    // struct memory includes fixed-size arrays (qcomp[][]), so we undo double-counting
    if (util_isFixedSizeMatrixType<T>())
        structMem -= elemMem;

    size_t numBytesPerNode = elemMem + structMem;
    print_header(matr, numBytesPerNode);
    print_elems(matr);

    // exclude the mandatory newline above
    print_oneFewerNewlines();
}


// all reporters are C and C++ accessible, so are de-mangled
extern "C" {

    void reportCompMatr1(CompMatr1 matr) { validateAndPrintMatrix(matr, __func__); }
    void reportCompMatr2(CompMatr2 matr) { validateAndPrintMatrix(matr, __func__); }
    void reportCompMatr (CompMatr matr) { validateAndPrintMatrix(matr, __func__); }
    void reportDiagMatr1(DiagMatr1 matr) { validateAndPrintMatrix(matr, __func__); }
    void reportDiagMatr2(DiagMatr2 matr) { validateAndPrintMatrix(matr, __func__); }
    void reportDiagMatr (DiagMatr matr) { validateAndPrintMatrix(matr, __func__); }
    void reportFullStateDiagMatr(FullStateDiagMatr matr) { validateAndPrintMatrix(matr, __func__); }

}
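
// A trivial usage sketch (hypothetical caller code): reporters print a header including
// the per-node memory footprint, followed by the (CPU) elements; heap matrices must be
// synced beforehand. For a previously created heap CompMatr 'm':
//
//     reportCompMatr1(getCompMatr1({{1, 0}, {0, -1}}));   // fixed-size, no sync required
//     syncCompMatr(m);
//     reportCompMatr(m);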