matrices.cpp
/** @file
 * Definitions of API matrix data structures, and their getters
 * and setters, as well as reporting utilities.
 *
 * This file defines several layers of initialisation of complex
 * matrices, as explained in the header file.
 *
 * @author Tyson Jones
 */

#include "quest/include/matrices.h"
#include "quest/include/environment.h"
#include "quest/include/types.h"

#include "quest/src/core/validation.hpp"
#include "quest/src/core/autodeployer.hpp"
#include "quest/src/core/utilities.hpp"
#include "quest/src/core/localiser.hpp"
#include "quest/src/core/printer.hpp"
#include "quest/src/core/bitwise.hpp"
#include "quest/src/core/fastmath.hpp"
#include "quest/src/core/memory.hpp"
#include "quest/src/comm/comm_config.hpp"
#include "quest/src/comm/comm_routines.hpp"
#include "quest/src/cpu/cpu_config.hpp"
#include "quest/src/gpu/gpu_config.hpp"
#include "quest/src/cpu/cpu_subroutines.hpp"

#include <iostream>
#include <cstdlib>
#include <string>
#include <vector>

using std::vector;



/*
 * FIXED-SIZE MATRIX VECTOR GETTERS
 *
 * enabling getCompMatr1 (etc) to receive vectors, in addition
 * to the existing pointer and array overloads in the header.
 * These are necessarily defined here in the source, unlike the
 * other header functions, so that they may use private validation.
 */


CompMatr1 getCompMatr1(vector<vector<qcomp>> in) {
    validate_matrixNumNewElems(1, in, __func__);

    qcomp* rowPtrs[] = {in[0].data(), in[1].data()};
    return getCompMatr1(rowPtrs);
}

CompMatr2 getCompMatr2(vector<vector<qcomp>> in) {
    validate_matrixNumNewElems(2, in, __func__);

    qcomp* rowPtrs[] = {in[0].data(), in[1].data(), in[2].data(), in[3].data()};
    return getCompMatr2(rowPtrs);
}


DiagMatr1 getDiagMatr1(vector<qcomp> in) {
    validate_matrixNumNewElems(1, in, __func__);

    return getDiagMatr1(in.data());
}

DiagMatr2 getDiagMatr2(vector<qcomp> in) {
    validate_matrixNumNewElems(2, in, __func__);

    return getDiagMatr2(in.data());
}
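
// For example (an illustrative sketch, not part of the API itself), these vector
// overloads permit brace-initialised literals:
//
//     CompMatr1 x = getCompMatr1({{0, 1}, {1, 0}});   // Pauli-X
//     DiagMatr1 z = getDiagMatr1({1, -1});            // Pauli-Z
//
// whereas the header's pointer and array overloads require existing storage.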



/*
 * PRIVATE VARIABLE-SIZE MATRIX UTILITIES
 */


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void freeHeapMatrix(T matr) {

    // WARNING: this is not overwriting any freed pointers with null,
    // since the caller's struct is not changed. This is fine here,
    // but would be an issue if the struct contained nested pointers,
    // since caller would not know an outer pointer was freed and
    // ergo that it should not be enumerated (to check/free inner ptr)

    // free the 1D or 2D matrix - safe even if nullptr
    if constexpr (util_isDenseMatrixType<T>()) {
        cpu_deallocMatrixWrapper(matr.cpuElems);
        cpu_deallocArray(matr.cpuElemsFlat);
    } else
        cpu_deallocArray(matr.cpuElems);

    // we avoid invoking a GPU function in non-GPU mode
    auto gpuPtr = util_getGpuMemPtr(matr);
    if (mem_isAllocated(gpuPtr))
        gpu_deallocArray(gpuPtr);

    // free the teeny tiny heap flags
    util_deallocEpsilonSensitiveHeapFlag(matr.isApproxUnitary);
    util_deallocEpsilonSensitiveHeapFlag(matr.isApproxHermitian);
    cpu_deallocHeapFlag(matr.wasGpuSynced);

    // only diagonal matrices (which can be raised to
    // exponents) need their negativity/zeroness checked
    if constexpr (!util_isDenseMatrixType<T>()) {
        util_deallocEpsilonSensitiveHeapFlag(matr.isApproxNonZero);
        cpu_deallocHeapFlag(matr.isStrictlyNonNegative);
    }
}


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
bool didAnyLocalAllocsFail(T matr) {

    // god help us if these single-integer mallocs failed
    if (!mem_isAllocated(matr.isApproxUnitary)) return true;
    if (!mem_isAllocated(matr.isApproxHermitian)) return true;
    if (!mem_isAllocated(matr.wasGpuSynced)) return true;

    // only diagonal matrices (which can be raised to
    // exponents) have these additional fields
    if constexpr (!util_isDenseMatrixType<T>()) {
        if (!mem_isAllocated(matr.isApproxNonZero)) return true;
        if (!mem_isAllocated(matr.isStrictlyNonNegative)) return true;
    }

    // outer CPU memory should always be allocated
    if constexpr (util_isDenseMatrixType<T>()) {
        if (!mem_isAllocated(matr.cpuElemsFlat)) return true;
        if (!mem_isOuterAllocated(matr.cpuElems)) return true;
    } else
        if (!mem_isAllocated(matr.cpuElems)) return true;

    // if GPU memory is not allocated in a GPU environment...
    bool isGpuAlloc = mem_isAllocated(util_getGpuMemPtr(matr));
    if (getQuESTEnv().isGpuAccelerated && !isGpuAlloc) {

        // then FullStateDiagMatr GPU alloc failed only if it tried...
        if constexpr (util_isFullStateDiagMatr<T>()) {
            if (matr.isGpuAccelerated)
                return true;

        // but all other matrices always try to alloc, so must have failed
        } else
            return true;
    }

    // otherwise, all pointers were non-NULL and ergo all allocs were successful
    return false;
}


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void freeAllMemoryIfAnyAllocsFailed(T matr) {

    // ascertain whether any allocs failed on any node
    bool anyFail = didAnyLocalAllocsFail(matr);
    if (comm_isInit())
        anyFail = comm_isTrueOnAllNodes(anyFail);

    // if so, free all heap fields
    if (anyFail)
        freeHeapMatrix(matr);
}


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void validateMatrixAllocs(T matr, const char* caller) {

    // free memory before throwing validation error to avoid memory leaks
    freeAllMemoryIfAnyAllocsFailed(matr);
    validate_newMatrixAllocs(matr, caller);
}


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void setInitialHeapFlags(T matr) {

    // set initial properties of the newly created matrix to unknown
    util_setFlagToUnknown(matr.isApproxUnitary);
    util_setFlagToUnknown(matr.isApproxHermitian);

    // only diagonal matrices (which can be exponentiated)
    // have these additional fields
    if constexpr (!util_isDenseMatrixType<T>()) {
        util_setFlagToUnknown(matr.isApproxNonZero);
        util_setFlagToUnknown(matr.isStrictlyNonNegative);
    }

    // indicate that GPU memory has not yet been synchronised
    *(matr.wasGpuSynced) = 0;
}



/*
 * VARIABLE-SIZE MATRIX CONSTRUCTORS
 */


extern "C" CompMatr createCompMatr(int numQubits) {
    validate_envIsInit(__func__);
    validate_newCompMatrParams(numQubits, __func__);

    // validation ensures these never overflow
    qindex numRows = powerOf2(numQubits);
    qindex numElems = numRows * numRows;

    // attempt to allocate 1D memory
    qcomp* cpuMem = cpu_allocArray(numElems); // nullptr if failed
    qcomp* gpuMem = nullptr;
    if (getQuESTEnv().isGpuAccelerated)
        gpuMem = gpu_allocArray(numElems); // nullptr if failed

    // prepare output CompMatr (avoiding C++20 designated initialiser)
    CompMatr out;
    out.numQubits = numQubits;
    out.numRows = numRows;

    // attempt to allocate (uninitialised) flags in the heap so that struct copies are mutable
    out.isApproxUnitary = util_allocEpsilonSensitiveHeapFlag(); // nullptr if failed
    out.isApproxHermitian = util_allocEpsilonSensitiveHeapFlag();
    out.wasGpuSynced = cpu_allocHeapFlag(); // nullptr if failed

    // attempt to allocate 2D alias for 1D CPU memory
    out.cpuElems = cpu_allocAndInitMatrixWrapper(cpuMem, numRows); // nullptr if failed
    out.cpuElemsFlat = cpuMem;
    out.gpuElemsFlat = gpuMem;

    validateMatrixAllocs(out, __func__);
    setInitialHeapFlags(out);
    return out;
}
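
// Example lifecycle (an illustrative sketch, not library code): a caller can
// populate the returned matrix through its CPU pointers before synchronising:
//
//     CompMatr m = createCompMatr(2);
//     m.cpuElems[0][0] = 1;    // modify CPU elements in-place
//     syncCompMatr(m);         // push the changes to GPU memory (if allocated)
//     destroyCompMatr(m);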


extern "C" DiagMatr createDiagMatr(int numQubits) {
    validate_envIsInit(__func__);
    validate_newDiagMatrParams(numQubits, __func__);

    // validation ensures this never overflows
    qindex numElems = powerOf2(numQubits);

    // prepare output DiagMatr (avoiding C++20 designated initialiser)
    DiagMatr out;
    out.numQubits = numQubits;
    out.numElems = numElems;

    // attempt to allocate (uninitialised) flags in the heap so that struct copies are mutable
    out.isApproxUnitary = util_allocEpsilonSensitiveHeapFlag(); // nullptr if failed
    out.isApproxHermitian = util_allocEpsilonSensitiveHeapFlag();
    out.isApproxNonZero = util_allocEpsilonSensitiveHeapFlag();
    out.isStrictlyNonNegative = cpu_allocHeapFlag(); // nullptr if failed
    out.wasGpuSynced = cpu_allocHeapFlag();

    // attempt to allocate 1D memory (nullptr if failed or not allocated)
    out.cpuElems = cpu_allocArray(numElems);
    out.gpuElems = (getQuESTEnv().isGpuAccelerated)? gpu_allocArray(numElems) : nullptr;

    validateMatrixAllocs(out, __func__);
    setInitialHeapFlags(out);
    return out;
}


FullStateDiagMatr validateAndCreateCustomFullStateDiagMatr(int numQubits, int useDistrib, int useGpuAccel, int useMultithread, const char* caller) {
    validate_envIsInit(caller);
    QuESTEnv env = getQuESTEnv();

    // validate parameters before passing them to autodeployer
    validate_newFullStateDiagMatrParams(numQubits, useDistrib, useGpuAccel, useMultithread, caller);

    // overwrite useDistrib and useGpuAccel if they were left as AUTO_FLAG
    autodep_chooseFullStateDiagMatrDeployment(numQubits, useDistrib, useGpuAccel, useMultithread, env);

    // validation ensures this never overflows
    qindex numElems = powerOf2(numQubits);
    qindex numElemsPerNode = numElems / (useDistrib? env.numNodes : 1); // divides evenly

    // prepare output FullStateDiagMatr (avoiding C++20 designated initialiser)
    FullStateDiagMatr out;
    out.numQubits = numQubits;
    out.numElems = numElems;

    // bind deployments, disabling distribution if using a single MPI node
    out.isGpuAccelerated = useGpuAccel;
    out.isMultithreaded = useMultithread;
    out.isDistributed = useDistrib && (env.numNodes > 1);
    out.numElemsPerNode = numElemsPerNode;

    // allocate (uninitialised) flags in the heap so that struct copies are mutable
    out.isApproxUnitary = util_allocEpsilonSensitiveHeapFlag(); // nullptr if failed
    out.isApproxHermitian = util_allocEpsilonSensitiveHeapFlag();
    out.isApproxNonZero = util_allocEpsilonSensitiveHeapFlag();
    out.isStrictlyNonNegative = cpu_allocHeapFlag(); // nullptr if failed
    out.wasGpuSynced = cpu_allocHeapFlag();

    // allocate 1D memory (nullptr if failed or not allocated)
    out.cpuElems = cpu_allocArray(numElemsPerNode);
    out.gpuElems = (useGpuAccel)? gpu_allocArray(numElemsPerNode) : nullptr;

    validateMatrixAllocs(out, __func__);
    setInitialHeapFlags(out);
    return out;
}

extern "C" FullStateDiagMatr createCustomFullStateDiagMatr(int numQubits, int useDistrib, int useGpuAccel, int useMultithread) {

    return validateAndCreateCustomFullStateDiagMatr(numQubits, useDistrib, useGpuAccel, useMultithread, __func__);
}

extern "C" FullStateDiagMatr createFullStateDiagMatr(int numQubits) {

    return validateAndCreateCustomFullStateDiagMatr(numQubits, modeflag::USE_AUTO, modeflag::USE_AUTO, modeflag::USE_AUTO, __func__);
}
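
// For example (illustrative only): the custom constructor forces or forbids each
// deployment, whereas createFullStateDiagMatr() defers every choice to the autodeployer:
//
//     FullStateDiagMatr a = createFullStateDiagMatr(20);                  // automatic deployment
//     FullStateDiagMatr b = createCustomFullStateDiagMatr(20, 1, 0, 1);   // distributed (given MPI), no GPU, multithreaded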



/*
 * VARIABLE-SIZE MATRIX SYNC
 */


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void markMatrixAsSynced(T matr) {

    // indicate that the matrix is now permanently GPU synchronised, even
    // if we are not in GPU-accelerated mode (in which case it's never consulted)
    *(matr.wasGpuSynced) = 1;

    // indicate that we do not know the revised matrix properties;
    // we defer establishing that until validation needs to check them
    util_setFlagToUnknown(matr.isApproxUnitary);
    util_setFlagToUnknown(matr.isApproxHermitian);

    // only diagonal matrices (which can be exponentiated)
    // have these additional fields
    if constexpr (!util_isDenseMatrixType<T>()) {
        util_setFlagToUnknown(matr.isApproxNonZero);
        util_setFlagToUnknown(matr.isStrictlyNonNegative);
    }
}


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void validateAndSyncMatrix(T matr, const char* caller) {
    validate_matrixFields(matr, caller);

    // optionally overwrite GPU elements with user-modified CPU elements
    if (mem_isAllocated(util_getGpuMemPtr(matr)))
        gpu_copyCpuToGpu(matr);

    markMatrixAsSynced(matr);
}


// de-mangled for both C++ and C compatibility
extern "C" {

    void syncCompMatr(CompMatr matr) { validateAndSyncMatrix(matr, __func__); }
    void syncDiagMatr(DiagMatr matr) { validateAndSyncMatrix(matr, __func__); }
    void syncFullStateDiagMatr(FullStateDiagMatr matr) { validateAndSyncMatrix(matr, __func__); }

}
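
// Example (illustrative sketch): explicit syncing is only needed after directly
// modifying a matrix's .cpuElems; the setters below perform it automatically:
//
//     DiagMatr d = createDiagMatr(3);
//     for (qindex i = 0; i < d.numElems; i++)
//         d.cpuElems[i] = qcomp(i, 0);
//     syncDiagMatr(d);   // copies the CPU elements to GPU memory, if it exists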



/*
 * VARIABLE-SIZE MATRIX DESTRUCTION
 */


// type T can be CompMatr, DiagMatr or FullStateDiagMatr
template <class T>
void validateAndDestroyMatrix(T matrix, const char* caller) {
    validate_matrixFields(matrix, caller);

    freeHeapMatrix(matrix);
}


// de-mangled for C++ and C compatibility
extern "C" {

    void destroyCompMatr(CompMatr matr) { validateAndDestroyMatrix(matr, __func__); }
    void destroyDiagMatr(DiagMatr matr) { validateAndDestroyMatrix(matr, __func__); }
    void destroyFullStateDiagMatr(FullStateDiagMatr matr) { validateAndDestroyMatrix(matr, __func__); }

}



/*
 * VARIABLE-SIZE MATRIX SETTERS VIA POINTERS
 */


// type T can be qcomp** or vector<vector<qcomp>>, but qcomp(*)[] is handled by header
template <typename T>
void setAndSyncDenseMatrElems(CompMatr out, T elems) {

    // copy elems into matrix's CPU memory
    cpu_copyMatrix(out.cpuElems, elems, out.numRows);

    // overwrite GPU elements; validation guaranteed to pass
    syncCompMatr(out);
}


extern "C" void setCompMatr(CompMatr out, qcomp** in) {
    validate_matrixFields(out, __func__);
    validate_matrixNewElemsPtrNotNull(in, out.numRows, __func__);

    setAndSyncDenseMatrElems(out, in);
}


extern "C" void setDiagMatr(DiagMatr out, qcomp* in) {
    validate_matrixFields(out, __func__);
    validate_matrixNewElemsPtrNotNull(in, __func__);

    // overwrite CPU memory
    cpu_copyArray(out.cpuElems, in, out.numElems);

    // overwrite GPU elements; validation guaranteed to pass
    syncDiagMatr(out);
}


extern "C" void setFullStateDiagMatr(FullStateDiagMatr out, qindex startInd, qcomp* in, qindex numElems) {
    validate_matrixFields(out, __func__);
    validate_fullStateDiagMatrNewElems(out, startInd, numElems, __func__);
    validate_matrixNewElemsPtrNotNull(in, __func__);

    // overwrites both the CPU and GPU memory (if it exists), maintaining consistency.
    // note that cpu_copyArray() isn't called directly here like setDiagMatr() above
    // because we must handle when 'out' is and isn't distributed
    localiser_fullstatediagmatr_setElems(out, startInd, in, numElems);

    // even though we have not necessarily overwritten every element, we must mark
    // the matrix as synced so that it can be subsequently used without error
    markMatrixAsSynced(out);
}
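
// For example (illustrative; 'm' is a hypothetical, existing FullStateDiagMatr):
// only the targeted sub-range is overwritten, so a large or distributed matrix
// can be populated incrementally:
//
//     qcomp amps[] = {1, 2, 3};
//     setFullStateDiagMatr(m, 5, amps, 3);   // overwrites global indices 5, 6, 7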



/*
 * VARIABLE-SIZE MATRIX SETTERS VIA VECTORS
 */


void setCompMatr(CompMatr out, vector<vector<qcomp>> in) {
    validate_matrixFields(out, __func__);
    validate_matrixNumNewElems(out.numQubits, in, __func__);

    setAndSyncDenseMatrElems(out, in);
}


void setDiagMatr(DiagMatr out, vector<qcomp> in) {
    validate_matrixFields(out, __func__);
    validate_matrixNumNewElems(out.numQubits, in, __func__); // validates 'in' dim

    setDiagMatr(out, in.data()); // harmlessly re-validates
}


void setFullStateDiagMatr(FullStateDiagMatr out, qindex startInd, vector<qcomp> in) {

    setFullStateDiagMatr(out, startInd, in.data(), in.size());
}


// no bespoke array functions are necessary for diagonal matrix initialisation,
// since passed arrays automatically decay to pointers



/*
 * VARIABLE-SIZE MATRIX SETTERS VIA LITERALS
 *
 * Only the C++ versions are defined here, while the C versions are macros
 * defined in the header. Note the C++ versions themselves are entirely
 * superfluous and merely call the above vector setters, but we still define
 * them for API consistency, and we additionally validate the extra
 * parameters they pass.
 */


void setInlineCompMatr(CompMatr matr, int numQb, vector<vector<qcomp>> in) {
    validate_matrixFields(matr, __func__);
    validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, __func__);
    validate_matrixNumNewElems(matr.numQubits, in, __func__);

    setAndSyncDenseMatrElems(matr, in);
}

void setInlineDiagMatr(DiagMatr matr, int numQb, vector<qcomp> in) {
    validate_matrixFields(matr, __func__);
    validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, __func__);
    validate_matrixNumNewElems(matr.numQubits, in, __func__);

    setDiagMatr(matr, in.data()); // validation guaranteed to pass
}

void setInlineFullStateDiagMatr(FullStateDiagMatr matr, qindex startInd, qindex numElems, vector<qcomp> in) {
    validate_matrixFields(matr, __func__);
    validate_declaredNumElemsMatchesVectorLength(numElems, in.size(), __func__);
    validate_fullStateDiagMatrNewElems(matr, startInd, numElems, __func__);

    setFullStateDiagMatr(matr, startInd, in); // validation guaranteed to pass
}
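
// For example (illustrative; 'd' is a hypothetical, existing 2-qubit DiagMatr):
// the explicit qubit/element counts exist only for consistency with the C macros,
// and are validated against the matrix:
//
//     setInlineDiagMatr(d, 2, {1, 2, 3, 4});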



/*
 * VARIABLE-SIZE MATRIX INLINE-SETTER CONSTRUCTORS
 *
 * Only the C++ versions are defined here; the C versions are header macros
 */


CompMatr createInlineCompMatr(int numQb, vector<vector<qcomp>> elems) {
    validate_envIsInit(__func__);
    validate_newCompMatrParams(numQb, __func__);
    validate_matrixNumNewElems(numQb, elems, __func__);

    // pre-validation guaranteed to pass, but malloc failures will trigger an error
    // message specific to 'createCompMatr', rather than this 'inline' version. Alas!
    CompMatr matr = createCompMatr(numQb);
    setAndSyncDenseMatrElems(matr, elems);
    return matr;
}


DiagMatr createInlineDiagMatr(int numQb, vector<qcomp> elems) {
    validate_envIsInit(__func__);
    validate_newDiagMatrParams(numQb, __func__);
    validate_matrixNumNewElems(numQb, elems, __func__);

    // pre-validation guaranteed to pass, but malloc failures will trigger an error
    // message specific to 'createDiagMatr', rather than this 'inline' version. Alas!
    DiagMatr matr = createDiagMatr(numQb);
    setDiagMatr(matr, elems.data()); // validation guaranteed to pass
    return matr;
}
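
// For example (illustrative only):
//
//     CompMatr swap = createInlineCompMatr(2, {
//         {1, 0, 0, 0},
//         {0, 0, 1, 0},
//         {0, 1, 0, 0},
//         {0, 0, 0, 1}
//     });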



/*
 * EXPOSING SOME SETTER VALIDATION TO HEADER
 *
 * Some setters are necessarily defined in the header, because they accept
 * C-only VLAs which need to be cast into pointers before being passed to
 * this C++ backend (which does not support VLA). These setters need their
 * validators exposed, though we cannot expose the entirety of validation.hpp
 * because it cannot be parsed by C; so we here wrap specific functions.
 */


extern "C" {

    void _validateNewNestedElemsPtrNotNull(qcomp** ptrs, int numQubits, const char* caller) {

        validate_matrixNewElemsPtrNotNull(ptrs, powerOf2(numQubits), caller);
    }
    void _validateNewElemsPtrNotNull(qcomp* ptr, const char* caller) {

        validate_matrixNewElemsPtrNotNull(ptr, caller);
    }

    void _validateParamsToSetCompMatrFromArr(CompMatr matr) {

        validate_matrixFields(matr, "setCompMatr");
    }

    void _validateParamsToSetInlineCompMatr(CompMatr matr, int numQb) {

        const char* caller = "setInlineCompMatr";
        validate_matrixFields(matr, caller);
        validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, caller);
    }

    void _validateParamsToSetInlineDiagMatr(DiagMatr matr, int numQb) {

        const char* caller = "setInlineDiagMatr";
        validate_matrixFields(matr, caller);
        validate_matrixNumQubitsMatchesParam(matr.numQubits, numQb, caller);
    }

    void _validateParamsToSetInlineFullStateDiagMatr(FullStateDiagMatr matr, qindex startInd, qindex numElems) {

        const char* caller = "setInlineFullStateDiagMatr";
        validate_matrixFields(matr, caller);
        validate_fullStateDiagMatrNewElems(matr, startInd, numElems, caller);
    }

    void _validateParamsToCreateInlineCompMatr(int numQb) {

        const char* caller = "createInlineCompMatr";
        validate_envIsInit(caller);
        validate_newCompMatrParams(numQb, caller);
    }

    void _validateParamsToCreateInlineDiagMatr(int numQb) {

        const char* caller = "createInlineDiagMatr";
        validate_envIsInit(caller);
        validate_newDiagMatrParams(numQb, caller);
    }

}
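
// For instance (a hypothetical sketch of the header-side pattern, not the actual
// header code), a C-only setter receiving a VLA could validate then forward to
// the pointer overload defined above:
//
//     void example_setCompMatrFromVLA(CompMatr m, int dim, qcomp elems[dim][dim]) {
//         _validateParamsToSetCompMatrFromArr(m);
//         qcomp* rows[dim];
//         for (int r = 0; r < dim; r++)
//             rows[r] = elems[r];
//         setCompMatr(m, rows);
//     }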



/*
 * SPECIAL CREATORS AND SETTERS
 */


extern int paulis_getIndOfLefmostNonIdentityPauli(PauliStrSum sum);


extern "C" void setFullStateDiagMatrFromPauliStrSum(FullStateDiagMatr out, PauliStrSum in) {
    validate_matrixFields(out, __func__);
    validate_pauliStrSumFields(in, __func__);
    validate_pauliStrSumCanInitMatrix(out, in, __func__);

    // permit 'in' to be non-Hermitian since it does not determine 'out' unitarity

    // unlike other FullStateDiagMatr initialisers, we employ an accelerated
    // backend since the input data 'in' is expectedly significantly smaller
    // than the created data in 'out', making parallelisation worthwhile as
    // the memory-movement costs of copying 'in' to a GPU are small
    localiser_fullstatediagmatr_setElemsToPauliStrSum(out, in);

    markMatrixAsSynced(out);
}


extern "C" FullStateDiagMatr createFullStateDiagMatrFromPauliStrSum(PauliStrSum in) {
    validate_pauliStrSumFields(in, __func__);

    // ensure createFullStateDiagMatr() below succeeds (so if not, that thrower name is correct)
    int numQubits = 1 + paulis_getIndOfLefmostNonIdentityPauli(in);
    validate_newFullStateDiagMatrParams(numQubits, modeflag::USE_AUTO, modeflag::USE_AUTO, modeflag::USE_AUTO, __func__);

    // permit 'in' to be non-Hermitian since it does not determine 'out' unitarity

    FullStateDiagMatr out = createFullStateDiagMatr(numQubits);
    localiser_fullstatediagmatr_setElemsToPauliStrSum(out, in);
    markMatrixAsSynced(out);
    return out;
}


extern "C" void setDiagMatrFromMultiVarFunc(DiagMatr out, qcomp (*callbackFunc)(qindex*), int* numQubitsPerVar, int numVars, int areSigned) {
    validate_matrixFields(out, __func__);
    validate_multiVarFuncQubits(out.numQubits, numQubitsPerVar, numVars, __func__);
    validate_funcVarSignedFlag(areSigned, __func__);

    vector<qindex> varValues(numVars);

    // set each element of the diagonal in turn; user's callback might not be thread-safe
    for (qindex elemInd=0; elemInd<out.numElems; elemInd++) {
        fast_getSubQuregValues(elemInd, numQubitsPerVar, numVars, areSigned, varValues.data());

        // call user function, and update only the CPU elems
        out.cpuElems[elemInd] = callbackFunc(varValues.data());
    }

    // overwrite all GPU elems
    syncDiagMatr(out);
}
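
// For example (illustrative; 'd' is a hypothetical 4-qubit DiagMatr): with two
// signed 2-qubit variables, the callback receives their decoded integer values
// and returns the corresponding diagonal element:
//
//     qcomp myProduct(qindex* vars) {
//         return qcomp(vars[0] * vars[1], 0);
//     }
//
//     int numQubitsPerVar[] = {2, 2};
//     setDiagMatrFromMultiVarFunc(d, myProduct, numQubitsPerVar, 2, 1);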


extern "C" void setFullStateDiagMatrFromMultiVarFunc(FullStateDiagMatr out, qcomp (*callbackFunc)(qindex*), int* numQubitsPerVar, int numVars, int areSigned) {
    validate_matrixFields(out, __func__);
    validate_multiVarFuncQubits(out.numQubits, numQubitsPerVar, numVars, __func__);
    validate_funcVarSignedFlag(areSigned, __func__);

    // we assume callbackFunc is thread-safe (!!!!) and possibly use multithreading, but never
    // GPU acceleration, since we cannot invoke user callback functions from GPU kernels
    cpu_fullstatediagmatr_setElemsFromMultiVarFunc(out, callbackFunc, numQubitsPerVar, numVars, areSigned);

    // overwrite all GPU elems
    syncFullStateDiagMatr(out);
}


extern "C" void setDiagMatrFromMultiDimLists(DiagMatr out, void* lists, int* numQubitsPerDim, int numDims) {
    validate_matrixFields(out, __func__);
    validate_multiVarFuncQubits(out.numQubits, numQubitsPerDim, numDims, __func__);

    vector<qindex> listInds(numDims);

    // set each element of the diagonal in turn, which is embarrassingly parallel,
    // although we do not parallelise - the DiagMatr is intentionally small
    for (qindex elemInd=0; elemInd<out.numElems; elemInd++) {

        // nested list indices = unsigned integer values of variables
        fast_getSubQuregValues(elemInd, numQubitsPerDim, numDims, false, listInds.data());

        // update only the CPU elems
        out.cpuElems[elemInd] = util_getElemFromNestedPtrs(lists, listInds.data(), numDims);
    }

    // overwrite all GPU elems
    syncDiagMatr(out);
}
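
// For example (illustrative; 'd' is a hypothetical 2-qubit DiagMatr): 'lists' is
// interpreted as numDims-deep nested pointers, indexed by the unsigned values of
// each dimension's qubits:
//
//     qcomp row0[] = {1, 2}, row1[] = {3, 4};
//     qcomp* rows[] = {row0, row1};
//     int numQubitsPerDim[] = {1, 1};
//     setDiagMatrFromMultiDimLists(d, rows, numQubitsPerDim, 2);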


extern "C" void setFullStateDiagMatrFromMultiDimLists(FullStateDiagMatr out, void* lists, int* numQubitsPerDim, int numDims) {
    validate_matrixFields(out, __func__);
    validate_multiVarFuncQubits(out.numQubits, numQubitsPerDim, numDims, __func__);

    // possibly use multithreading, but never GPU acceleration, due to the
    // arbitrarily nested nature of the input lists
    cpu_fullstatediagmatr_setElemsFromMultiDimLists(out, lists, numQubitsPerDim, numDims);

    // overwrite all GPU elems
    syncFullStateDiagMatr(out);
}



/*
 * MATRIX REPORTERS
 */


// type T can be CompMatr1, CompMatr2, CompMatr, DiagMatr1, DiagMatr2, DiagMatr, FullStateDiagMatr
template<class T>
void validateAndPrintMatrix(T matr, const char* caller) {
    validate_matrixFields(matr, caller);
    validate_numReportedNewlinesAboveZero(__func__); // because a trailing newline is mandatory

    // syncable matrices must be synced before reporting (though only CPU elems are printed)
    if constexpr (util_isHeapMatrixType<T>())
        validate_matrixIsSynced(matr, caller);

    // calculate the total memory (in bytes) consumed by the matrix on each
    // node, which will depend on whether the matrix is distributed, and
    // includes the size of the matrix struct itself. Note that GPU memory
    // is not included since it is less than or equal to the CPU memory, and
    // occupies different memory spaces, confusing capacity accounting
    int numNodes = (util_isDistributedMatrix(matr))? comm_getNumNodes() : 1;
    size_t elemMem = mem_getLocalMatrixMemoryRequired(matr.numQubits, util_isDenseMatrixType<T>(), numNodes);
    size_t structMem = sizeof(matr);

    // struct memory includes fixed-size arrays (qcomp[][]), so we undo double-counting
    if (util_isFixedSizeMatrixType<T>())
        structMem -= elemMem;

    size_t numBytesPerNode = elemMem + structMem;
    print_header(matr, numBytesPerNode);
    print_elems(matr);

    // exclude mandatory newline above
    print_oneFewerNewlines();
}


// all reporters are C and C++ accessible, so are de-mangled
extern "C" {

    void reportCompMatr1(CompMatr1 matr) { validateAndPrintMatrix(matr, __func__); }
    void reportCompMatr2(CompMatr2 matr) { validateAndPrintMatrix(matr, __func__); }
    void reportCompMatr (CompMatr matr) { validateAndPrintMatrix(matr, __func__); }
    void reportDiagMatr1(DiagMatr1 matr) { validateAndPrintMatrix(matr, __func__); }
    void reportDiagMatr2(DiagMatr2 matr) { validateAndPrintMatrix(matr, __func__); }
    void reportDiagMatr (DiagMatr matr) { validateAndPrintMatrix(matr, __func__); }
    void reportFullStateDiagMatr(FullStateDiagMatr matr) { validateAndPrintMatrix(matr, __func__); }

}
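
// For example (illustrative): reporters print a header, including per-node memory,
// followed by the CPU elements:
//
//     CompMatr1 x = getCompMatr1({{0, 1}, {1, 0}});
//     reportCompMatr1(x);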