qureg.cpp
/** @file
 * API definitions for creating and managing Quregs,
 * and automatically choosing their deployment modes.
 *
 * @author Tyson Jones
 */

#include "quest/include/qureg.h"
#include "quest/include/modes.h"
#include "quest/include/environment.h"
#include "quest/include/initialisations.h"

#include "quest/src/core/validation.hpp"
#include "quest/src/core/autodeployer.hpp"
#include "quest/src/core/printer.hpp"
#include "quest/src/core/bitwise.hpp"
#include "quest/src/core/memory.hpp"
#include "quest/src/core/utilities.hpp"
#include "quest/src/core/localiser.hpp"
#include "quest/src/comm/comm_config.hpp"
#include "quest/src/comm/comm_routines.hpp"
#include "quest/src/cpu/cpu_config.hpp"
#include "quest/src/gpu/gpu_config.hpp"

#include <string>
#include <vector>

using std::string;
using std::vector;



/*
 * INTERNALLY EXPOSED FUNCTION
 */

Qureg qureg_populateNonHeapFields(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int useMultithread) {

    QuESTEnv env = getQuESTEnv();

    // precompute some struct fields (to avoid circular initialisation)
    int logNumNodes = (useDistrib)?
        logBase2(env.numNodes) : 0;
    qindex logNumAmpsPerNode = (isDensMatr)?
        (2*numQubits - logNumNodes) :
        (  numQubits - logNumNodes);

    // prepare output Qureg (avoiding C++20 designated initialisers)
    Qureg out;

    // bind deployment info
    out.isMultithreaded  = useMultithread;
    out.isGpuAccelerated = useGpuAccel;
    out.isDistributed    = useDistrib;

    // optionally bind distributed info. Note that in distributed environments,
    // non-distributed quregs are duplicated on every node, and each duplicate
    // believes it is the root node with no other nodes existing; this is
    // essential so that such quregs can agnostically use distributed routines
    // which consult the rank, though it will interfere with naive root-only
    // printing logic
    out.rank        = (useDistrib)? env.rank : 0;
    out.numNodes    = (useDistrib)? env.numNodes : 1;
    out.logNumNodes = (useDistrib)? logBase2(env.numNodes) : 0; // duplicated for clarity

    // bind dimensions
    out.isDensityMatrix = isDensMatr;
    out.numQubits  = numQubits;
    out.numAmps    = (isDensMatr)? powerOf2(2*numQubits) : powerOf2(numQubits);
    out.logNumAmps = (isDensMatr)? 2*numQubits : numQubits;

    // bind dimensions per node (even when not distributed)
    out.numAmpsPerNode = powerOf2(logNumAmpsPerNode);
    out.logNumAmpsPerNode = logNumAmpsPerNode;
    out.logNumColsPerNode = (isDensMatr)? numQubits - logNumNodes : 0; // used only by density matrices

    // caller will allocate heap memory as necessary
    out.cpuAmps       = nullptr;
    out.gpuAmps       = nullptr;
    out.cpuCommBuffer = nullptr;
    out.gpuCommBuffer = nullptr;

    return out;
}
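
/* A quick sanity check of the arithmetic above (illustrative only): a
 * statevector of N qubits distributed over 2^k nodes stores 2^(N-k)
 * amplitudes per node, while an N-qubit density matrix stores 2^(2N-k)
 * of its 2^(2N) amplitudes per node. For example, N = 20 qubits over
 * 8 = 2^3 nodes gives:
 *
 *     logNumNodes       = 3
 *     logNumAmpsPerNode = 20 - 3   = 17    (statevector)
 *                       = 2*20 - 3 = 37    (density matrix)
 *     logNumColsPerNode = 20 - 3   = 17    (density matrix only)
 *
 * so each node holds 2^17 statevector amplitudes, or 2^37 density-matrix
 * amplitudes spanning 2^17 columns.
 */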



/*
 * PRIVATE INNER FUNCTIONS (C++)
 */

bool didAnyLocalAllocsFail(Qureg qureg) {

    // CPU memory should always be allocated
    if (! mem_isAllocated(qureg.cpuAmps))
        return true;

    // when distributed, the CPU communication buffer must be allocated
    if (qureg.isDistributed && ! mem_isAllocated(qureg.cpuCommBuffer))
        return true;

    // when GPU-accelerated, the GPU memory should be allocated
    if (qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuAmps))
        return true;

    // when both distributed and GPU-accelerated, the GPU communication buffer must be allocated
    if (qureg.isDistributed && qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuCommBuffer))
        return true;

    // otherwise, all pointers are non-NULL, so no allocations failed
    return false;
}


bool didAnyAllocsFailOnAnyNode(Qureg qureg) {

    bool anyFail = didAnyLocalAllocsFail(qureg);
    if (comm_isInit())
        anyFail = comm_isTrueOnAllNodes(anyFail);

    return anyFail;
}


void freeAllMemoryIfAnyAllocsFailed(Qureg qureg) {

    // do nothing if everything allocated successfully on all nodes
    if (!didAnyAllocsFailOnAnyNode(qureg))
        return;

    // otherwise, free everything that was successfully allocated (freeing nullptr is legal),
    // using the NUMA-aware deallocator to match the NUMA allocation of the amplitudes
    cpu_deallocNumaArray(qureg.cpuAmps, qureg.numAmpsPerNode);
    cpu_deallocArray(qureg.cpuCommBuffer);

    // though we avoid calling GPU deallocation in non-GPU mode
    if (qureg.isGpuAccelerated) {
        gpu_deallocArray(qureg.gpuAmps);
        gpu_deallocArray(qureg.gpuCommBuffer);
    }
}


Qureg validateAndCreateCustomQureg(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int useMultithread, const char* caller) {

    validate_envIsInit(caller);
    QuESTEnv env = getQuESTEnv();

    // ensure the deployment is compatible with the environment, considering available hardware and memory capacities
    validate_newQuregParams(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env, caller);

    // automatically overwrite the distrib, GPU, and multithread flags which were left as modeflag::USE_AUTO
    autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env);

    Qureg qureg = qureg_populateNonHeapFields(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread);

    // always allocate CPU memory
    qureg.cpuAmps = cpu_allocNumaArray(qureg.numAmpsPerNode); // nullptr if failed

    // conditionally allocate GPU memory and communication buffers (even if numNodes == 1).
    // note that in distributed environments where useDistrib=false, each node will have a
    // full copy of the amplitudes, but will NOT have the communication buffers allocated
    qureg.gpuAmps       = (useGpuAccel)?               gpu_allocArray(qureg.numAmpsPerNode) : nullptr;
    qureg.cpuCommBuffer = (useDistrib)?                cpu_allocArray(qureg.numAmpsPerNode) : nullptr;
    qureg.gpuCommBuffer = (useGpuAccel && useDistrib)? gpu_allocArray(qureg.numAmpsPerNode) : nullptr;

    // if any of the above allocations failed, the validation below would throw before the
    // rest could be freed, leaking memory; so free everything first (but leave the pointers
    // unmodified, so that validation can still detect which allocations failed)
    freeAllMemoryIfAnyAllocsFailed(qureg);
    validate_newQuregAllocs(qureg, caller);

    // initialise the state to |0> or |0><0|
    initZeroState(qureg);

    return qureg;
}



/*
 * PRIVATE QUREG REPORTING INNER FUNCTIONS
 */

void printDeploymentInfo(Qureg qureg) {

    print_table(
        "deployment", {
            {"isMpiEnabled", qureg.isDistributed},
            {"isGpuEnabled", qureg.isGpuAccelerated},
            {"isOmpEnabled", qureg.isMultithreaded},
        });
}

void printDimensionInfo(Qureg qureg) {

    using namespace printer_substrings;

    // 2^N = M
    string ampsStr;
    ampsStr  = bt + printer_toStr(qureg.numQubits * (qureg.isDensityMatrix? 2 : 1));
    ampsStr += eq + printer_toStr(qureg.numAmps);

    string colsStr = na;
    if (qureg.isDensityMatrix)
        colsStr = (
            bt + printer_toStr(qureg.numQubits) +
            eq + printer_toStr(powerOf2(qureg.numQubits)));

    print_table(
        "dimension", {
            {"isDensMatr", printer_toStr(qureg.isDensityMatrix)},
            {"numQubits",  printer_toStr(qureg.numQubits)},
            {"numCols",    colsStr},
            {"numAmps",    ampsStr},
        });
}


void printDistributionInfo(Qureg qureg) {

    using namespace printer_substrings;

    // not applicable when not distributed
    string nodesStr = na;
    string ampsStr  = na;
    string colsStr  = na;

    // 2^N = M per node
    if (qureg.isDistributed) {
        nodesStr = bt + printer_toStr(qureg.logNumNodes)       + eq + printer_toStr(qureg.numNodes);
        ampsStr  = bt + printer_toStr(qureg.logNumAmpsPerNode) + eq + printer_toStr(qureg.numAmpsPerNode) + pn;
        if (qureg.isDensityMatrix)
            colsStr = bt + printer_toStr(qureg.logNumColsPerNode) + eq + printer_toStr(powerOf2(qureg.logNumColsPerNode)) + pn;
    }

    print_table(
        "distribution", {
            {"numNodes", nodesStr},
            {"numCols",  colsStr},
            {"numAmps",  ampsStr},
        });
}


void printMemoryInfo(Qureg qureg) {

    using namespace printer_substrings;

    size_t localArrayMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
    string localMemStr = printer_getMemoryWithUnitStr(localArrayMem) + (qureg.isDistributed? pn : "");

    // precondition: no reportable field risks overflowing a qindex, EXCEPT
    // the aggregate total memory across distributed nodes (in bytes)
    qindex globalTotalMem = mem_getTotalGlobalMemoryUsed(qureg);
    string globalMemStr = (globalTotalMem == 0)? "overflowed" : printer_getMemoryWithUnitStr(globalTotalMem);

    print_table(
        "memory", {
            {"cpuAmps",       mem_isAllocated(qureg.cpuAmps)?       localMemStr : na},
            {"gpuAmps",       mem_isAllocated(qureg.gpuAmps)?       localMemStr : na},
            {"cpuCommBuffer", mem_isAllocated(qureg.cpuCommBuffer)? localMemStr : na},
            {"gpuCommBuffer", mem_isAllocated(qureg.gpuCommBuffer)? localMemStr : na},
            {"globalTotal",   globalMemStr},
        });
}



/*
 * PUBLIC C & C++ AGNOSTIC FUNCTIONS
 */

// enable invocation by both C and C++ binaries
extern "C" {


Qureg createCustomQureg(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int useMultithread) {

    return validateAndCreateCustomQureg(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, __func__);
}
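
/* Illustrative usage of the above (a hypothetical caller, not part of this
 * file), assuming the environment was already initialised (e.g. via
 * initQuESTEnv()):
 *
 * @code
 *     // explicitly request a distributed, multithreaded, non-GPU 20-qubit statevector
 *     Qureg qureg = createCustomQureg(20, 0, 1, 0, 1);
 *
 *     // or leave every deployment decision automatic, as createQureg() below does
 *     Qureg autoQureg = createCustomQureg(20, 0, modeflag::USE_AUTO, modeflag::USE_AUTO, modeflag::USE_AUTO);
 * @endcode
 */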


Qureg createQureg(int numQubits) {

    int isDensMatr = 0;
    int autoMode = modeflag::USE_AUTO;
    return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
}


Qureg createDensityQureg(int numQubits) {

    int isDensMatr = 1;
    int autoMode = modeflag::USE_AUTO;
    return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
}


Qureg createForcedQureg(int numQubits) {
    validate_envIsInit(__func__);

    QuESTEnv env = getQuESTEnv();

    int isDensMatr = 0;
    return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
}


Qureg createForcedDensityQureg(int numQubits) {
    validate_envIsInit(__func__);

    QuESTEnv env = getQuESTEnv();

    int isDensMatr = 1;
    return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
}


Qureg createCloneQureg(Qureg qureg) {
    validate_quregFields(qureg, __func__);

    // create a new Qureg with identical fields, but zeroed memory
    Qureg clone = validateAndCreateCustomQureg(
        qureg.numQubits, qureg.isDensityMatrix, qureg.isDistributed,
        qureg.isGpuAccelerated, qureg.isMultithreaded, __func__);

    setQuregToClone(clone, qureg); // harmlessly re-validates

    // if GPU-accelerated, the clone's CPU amps are NOT updated
    return clone;
}
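
/* Sketch of the GPU caveat above (illustrative; a hypothetical caller, not
 * part of this file): a caller wanting the clone's CPU amps faithful after
 * cloning a GPU-accelerated qureg could explicitly synchronise:
 *
 * @code
 *     Qureg copy = createCloneQureg(qureg);
 *     syncQuregFromGpu(copy); // overwrite copy's (stale) CPU amps with its GPU amps
 * @endcode
 */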


void destroyQureg(Qureg qureg) {
    validate_quregFields(qureg, __func__);

    // free CPU memory
    cpu_deallocNumaArray(qureg.cpuAmps, qureg.numAmpsPerNode);

    // free CPU communication buffer
    if (qureg.isDistributed)
        cpu_deallocArray(qureg.cpuCommBuffer);

    // free GPU memory
    if (qureg.isGpuAccelerated)
        gpu_deallocArray(qureg.gpuAmps);

    // free GPU communication buffer
    if (qureg.isGpuAccelerated && qureg.isDistributed)
        gpu_deallocArray(qureg.gpuCommBuffer);

    // we cannot set the freed fields to nullptr because qureg
    // was not passed by reference, and is not returned
}
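
/* The full create/use/destroy lifecycle, for illustration (a hypothetical
 * caller, not part of this file):
 *
 * @code
 *     Qureg qureg = createQureg(15); // auto-deployed 15-qubit statevector, initialised to |0>
 *     reportQuregParams(qureg);      // print the deployment, dimension, distribution and memory tables
 *     destroyQureg(qureg);           // free all CPU (and any GPU) heap memory
 * @endcode
 */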


void reportQuregParams(Qureg qureg) {
    validate_quregFields(qureg, __func__);
    validate_numReportedNewlinesAboveZero(__func__); // because a trailing newline is mandatory

    /// @todo add function to write this output to file (useful for HPC debugging)

    // printer routines will consult the env rank to avoid duplicate printing
    print_label("Qureg");
    printDeploymentInfo(qureg);
    printDimensionInfo(qureg);
    printDistributionInfo(qureg);
    printMemoryInfo(qureg);

    // exclude the mandatory newline above
    print_oneFewerNewlines();
}


void reportQureg(Qureg qureg) {
    validate_quregFields(qureg, __func__);
    validate_numReportedNewlinesAboveZero(__func__); // because a trailing newline is mandatory

    // account for all local CPU memory (including the buffer), neglecting GPU
    // memory because it occupies a distinct memory space, confusing the accounting
    size_t localMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
    if (qureg.isDistributed)
        localMem *= 2; // include the buffer. @todo will this ever overflow?

    // include the struct size (expected negligibly tiny)
    localMem += sizeof(qureg);

    print_header(qureg, localMem);
    print_elems(qureg);

    // exclude the mandatory newline above
    print_oneFewerNewlines();
}


void syncQuregToGpu(Qureg qureg) {
    validate_quregFields(qureg, __func__);

    // permit this to be called even in non-GPU mode
    if (qureg.isGpuAccelerated)
        gpu_copyCpuToGpu(qureg); // syncs then overwrites all local GPU amps
}


void syncQuregFromGpu(Qureg qureg) {
    validate_quregFields(qureg, __func__);

    // permit this to be called even in non-GPU mode
    if (qureg.isGpuAccelerated)
        gpu_copyGpuToCpu(qureg); // syncs then overwrites all local CPU amps
}
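
/* The intended usage pattern (illustrative; a hypothetical caller, not part
 * of this file): directly modify the locally-stored CPU amplitudes, then push
 * them to the GPU, which is harmlessly a no-op in non-GPU mode:
 *
 * @code
 *     qureg.cpuAmps[0] = 0.5; // directly overwrite a local CPU amplitude
 *     syncQuregToGpu(qureg);  // propagate all CPU amps to GPU memory (if any)
 * @endcode
 */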


void syncSubQuregToGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps) {
    validate_quregFields(qureg, __func__);
    validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);

    // the above validation communicates for node consensus in
    // distributed settings, because params can differ per-node.
    // note also this function accepts statevectors AND density
    // matrices, because the latter does not need a bespoke
    // (row,col) interface, since the user can only access/modify
    // local density matrix amps via a flat index anyway!

    // we permit this function to do nothing when not GPU-accelerated
    if (!qureg.isGpuAccelerated)
        return;

    // otherwise, every node merely copies its local subset, which
    // may differ per-node, in an embarrassingly parallel manner
    gpu_copyCpuToGpu(&qureg.cpuAmps[localStartInd], &qureg.gpuAmps[localStartInd], numLocalAmps);
}


void syncSubQuregFromGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps) {
    validate_quregFields(qureg, __func__);
    validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);

    // the above validation communicates for node consensus in
    // distributed settings, because params can differ per-node.
    // note also this function accepts statevectors AND density
    // matrices, because the latter does not need a bespoke
    // (row,col) interface, since the user can only access/modify
    // local density matrix amps via a flat index anyway!

    // we permit this function to do nothing when not GPU-accelerated
    if (!qureg.isGpuAccelerated)
        return;

    // otherwise, every node merely copies its local subset, which
    // may differ per-node, in an embarrassingly parallel manner
    gpu_copyGpuToCpu(&qureg.gpuAmps[localStartInd], &qureg.cpuAmps[localStartInd], numLocalAmps);
}
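
/* For illustration (hypothetical caller, not part of this file): after
 * overwriting a contiguous window of local CPU amplitudes, only that window
 * need be pushed to the GPU:
 *
 * @code
 *     for (qindex i=100; i<200; i++)
 *         qureg.cpuAmps[i] = 0;           // locally zero amps [100, 200)
 *     syncSubQuregToGpu(qureg, 100, 100); // copy only that subset to the GPU
 * @endcode
 */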


void getQuregAmps(qcomp* outAmps, Qureg qureg, qindex startInd, qindex numAmps) {
    validate_quregFields(qureg, __func__);
    validate_quregIsStateVector(qureg, __func__);
    validate_basisStateIndices(qureg, startInd, numAmps, __func__);

    localiser_statevec_getAmps(outAmps, qureg, startInd, numAmps);
}


void getDensityQuregAmps(qcomp** outAmps, Qureg qureg, qindex startRow, qindex startCol, qindex numRows, qindex numCols) {
    validate_quregFields(qureg, __func__);
    validate_quregIsDensityMatrix(qureg, __func__);
    validate_basisStateRowCols(qureg, startRow, startCol, numRows, numCols, __func__);

    localiser_densmatr_getAmps(outAmps, qureg, startRow, startCol, numRows, numCols);
}
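
/* Illustrative C-style retrieval (hypothetical caller): the caller supplies
 * the output buffer, which keeps this interface usable from C:
 *
 * @code
 *     qcomp amps[8];
 *     getQuregAmps(amps, qureg, 0, 8); // fetch the first 8 statevector amplitudes
 * @endcode
 */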



// end de-mangler
}



/*
 * C++ ONLY FUNCTIONS
 *
 * which are not directly C-compatible because of the limited
 * interoperability of the qcomp type. See calculations.h
 * for more info. We here define a C++-only signature (with
 * name-mangling) and a C-friendly wrapper which passes by
 * pointer; the C-friendly interface in wrappers.h itself
 * wraps the latter.
 */


qcomp getQuregAmp(Qureg qureg, qindex index) {
    validate_quregFields(qureg, __func__);
    validate_quregIsStateVector(qureg, __func__);
    validate_basisStateIndex(qureg, index, __func__);

    return localiser_statevec_getAmp(qureg, index);
}
extern "C" void _wrap_getQuregAmp(qcomp* out, Qureg qureg, qindex index) {

    *out = getQuregAmp(qureg, index);
}


qcomp getDensityQuregAmp(Qureg qureg, qindex row, qindex column) {
    validate_quregFields(qureg, __func__);
    validate_quregIsDensityMatrix(qureg, __func__);
    validate_basisStateRowCol(qureg, row, column, __func__);

    qindex ind = util_getGlobalFlatIndex(qureg, row, column);
    qcomp amp = localiser_statevec_getAmp(qureg, ind);
    return amp;
}
extern "C" void _wrap_getDensityQuregAmp(qcomp* out, Qureg qureg, qindex row, qindex column) {

    *out = getDensityQuregAmp(qureg, row, column);
}
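
/* The pass-by-pointer pattern above exists because returning a complex value
 * across the C/C++ boundary is not reliably ABI-compatible (C's double
 * _Complex and C++'s std::complex need not agree). A C caller thus receives
 * the amplitude through an out-pointer; something like the below (a sketch
 * only; the real C-side plumbing lives in wrappers.h):
 *
 * @code
 *     qcomp amp;
 *     _wrap_getQuregAmp(&amp, qureg, 0); // C-compatible: result written to amp
 * @endcode
 */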



/*
 * C++ OVERLOADS
 */

vector<qcomp> getQuregAmps(Qureg qureg, qindex startInd, qindex numAmps) {

    // allocate the output vector, and validate the allocation succeeded
    vector<qcomp> out;
    auto callback = [&]() { validate_tempAllocSucceeded(false, numAmps, sizeof(qcomp), __func__); };
    util_tryAllocVector(out, numAmps, callback);

    // the inner call performs the main validation
    getQuregAmps(out.data(), qureg, startInd, numAmps);
    return out;
}


vector<vector<qcomp>> getDensityQuregAmps(Qureg qureg, qindex startRow, qindex startCol, qindex numRows, qindex numCols) {

    // allocate the output matrix, and validate the allocation succeeded
    vector<vector<qcomp>> out;
    qindex numElems = numRows * numCols; // never overflows (else the Qureg allocation would have failed)
    auto callback1 = [&]() { validate_tempAllocSucceeded(false, numElems, sizeof(qcomp), __func__); };
    util_tryAllocMatrix(out, numRows, numCols, callback1);

    // we must pass nested pointers to the core C function, requiring another temp array, also validated
    vector<qcomp*> ptrs;
    auto callback2 = [&]() { validate_tempAllocSucceeded(false, numRows, sizeof(qcomp*), __func__); };
    util_tryAllocVector(ptrs, numRows, callback2);

    // embed out's row pointers
    for (qindex i=0; i<numRows; i++)
        ptrs[i] = out[i].data();

    // modify out through its ptrs
    getDensityQuregAmps(ptrs.data(), qureg, startRow, startCol, numRows, numCols);
    return out;
}
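
/* Illustrative usage of the C++ overloads (hypothetical caller): the returned
 * containers own their memory, so no out-buffers are needed:
 *
 * @code
 *     vector<qcomp> amps = getQuregAmps(qureg, 0, 8);    // first 8 amplitudes
 *     auto block = getDensityQuregAmps(rho, 0, 0, 4, 4); // top-left 4x4 block of a density matrix
 * @endcode
 */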