The Quantum Exact Simulation Toolkit v4.1.0
qureg.cpp
/** @file
 * API definitions for creating and managing Quregs,
 * and automatically choosing their deployment modes.
 *
 * @author Tyson Jones
 */

#include "quest/include/qureg.h"
#include "quest/include/environment.h"
#include "quest/include/initialisations.h"

#include "quest/src/core/validation.hpp"
#include "quest/src/core/autodeployer.hpp"
#include "quest/src/core/printer.hpp"
#include "quest/src/core/bitwise.hpp"
#include "quest/src/core/memory.hpp"
#include "quest/src/core/utilities.hpp"
#include "quest/src/core/localiser.hpp"
#include "quest/src/comm/comm_config.hpp"
#include "quest/src/comm/comm_routines.hpp"
#include "quest/src/cpu/cpu_config.hpp"
#include "quest/src/gpu/gpu_config.hpp"

#include <string>
#include <vector>

using std::string;
using std::vector;



/*
 * INTERNALLY EXPOSED FUNCTION
 */


Qureg qureg_populateNonHeapFields(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int useMultithread) {

    QuESTEnv env = getQuESTEnv();

    // pre-prepare some struct fields (to avoid circular initialisation)
    int logNumNodes = (useDistrib)?
        logBase2(env.numNodes) : 0;
    qindex logNumAmpsPerNode = (isDensMatr)?
        (2*numQubits - logNumNodes) :
        (  numQubits - logNumNodes);

    // prepare output Qureg (avoiding C++20 designated initialiser)
    Qureg out;

    // bind deployment info
    out.isMultithreaded = useMultithread;
    out.isGpuAccelerated = useGpuAccel;
    out.isDistributed = useDistrib;

    // optionally bind distributed info, noting that in distributed environments,
    // non-distributed quregs are duplicated on every node, and each believes
    // it is the root node with no other nodes existing; this is essential so
    // that these quregs can agnostically use distributed routines which consult
    // the rank, though it will interfere with naive root-only printing logic
    out.rank = (useDistrib)? env.rank : 0;
    out.numNodes = (useDistrib)? env.numNodes : 1;
    out.logNumNodes = (useDistrib)? logBase2(env.numNodes) : 0; // duplicated for clarity

    // bind dimensions
    out.isDensityMatrix = isDensMatr;
    out.numQubits = numQubits;
    out.numAmps = (isDensMatr)? powerOf2(2*numQubits) : powerOf2(numQubits);
    out.logNumAmps = (isDensMatr)? 2*numQubits : numQubits;

    // bind dimensions per node (even if not distributed)
    out.numAmpsPerNode = powerOf2(logNumAmpsPerNode);
    out.logNumAmpsPerNode = logNumAmpsPerNode;
    out.logNumColsPerNode = (isDensMatr)? numQubits - logNumNodes : 0; // used only by density matrices

    // caller will allocate heap memory as necessary
    out.cpuAmps = nullptr;
    out.gpuAmps = nullptr;
    out.cpuCommBuffer = nullptr;
    out.gpuCommBuffer = nullptr;

    return out;
}
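
/* For example (an illustrative note, not part of the original file): a
 * 10-qubit density matrix distributed over 4 nodes is populated with
 *
 *     logNumNodes       = log2(4)  = 2
 *     logNumAmpsPerNode = 2*10 - 2 = 18
 *     numAmpsPerNode    = 2^18     = 262144
 *     logNumColsPerNode = 10 - 2   = 8
 *
 * i.e. each node stores 2^8 = 256 of the 2^10 columns of the full
 * 2^10 x 2^10 = 2^20 amplitude matrix.
 */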



/*
 * PRIVATE INNER FUNCTIONS (C++)
 */


bool didAnyLocalAllocsFail(Qureg qureg) {

    // CPU memory should always be allocated
    if (! mem_isAllocated(qureg.cpuAmps))
        return true;

    // when distributed, the CPU communication buffer must be allocated
    if (qureg.isDistributed && ! mem_isAllocated(qureg.cpuCommBuffer))
        return true;

    // when GPU-accelerated, the GPU memory should be allocated
    if (qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuAmps))
        return true;

    // when both distributed and GPU-accelerated, the GPU communication buffer must be allocated
    if (qureg.isDistributed && qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuCommBuffer))
        return true;

    // otherwise all pointers were non-NULL so no allocations failed
    return false;
}


bool didAnyAllocsFailOnAnyNode(Qureg qureg) {

    bool anyFail = didAnyLocalAllocsFail(qureg);
    if (comm_isInit())
        anyFail = comm_isTrueOnAllNodes(anyFail);

    return anyFail;
}


void freeAllMemoryIfAnyAllocsFailed(Qureg qureg) {

    // do nothing if everything allocated successfully on all nodes
    if (!didAnyAllocsFailOnAnyNode(qureg))
        return;

    // otherwise, free everything that was successfully allocated (freeing nullptr is legal)
    cpu_deallocArray(qureg.cpuAmps);
    cpu_deallocArray(qureg.cpuCommBuffer);

    // though we still avoid calling GPU deallocation in non-GPU mode
    if (qureg.isGpuAccelerated) {
        gpu_deallocArray(qureg.gpuAmps);
        gpu_deallocArray(qureg.gpuCommBuffer);
    }
}


Qureg validateAndCreateCustomQureg(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int useMultithread, const char* caller) {

    validate_envIsInit(caller);
    QuESTEnv env = getQuESTEnv();

    // ensure deployment is compatible with environment, considering available hardware and their memory capacities
    validate_newQuregParams(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env, caller);

    // automatically overwrite distrib, GPU, and multithread fields which were left as modeflag::USE_AUTO
    autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env);

    Qureg qureg = qureg_populateNonHeapFields(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread);

    // always allocate CPU memory
    qureg.cpuAmps = cpu_allocArray(qureg.numAmpsPerNode); // nullptr if failed

    // conditionally allocate GPU memory and communication buffers (even if numNodes == 1).
    // note that in distributed environments where useDistrib=false, each node will have a
    // full copy of the amplitudes, but will NOT have the communication buffers allocated.
    qureg.gpuAmps = (useGpuAccel)? gpu_allocArray(qureg.numAmpsPerNode) : nullptr;
    qureg.cpuCommBuffer = (useDistrib)? cpu_allocArray(qureg.numAmpsPerNode) : nullptr;
    qureg.gpuCommBuffer = (useGpuAccel && useDistrib)? gpu_allocArray(qureg.numAmpsPerNode) : nullptr;

    // if any of the above allocations failed, the validation below would leak the rest;
    // so free everything first (but don't nullify pointers, so validation can detect the failure)
    freeAllMemoryIfAnyAllocsFailed(qureg);
    validate_newQuregAllocs(qureg, __func__);

    // initialise state to |0> or |0><0|
    initZeroState(qureg);

    return qureg;
}



/*
 * PRIVATE QUREG REPORTING INNER FUNCTIONS
 */


void printDeploymentInfo(Qureg qureg) {

    print_table(
        "deployment", {
            {"isMpiEnabled", qureg.isDistributed},
            {"isGpuEnabled", qureg.isGpuAccelerated},
            {"isOmpEnabled", qureg.isMultithreaded},
        });
}

void printDimensionInfo(Qureg qureg) {

    using namespace printer_substrings;

    // 2^N = M
    string ampsStr;
    ampsStr = bt + printer_toStr(qureg.numQubits * (qureg.isDensityMatrix? 2 : 1));
    ampsStr += eq + printer_toStr(qureg.numAmps);

    string colsStr = na;
    if (qureg.isDensityMatrix)
        colsStr = (
            bt + printer_toStr(qureg.numQubits) +
            eq + printer_toStr(powerOf2(qureg.numQubits)));

    print_table(
        "dimension", {
            {"isDensMatr", printer_toStr(qureg.isDensityMatrix)},
            {"numQubits", printer_toStr(qureg.numQubits)},
            {"numCols", colsStr},
            {"numAmps", ampsStr},
        });
}


void printDistributionInfo(Qureg qureg) {

    using namespace printer_substrings;

    // not applicable when not distributed
    string nodesStr = na;
    string ampsStr = na;
    string colsStr = na;

    // 2^N = M per node
    if (qureg.isDistributed) {
        nodesStr = bt + printer_toStr(qureg.logNumNodes) + eq + printer_toStr(qureg.numNodes);
        ampsStr = bt + printer_toStr(qureg.logNumAmpsPerNode) + eq + printer_toStr(qureg.numAmpsPerNode) + pn;
        if (qureg.isDensityMatrix)
            colsStr = bt + printer_toStr(qureg.logNumColsPerNode) + eq + printer_toStr(powerOf2(qureg.logNumColsPerNode)) + pn;
    }

    print_table(
        "distribution", {
            {"numNodes", nodesStr},
            {"numCols", colsStr},
            {"numAmps", ampsStr},
        });
}


void printMemoryInfo(Qureg qureg) {

    using namespace printer_substrings;

    size_t localArrayMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
    string localMemStr = printer_getMemoryWithUnitStr(localArrayMem) + (qureg.isDistributed? pn : "");

    // precondition: no reportable fields are at risk of overflow as a qindex
    // type, EXCEPT aggregate total memory between distributed nodes (in bytes)
    qindex globalTotalMem = mem_getTotalGlobalMemoryUsed(qureg);
    string globalMemStr = (globalTotalMem == 0)? "overflowed" : printer_getMemoryWithUnitStr(globalTotalMem);

    print_table(
        "memory", {
            {"cpuAmps", mem_isAllocated(qureg.cpuAmps)? localMemStr : na},
            {"gpuAmps", mem_isAllocated(qureg.gpuAmps)? localMemStr : na},
            {"cpuCommBuffer", mem_isAllocated(qureg.cpuCommBuffer)? localMemStr : na},
            {"gpuCommBuffer", mem_isAllocated(qureg.gpuCommBuffer)? localMemStr : na},
            {"globalTotal", globalMemStr},
        });
}



/*
 * PUBLIC C & C++ AGNOSTIC FUNCTIONS
 */

// enable invocation by both C and C++ binaries
extern "C" {


Qureg createCustomQureg(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int useMultithread) {

    return validateAndCreateCustomQureg(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, __func__);
}
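
/* An illustrative invocation (a sketch, not part of the original file):
 * force an 8-qubit density matrix to be distributed and multithreaded
 * but not GPU-accelerated. Each flag is 1 (force on) or 0 (force off);
 * the auto-deploying constructors below instead pass modeflag::USE_AUTO.
 *
 *     Qureg rho = createCustomQureg(
 *         8,  // numQubits
 *         1,  // isDensMatr
 *         1,  // useDistrib
 *         0,  // useGpuAccel
 *         1); // useMultithread
 */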


Qureg createQureg(int numQubits) {

    int isDensMatr = 0;
    int autoMode = modeflag::USE_AUTO;
    return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
}


Qureg createDensityQureg(int numQubits) {

    int isDensMatr = 1;
    int autoMode = modeflag::USE_AUTO;
    return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
}
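
/* Usage sketch (illustrative): both constructors defer every deployment
 * decision to the autodeployer. Note a density matrix over N qubits stores
 * 2^(2N) amplitudes, so the two registers below are equally large:
 *
 *     Qureg psi = createQureg(20);        // statevector, 2^20 amps
 *     Qureg rho = createDensityQureg(10); // density matrix, 2^20 amps
 */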


Qureg createForcedQureg(int numQubits) {
    validate_envIsInit(__func__);

    QuESTEnv env = getQuESTEnv();

    int isDensMatr = 0;
    return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
}


Qureg createForcedDensityQureg(int numQubits) {
    validate_envIsInit(__func__);

    QuESTEnv env = getQuESTEnv();

    int isDensMatr = 1;
    return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
}
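
/* Illustrative contrast (not from the original file): the "forced"
 * constructors activate every deployment enabled in the environment,
 * whereas the auto constructors leave each choice to the autodeployer:
 *
 *     Qureg a = createQureg(4);       // autodeployer will likely serialise
 *                                     // so small a register
 *     Qureg b = createForcedQureg(4); // distributed, GPU-accelerated and/or
 *                                     // multithreaded whenever env permits
 */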


Qureg createCloneQureg(Qureg qureg) {
    validate_quregFields(qureg, __func__);

    // create a new Qureg with identical fields, but zero'd memory
    Qureg clone = validateAndCreateCustomQureg(
        qureg.numQubits, qureg.isDensityMatrix, qureg.isDistributed,
        qureg.isGpuAccelerated, qureg.isMultithreaded, __func__);

    setQuregToClone(clone, qureg); // harmlessly re-validates

    // if GPU-accelerated, clone's CPU amps are NOT updated
    return clone;
}
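
/* Usage sketch (illustrative): a clone inherits the source's deployments
 * and state. Per the comment above, a GPU-accelerated clone's CPU amps
 * are stale until explicitly synchronised:
 *
 *     Qureg copy = createCloneQureg(qureg);
 *     syncQuregFromGpu(copy); // only if copy.cpuAmps will be read directly
 */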


void destroyQureg(Qureg qureg) {
    validate_quregFields(qureg, __func__);

    // free CPU memory
    cpu_deallocArray(qureg.cpuAmps);

    // free CPU communication buffer
    if (qureg.isDistributed)
        cpu_deallocArray(qureg.cpuCommBuffer);

    // free GPU memory
    if (qureg.isGpuAccelerated)
        gpu_deallocArray(qureg.gpuAmps);

    // free GPU communication buffer
    if (qureg.isGpuAccelerated && qureg.isDistributed)
        gpu_deallocArray(qureg.gpuCommBuffer);

    // cannot set free'd fields to nullptr because qureg
    // wasn't passed-by-reference, and isn't returned.
}
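
/* A minimal end-to-end lifecycle (an illustrative sketch, assuming the
 * environment routines declared in environment.h):
 *
 *     initQuESTEnv();
 *     Qureg psi = createQureg(12); // validated, allocated, set to |0>
 *     // ... simulate ...
 *     destroyQureg(psi);           // frees amps and any comm buffers
 *     finalizeQuESTEnv();
 */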


void reportQuregParams(Qureg qureg) {
    validate_quregFields(qureg, __func__);
    validate_numReportedNewlinesAboveZero(__func__); // because trailing newline mandatory

    /// @todo add function to write this output to file (useful for HPC debugging)

    // printer routines will consult env rank to avoid duplicate printing
    print_label("Qureg");
    printDeploymentInfo(qureg);
    printDimensionInfo(qureg);
    printDistributionInfo(qureg);
    printMemoryInfo(qureg);

    // exclude mandatory newline above
    print_oneFewerNewlines();
}


void reportQureg(Qureg qureg) {
    validate_quregFields(qureg, __func__);
    validate_numReportedNewlinesAboveZero(__func__); // because trailing newline mandatory

    // account all local CPU memory (including buffer), neglecting GPU memory
    // because it occupies a distinct memory space, which confuses accounting
    size_t localMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
    if (qureg.isDistributed)
        localMem *= 2; // include buffer. @todo will this ever overflow?!?!

    // include struct size (expected negligibly tiny)
    localMem += sizeof(qureg);

    print_header(qureg, localMem);
    print_elems(qureg);

    // exclude mandatory newline above
    print_oneFewerNewlines();
}
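
/* Usage sketch (illustrative): reportQuregParams() prints only the tables
 * composed by the private printers above, while reportQureg() additionally
 * prints the (potentially truncated) amplitudes themselves:
 *
 *     reportQuregParams(psi); // deployment/dimension/distribution/memory
 *     reportQureg(psi);       // header, then amplitudes
 *
 * Both demand at least one trailing newline, per the validation above.
 */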


void syncQuregToGpu(Qureg qureg) {
    validate_quregFields(qureg, __func__);

    // permit this to be called even in non-GPU mode
    if (qureg.isGpuAccelerated)
        gpu_copyCpuToGpu(qureg); // syncs then overwrites all local GPU amps
}
void syncQuregFromGpu(Qureg qureg) {
    validate_quregFields(qureg, __func__);

    // permit this to be called even in non-GPU mode
    if (qureg.isGpuAccelerated)
        gpu_copyGpuToCpu(qureg); // syncs then overwrites all local CPU amps
}
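
/* Usage sketch (illustrative): after directly overwriting qureg.cpuAmps,
 * a GPU-accelerated Qureg must be re-synchronised before simulation, since
 * the GPU memory is authoritative in GPU mode:
 *
 *     qureg.cpuAmps[0] = 1;  // manually modify local CPU amps
 *     syncQuregToGpu(qureg); // push to GPU; harmless no-op in CPU mode
 */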


void syncSubQuregToGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps) {
    validate_quregFields(qureg, __func__);
    validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);

    // the above validation communicates for node consensus in
    // distributed settings, because params can differ per-node.
    // note also this function accepts statevectors AND density
    // matrices, because the latter does not need a bespoke
    // (row,col) interface, because the user can only access/modify
    // local density matrix amps via a flat index anyway!

    // we permit this function to do nothing when not GPU-accelerated
    if (!qureg.isGpuAccelerated)
        return;

    // otherwise, every node merely copies its local subset, which
    // may differ per-node, in an embarrassingly parallel manner
    gpu_copyCpuToGpu(&qureg.cpuAmps[localStartInd], &qureg.gpuAmps[localStartInd], numLocalAmps);
}
void syncSubQuregFromGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps) {
    validate_quregFields(qureg, __func__);
    validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);

    // the above validation communicates for node consensus in
    // distributed settings, because params can differ per-node.
    // note also this function accepts statevectors AND density
    // matrices, because the latter does not need a bespoke
    // (row,col) interface, because the user can only access/modify
    // local density matrix amps via a flat index anyway!

    // we permit this function to do nothing when not GPU-accelerated
    if (!qureg.isGpuAccelerated)
        return;

    // otherwise, every node merely copies its local subset, which
    // may differ per-node, in an embarrassingly parallel manner
    gpu_copyGpuToCpu(&qureg.gpuAmps[localStartInd], &qureg.cpuAmps[localStartInd], numLocalAmps);
}
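
/* Usage sketch (illustrative): when only a contiguous local range of amps
 * was modified on the CPU, copying just that range is cheaper than a full
 * syncQuregToGpu(). Indices are local to each node's partition:
 *
 *     qureg.cpuAmps[100] = .5;
 *     qureg.cpuAmps[101] = .5;
 *     syncSubQuregToGpu(qureg, 100, 2);
 */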


void getQuregAmps(qcomp* outAmps, Qureg qureg, qindex startInd, qindex numAmps) {
    validate_quregFields(qureg, __func__);
    validate_quregIsStateVector(qureg, __func__);
    validate_basisStateIndices(qureg, startInd, numAmps, __func__);

    localiser_statevec_getAmps(outAmps, qureg, startInd, numAmps);
}


void getDensityQuregAmps(qcomp** outAmps, Qureg qureg, qindex startRow, qindex startCol, qindex numRows, qindex numCols) {
    validate_quregFields(qureg, __func__);
    validate_quregIsDensityMatrix(qureg, __func__);
    validate_basisStateRowCols(qureg, startRow, startCol, numRows, numCols, __func__);

    localiser_densmatr_getAmps(outAmps, qureg, startRow, startCol, numRows, numCols);
}
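
/* Usage sketch (illustrative, C-compatible): both getters gather possibly
 * distributed amplitudes into caller-allocated memory, using global (not
 * per-node) indices. The buffers below (rowA, rowB) are hypothetical:
 *
 *     qcomp amps[4];
 *     getQuregAmps(amps, psi, 0, 4); // first four statevector amps
 *
 *     qcomp* rows[2] = {rowA, rowB}; // caller-allocated rows of >= 2 qcomps
 *     getDensityQuregAmps(rows, rho, 0, 0, 2, 2); // top-left 2x2 block
 */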



// end de-mangler
}



/*
 * C++ ONLY FUNCTIONS
 *
 * which are not directly C-compatible because of the limited
 * interoperability of the qcomp type. See calculations.h
 * for more info. We here define a C++-only signature (with
 * name-mangling) and a C-friendly wrapper which passes by
 * pointer; the C-friendly interface in wrappers.h then
 * wraps this wrapper.
 */


qcomp getQuregAmp(Qureg qureg, qindex index) {
    validate_quregFields(qureg, __func__);
    validate_quregIsStateVector(qureg, __func__);
    validate_basisStateIndex(qureg, index, __func__);

    return localiser_statevec_getAmp(qureg, index);
}
extern "C" void _wrap_getQuregAmp(qcomp* out, Qureg qureg, qindex index) {

    *out = getQuregAmp(qureg, index);
}


qcomp getDensityQuregAmp(Qureg qureg, qindex row, qindex column) {
    validate_quregFields(qureg, __func__);
    validate_quregIsDensityMatrix(qureg, __func__);
    validate_basisStateRowCol(qureg, row, column, __func__);

    qindex ind = util_getGlobalFlatIndex(qureg, row, column);
    qcomp amp = localiser_statevec_getAmp(qureg, ind);
    return amp;
}
extern "C" void _wrap_getDensityQuregAmp(qcomp* out, Qureg qureg, qindex row, qindex column) {

    *out = getDensityQuregAmp(qureg, row, column);
}



/*
 * C++ OVERLOADS
 */


vector<qcomp> getQuregAmps(Qureg qureg, qindex startInd, qindex numAmps) {

    // allocate the output vector, and validate successful
    vector<qcomp> out;
    auto callback = [&]() { validate_tempAllocSucceeded(false, numAmps, sizeof(qcomp), __func__); };
    util_tryAllocVector(out, numAmps, callback);

    // performs main validation
    getQuregAmps(out.data(), qureg, startInd, numAmps);
    return out;
}


vector<vector<qcomp>> getDensityQuregAmps(Qureg qureg, qindex startRow, qindex startCol, qindex numRows, qindex numCols) {

    // allocate the output matrix, and validate successful
    vector<vector<qcomp>> out;
    qindex numElems = numRows * numCols; // never overflows (else Qureg alloc would fail)
    auto callback1 = [&]() { validate_tempAllocSucceeded(false, numElems, sizeof(qcomp), __func__); };
    util_tryAllocMatrix(out, numRows, numCols, callback1);

    // we must pass nested pointers to core C function, requiring another temp array, also validated
    vector<qcomp*> ptrs;
    auto callback2 = [&]() { validate_tempAllocSucceeded(false, numRows, sizeof(qcomp*), __func__); };
    util_tryAllocVector(ptrs, numRows, callback2);

    // embed out pointers
    for (qindex i=0; i<numRows; i++)
        ptrs[i] = out[i].data();

    // modify out through its ptrs
    getDensityQuregAmps(ptrs.data(), qureg, startRow, startCol, numRows, numCols);
    return out;
}
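
/* Usage sketch (illustrative): the C++ overloads return STL containers
 * rather than filling caller buffers, at the cost of the temporary
 * allocations validated above:
 *
 *     vector<qcomp> amps = getQuregAmps(psi, 0, 8);
 *     vector<vector<qcomp>> block = getDensityQuregAmps(rho, 0, 0, 4, 4);
 */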