The Quantum Exact Simulation Toolkit v4.0.0
qureg.cpp
/** @file
 * API definitions for creating and managing Quregs,
 * and automatically choosing their deployment modes.
 *
 * @author Tyson Jones
 */

#include "quest/include/qureg.h"
#include "quest/include/environment.h"
#include "quest/include/initialisations.h"

#include "quest/src/core/validation.hpp"
#include "quest/src/core/autodeployer.hpp"
#include "quest/src/core/printer.hpp"
#include "quest/src/core/bitwise.hpp"
#include "quest/src/core/memory.hpp"
#include "quest/src/core/utilities.hpp"
#include "quest/src/core/localiser.hpp"
#include "quest/src/comm/comm_config.hpp"
#include "quest/src/comm/comm_routines.hpp"
#include "quest/src/cpu/cpu_config.hpp"
#include "quest/src/gpu/gpu_config.hpp"

#include <string>

using std::string;



/*
 * INTERNALLY EXPOSED FUNCTION
 */

Qureg qureg_populateNonHeapFields(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int useMultithread) {

    QuESTEnv env = getQuESTEnv();

    // pre-compute some struct fields (to avoid circular initialisation)
    int logNumNodes = (useDistrib)?
        logBase2(env.numNodes) : 0;
    qindex logNumAmpsPerNode = (isDensMatr)?
        (2*numQubits - logNumNodes) :
        (  numQubits - logNumNodes);

    return {
        // bind deployment info
        .isMultithreaded = useMultithread,
        .isGpuAccelerated = useGpuAccel,
        .isDistributed = useDistrib,

        // optionally bind distributed info, noting that in distributed environments,
        // non-distributed quregs are duplicated on each node and each believes it is
        // the root node, with no other nodes existing; this is essential so that these
        // quregs can agnostically use distributed routines which consult the rank,
        // though it will interfere with naive root-only printing logic
        .rank = (useDistrib)? env.rank : 0,
        .numNodes = (useDistrib)? env.numNodes : 1,
        .logNumNodes = (useDistrib)? logBase2(env.numNodes) : 0, // duplicated for clarity

        // set dimensions
        .isDensityMatrix = isDensMatr,
        .numQubits = numQubits,
        .numAmps = (isDensMatr)? powerOf2(2*numQubits) : powerOf2(numQubits),
        .logNumAmps = (isDensMatr)? 2*numQubits : numQubits,

        // set dimensions per node (even if not distributed)
        .numAmpsPerNode = powerOf2(logNumAmpsPerNode),
        .logNumAmpsPerNode = logNumAmpsPerNode,
        .logNumColsPerNode = (isDensMatr)? numQubits - logNumNodes : 0, // used only by density matrices

        // caller will allocate heap memory as necessary
        .cpuAmps = nullptr,
        .gpuAmps = nullptr,
        .cpuCommBuffer = nullptr,
        .gpuCommBuffer = nullptr
    };
}
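
/* Illustrative example of the above bookkeeping: a 10-qubit density matrix
 * distributed over 4 nodes has logNumNodes = 2, hence
 *   logNumAmpsPerNode = 2*10 - 2 = 18  (numAmpsPerNode = 2^18 = 262144)
 *   logNumColsPerNode =   10 - 2 = 8   (256 of the 2^10 columns stored per node)
 * while the global state retains numAmps = 2^20 amplitudes.
 */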



/*
 * PRIVATE INNER FUNCTIONS (C++)
 */


bool didAnyLocalAllocsFail(Qureg qureg) {

    // CPU memory should always be allocated
    if (! mem_isAllocated(qureg.cpuAmps))
        return true;

    // when distributed, the CPU communication buffer must be allocated
    if (qureg.isDistributed && ! mem_isAllocated(qureg.cpuCommBuffer))
        return true;

    // when GPU-accelerated, the GPU memory should be allocated
    if (qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuAmps))
        return true;

    // when both distributed and GPU-accelerated, the GPU communication buffer must be allocated
    if (qureg.isDistributed && qureg.isGpuAccelerated && ! mem_isAllocated(qureg.gpuCommBuffer))
        return true;

    // otherwise all pointers were non-NULL so no allocations failed
    return false;
}


bool didAnyAllocsFailOnAnyNode(Qureg qureg) {

    bool anyFail = didAnyLocalAllocsFail(qureg);
    if (comm_isInit())
        anyFail = comm_isTrueOnAllNodes(anyFail);

    return anyFail;
}


void freeAllMemoryIfAnyAllocsFailed(Qureg qureg) {

    // do nothing if everything allocated successfully on all nodes
    if (!didAnyAllocsFailOnAnyNode(qureg))
        return;

    // otherwise, free everything that was successfully allocated (freeing nullptr is legal)
    cpu_deallocArray(qureg.cpuAmps);
    cpu_deallocArray(qureg.cpuCommBuffer);

    // though we still avoid calling GPU deallocation in non-GPU mode
    if (qureg.isGpuAccelerated) {
        gpu_deallocArray(qureg.gpuAmps);
        gpu_deallocArray(qureg.gpuCommBuffer);
    }
}


Qureg validateAndCreateCustomQureg(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int useMultithread, const char* caller) {

    validate_envIsInit(caller);
    QuESTEnv env = getQuESTEnv();

    // ensure deployment is compatible with the environment, considering the available hardware and its memory capacities
    validate_newQuregParams(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env, caller);

    // automatically overwrite distrib, GPU, and multithread fields which were left as modeflag::USE_AUTO
    autodep_chooseQuregDeployment(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, env);

    Qureg qureg = qureg_populateNonHeapFields(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread);

    // always allocate CPU memory
    qureg.cpuAmps = cpu_allocArray(qureg.numAmpsPerNode); // nullptr if failed

    // conditionally allocate GPU memory and communication buffers (even if numNodes == 1).
    // note that in distributed settings where useDistrib=false, each node will have a
    // full copy of the amplitudes, but will NOT have the communication buffers allocated.
    qureg.gpuAmps = (useGpuAccel)? gpu_allocArray(qureg.numAmpsPerNode) : nullptr;
    qureg.cpuCommBuffer = (useDistrib)? cpu_allocArray(qureg.numAmpsPerNode) : nullptr;
    qureg.gpuCommBuffer = (useGpuAccel && useDistrib)? gpu_allocArray(qureg.numAmpsPerNode) : nullptr;

    // if any of the above mallocs failed, the below validation would leak memory; so free
    // everything first (but don't set pointers to nullptr)
    freeAllMemoryIfAnyAllocsFailed(qureg);
    validate_newQuregAllocs(qureg, __func__);

    // initialise state to |0> or |0><0|
    initZeroState(qureg);

    return qureg;
}



/*
 * PRIVATE QUREG REPORTING INNER FUNCTIONS
 */


void printDeploymentInfo(Qureg qureg) {

    print_table(
        "deployment", {
            {"isMpiEnabled", qureg.isDistributed},
            {"isGpuEnabled", qureg.isGpuAccelerated},
            {"isOmpEnabled", qureg.isMultithreaded},
        });
}

void printDimensionInfo(Qureg qureg) {

    using namespace printer_substrings;

    // 2^N = M
    string ampsStr;
    ampsStr = bt + printer_toStr(qureg.numQubits * (qureg.isDensityMatrix? 2 : 1));
    ampsStr += eq + printer_toStr(qureg.numAmps);

    string colsStr = na;
    if (qureg.isDensityMatrix)
        colsStr = (
            bt + printer_toStr(qureg.numQubits) +
            eq + printer_toStr(powerOf2(qureg.numQubits)));

    print_table(
        "dimension", {
            {"isDensMatr", printer_toStr(qureg.isDensityMatrix)},
            {"numQubits", printer_toStr(qureg.numQubits)},
            {"numCols", colsStr},
            {"numAmps", ampsStr},
        });
}


void printDistributionInfo(Qureg qureg) {

    using namespace printer_substrings;

    // not applicable when not distributed
    string nodesStr = na;
    string ampsStr = na;
    string colsStr = na;

    // 2^N = M per node
    if (qureg.isDistributed) {
        nodesStr = bt + printer_toStr(qureg.logNumNodes) + eq + printer_toStr(qureg.numNodes);
        ampsStr = bt + printer_toStr(qureg.logNumAmpsPerNode) + eq + printer_toStr(qureg.numAmpsPerNode) + pn;
        if (qureg.isDensityMatrix)
            colsStr = bt + printer_toStr(qureg.logNumColsPerNode) + eq + printer_toStr(powerOf2(qureg.logNumColsPerNode)) + pn;
    }

    print_table(
        "distribution", {
            {"numNodes", nodesStr},
            {"numCols", colsStr},
            {"numAmps", ampsStr},
        });
}


void printMemoryInfo(Qureg qureg) {

    using namespace printer_substrings;

    size_t localArrayMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
    string localMemStr = printer_getMemoryWithUnitStr(localArrayMem) + (qureg.isDistributed? pn : "");

    // precondition: no reportable fields are at risk of overflow as a qindex
    // type, EXCEPT aggregate total memory between distributed nodes (in bytes)
    qindex globalTotalMem = mem_getTotalGlobalMemoryUsed(qureg);
    string globalMemStr = (globalTotalMem == 0)? "overflowed" : printer_getMemoryWithUnitStr(globalTotalMem);

    print_table(
        "memory", {
            {"cpuAmps", mem_isAllocated(qureg.cpuAmps)? localMemStr : na},
            {"gpuAmps", mem_isAllocated(qureg.gpuAmps)? localMemStr : na},
            {"cpuCommBuffer", mem_isAllocated(qureg.cpuCommBuffer)? localMemStr : na},
            {"gpuCommBuffer", mem_isAllocated(qureg.gpuCommBuffer)? localMemStr : na},
            {"globalTotal", globalMemStr},
        });
}
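
/* Rough illustration of the reported figures (assuming double-precision qcomp,
 * i.e. 16 bytes per amplitude): a 30-qubit statevector occupies 2^30 * 16 B = 16 GiB
 * of CPU amplitude memory in total; distributed over 4 nodes, each node instead
 * stores 2^28 amps (4 GiB) plus an equally sized communication buffer, and a
 * GPU-accelerated deployment mirrors the same arrays again in GPU memory.
 */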



/*
 * PUBLIC FUNCTIONS
 */

// enable invocation by both C and C++ binaries
extern "C" {


Qureg createCustomQureg(int numQubits, int isDensMatr, int useDistrib, int useGpuAccel, int useMultithread) {

    return validateAndCreateCustomQureg(numQubits, isDensMatr, useDistrib, useGpuAccel, useMultithread, __func__);
}
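
/* For illustration, explicitly requesting a 25-qubit statevector which is
 * distributed and multithreaded but not GPU-accelerated might look like
 *
 *     Qureg qureg = createCustomQureg(25, 0, 1, 0, 1);
 *
 * whereas the convenience constructors below leave the three deployment flags
 * as modeflag::USE_AUTO for the autodeployer to decide.
 */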


Qureg createQureg(int numQubits) {

    int isDensMatr = 0;
    int autoMode = modeflag::USE_AUTO;
    return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
}


Qureg createDensityQureg(int numQubits) {

    int isDensMatr = 1;
    int autoMode = modeflag::USE_AUTO;
    return validateAndCreateCustomQureg(numQubits, isDensMatr, autoMode, autoMode, autoMode, __func__);
}
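
/* Note the dimensions set earlier imply createQureg(N) stores 2^N amplitudes while
 * createDensityQureg(N) stores 2^(2N); e.g. a 15-qubit density matrix is as
 * large as a 30-qubit statevector.
 */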


Qureg createForcedQureg(int numQubits) {
    validate_envIsInit(__func__);

    QuESTEnv env = getQuESTEnv();

    int isDensMatr = 0;
    return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
}


Qureg createForcedDensityQureg(int numQubits) {
    validate_envIsInit(__func__);

    QuESTEnv env = getQuESTEnv();

    int isDensMatr = 1;
    return validateAndCreateCustomQureg(numQubits, isDensMatr, env.isDistributed, env.isGpuAccelerated, env.isMultithreaded, __func__);
}
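
/* The "forced" constructors above adopt every deployment enabled by the
 * environment (distribution, GPU-acceleration, multithreading), rather than
 * deferring the choice to the autodeployer as createQureg() does.
 */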


Qureg createCloneQureg(Qureg qureg) {
    validate_quregFields(qureg, __func__);

    // create a new Qureg with identical fields, but zero'd memory
    Qureg clone = validateAndCreateCustomQureg(
        qureg.numQubits, qureg.isDensityMatrix, qureg.isDistributed,
        qureg.isGpuAccelerated, qureg.isMultithreaded, __func__);

    setQuregToClone(clone, qureg); // harmlessly re-validates

    // if GPU-accelerated, the clone's CPU amps are NOT updated
    return clone;
}


void destroyQureg(Qureg qureg) {
    validate_quregFields(qureg, __func__);

    // free CPU memory
    cpu_deallocArray(qureg.cpuAmps);

    // free CPU communication buffer
    if (qureg.isDistributed)
        cpu_deallocArray(qureg.cpuCommBuffer);

    // free GPU memory
    if (qureg.isGpuAccelerated)
        gpu_deallocArray(qureg.gpuAmps);

    // free GPU communication buffer
    if (qureg.isGpuAccelerated && qureg.isDistributed)
        gpu_deallocArray(qureg.gpuCommBuffer);

    // cannot set freed fields to nullptr because qureg
    // wasn't passed by reference, and isn't returned.
}
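
/* Typical lifecycle, for illustration:
 *
 *     Qureg qureg = createQureg(12);
 *     // ... apply operations and measurements ...
 *     destroyQureg(qureg);
 *
 * Because destroyQureg() receives the struct by value, the caller's copy is left
 * with dangling pointers and must not be reused.
 */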


void reportQuregParams(Qureg qureg) {
    validate_quregFields(qureg, __func__);
    validate_numReportedNewlinesAboveZero(__func__); // because a trailing newline is mandatory

    /// @todo add function to write this output to file (useful for HPC debugging)

    // printer routines will consult env rank to avoid duplicate printing
    print_label("Qureg");
    printDeploymentInfo(qureg);
    printDimensionInfo(qureg);
    printDistributionInfo(qureg);
    printMemoryInfo(qureg);

    // exclude the mandatory newline above
    print_oneFewerNewlines();
}


void reportQureg(Qureg qureg) {
    validate_quregFields(qureg, __func__);
    validate_numReportedNewlinesAboveZero(__func__); // because a trailing newline is mandatory

    // account for all local CPU memory (including the buffer), neglecting GPU memory
    // because it occupies a distinct memory space and would confuse the accounting
    size_t localMem = mem_getLocalQuregMemoryRequired(qureg.numAmpsPerNode);
    if (qureg.isDistributed)
        localMem *= 2; // include buffer. @todo will this ever overflow?

    // include the struct size (expected negligibly tiny)
    localMem += sizeof(qureg);

    print_header(qureg, localMem);
    print_elems(qureg);

    // exclude the mandatory newline above
    print_oneFewerNewlines();
}


void syncQuregToGpu(Qureg qureg) {
    validate_quregFields(qureg, __func__);

    // permit this to be called even in non-GPU mode
    if (qureg.isGpuAccelerated)
        gpu_copyCpuToGpu(qureg); // syncs then overwrites all local GPU amps
}
void syncQuregFromGpu(Qureg qureg) {
    validate_quregFields(qureg, __func__);

    // permit this to be called even in non-GPU mode
    if (qureg.isGpuAccelerated)
        gpu_copyGpuToCpu(qureg); // syncs then overwrites all local CPU amps
}
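
/* Illustrative use: after directly overwriting local CPU amplitudes, push them
 * to the GPU before further simulation, e.g.
 *
 *     qureg.cpuAmps[0] = qcomp(1, 0);
 *     syncQuregToGpu(qureg);
 *
 * and conversely call syncQuregFromGpu(qureg) before directly reading cpuAmps.
 */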


void syncSubQuregToGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps) {
    validate_quregFields(qureg, __func__);
    validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);

    // the above validation communicates for node consensus in
    // distributed settings, because params can differ per-node.
    // note also this function accepts statevectors AND density
    // matrices; the latter needs no bespoke (row,col) interface
    // because the user can only access/modify local density
    // matrix amps via a flat index anyway!

    // we permit this function to do nothing when not GPU-accelerated
    if (!qureg.isGpuAccelerated)
        return;

    // otherwise, every node merely copies its local subset, which
    // may differ per-node, in an embarrassingly parallel manner
    gpu_copyCpuToGpu(&qureg.cpuAmps[localStartInd], &qureg.gpuAmps[localStartInd], numLocalAmps);
}
void syncSubQuregFromGpu(Qureg qureg, qindex localStartInd, qindex numLocalAmps) {
    validate_quregFields(qureg, __func__);
    validate_localAmpIndices(qureg, localStartInd, numLocalAmps, __func__);

    // the above validation communicates for node consensus in
    // distributed settings, because params can differ per-node.
    // note also this function accepts statevectors AND density
    // matrices; the latter needs no bespoke (row,col) interface
    // because the user can only access/modify local density
    // matrix amps via a flat index anyway!

    // we permit this function to do nothing when not GPU-accelerated
    if (!qureg.isGpuAccelerated)
        return;

    // otherwise, every node merely copies its local subset, which
    // may differ per-node, in an embarrassingly parallel manner
    gpu_copyGpuToCpu(&qureg.gpuAmps[localStartInd], &qureg.cpuAmps[localStartInd], numLocalAmps);
}
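
/* Illustrative use: overwrite a small contiguous slice of local amplitudes and
 * copy only that slice to the GPU, rather than the full state, e.g.
 *
 *     qureg.cpuAmps[5] = qcomp(0, 1);
 *     syncSubQuregToGpu(qureg, 5, 1);
 */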


void getQuregAmps(qcomp* outAmps, Qureg qureg, qindex startInd, qindex numAmps) {
    validate_quregFields(qureg, __func__);
    validate_quregIsStateVector(qureg, __func__);
    validate_basisStateIndices(qureg, startInd, numAmps, __func__);

    localiser_statevec_getAmps(outAmps, qureg, startInd, numAmps);
}


void getDensityQuregAmps(qcomp** outAmps, Qureg qureg, qindex startRow, qindex startCol, qindex numRows, qindex numCols) {
    validate_quregFields(qureg, __func__);
    validate_quregIsDensityMatrix(qureg, __func__);
    validate_basisStateRowCols(qureg, startRow, startCol, numRows, numCols, __func__);

    localiser_densmatr_getAmps(outAmps, qureg, startRow, startCol, numRows, numCols);
}
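
/* Both getters write into caller-supplied storage. For instance, a 2x2 block of
 * a density matrix might (illustratively) be fetched via
 *
 *     std::vector<qcomp> row0(2), row1(2);
 *     qcomp* rows[] = {row0.data(), row1.data()};
 *     getDensityQuregAmps(rows, qureg, 0, 0, 2, 2);
 */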



// end de-mangler
}




/*
 * C++ ONLY FUNCTIONS
 *
 * which are not directly C-compatible because of limited
 * interoperability of the qcomp type. See calculations.h
 * for more info. We here define a C++-only signature (with
 * name-mangling) and a C-friendly wrapper which passes by
 * pointer; the C-friendly interface in wrappers.h itself
 * wraps this.
 */
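
/* For illustration: since qcomp cannot portably cross the C/C++ boundary by value,
 * a C binary obtains amplitudes through the pointer-based wrappers below
 * (exposed via wrappers.h), e.g.
 *
 *     qcomp amp;
 *     _wrap_getQuregAmp(&amp, qureg, 0);
 *
 * while C++ binaries may call getQuregAmp(qureg, 0) directly.
 */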


qcomp getQuregAmp(Qureg qureg, qindex index) {
    validate_quregFields(qureg, __func__);
    validate_quregIsStateVector(qureg, __func__);
    validate_basisStateIndex(qureg, index, __func__);

    return localiser_statevec_getAmp(qureg, index);
}
extern "C" void _wrap_getQuregAmp(qcomp* out, Qureg qureg, qindex index) {

    *out = getQuregAmp(qureg, index);
}


qcomp getDensityQuregAmp(Qureg qureg, qindex row, qindex column) {
    validate_quregFields(qureg, __func__);
    validate_quregIsDensityMatrix(qureg, __func__);
    validate_basisStateRowCol(qureg, row, column, __func__);

    qindex ind = util_getGlobalFlatIndex(qureg, row, column);
    qcomp amp = localiser_statevec_getAmp(qureg, ind);
    return amp;
}
extern "C" void _wrap_getDensityQuregAmp(qcomp* out, Qureg qureg, qindex row, qindex column) {

    *out = getDensityQuregAmp(qureg, row, column);
}