QuEST_cpu_distributed.c
Go to the documentation of this file.
1 // Distributed under MIT licence. See https://github.com/QuEST-Kit/QuEST/blob/master/LICENCE.txt for details
2 
12 # include "QuEST.h"
13 # include "QuEST_internal.h"
14 # include "QuEST_precision.h"
15 # include "QuEST_validation.h"
16 # include "mt19937ar.h"
17 
18 # include "QuEST_cpu_internal.h"
19 
20 # define _BSD_SOURCE
21 # include <unistd.h>
22 # include <mpi.h>
23 # include <stdlib.h>
24 # include <stdio.h>
25 # include <string.h> // for memcpy
26 # include <math.h>
27 # include <time.h>
28 # include <sys/types.h>
29 
30 # ifdef _OPENMP
31 # include <omp.h>
32 # endif
33 
34 
36 
37  Complex localInnerProd = statevec_calcInnerProductLocal(bra, ket);
38  if (bra.numChunks == 1)
39  return localInnerProd;
40 
41  qreal localReal = localInnerProd.real;
42  qreal localImag = localInnerProd.imag;
43  qreal globalReal, globalImag;
44  MPI_Allreduce(&localReal, &globalReal, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
45  MPI_Allreduce(&localImag, &globalImag, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
46 
47  Complex globalInnerProd;
48  globalInnerProd.real = globalReal;
49  globalInnerProd.imag = globalImag;
50  return globalInnerProd;
51 }
52 
54 
55  // computes the trace by summing every element ("diag") with global index (2^n + 1)i for i in [0, 2^n-1]
56 
57  // computes first local index containing a diagonal element
58  long long int diagSpacing = 1LL + (1LL << qureg.numQubitsRepresented);
59  long long int numPrevDiags = (qureg.chunkId>0)? 1+(qureg.chunkId*qureg.numAmpsPerChunk)/diagSpacing : 0;
60  long long int globalIndNextDiag = diagSpacing * numPrevDiags;
61  long long int localIndNextDiag = globalIndNextDiag % qureg.numAmpsPerChunk;
62  long long int index;
63 
64  qreal rankTotal = 0;
65  qreal y, t, c;
66  c = 0;
67 
68  // iterates every local diagonal
69  for (index=localIndNextDiag; index < qureg.numAmpsPerChunk; index += diagSpacing) {
70 
71  // Kahan summation - brackets are important
72  y = qureg.stateVec.real[index] - c;
73  t = rankTotal + y;
74  c = ( t - rankTotal ) - y;
75  rankTotal = t;
76  }
77 
78  // combine each node's sum of diagonals
79  qreal globalTotal;
80  if (qureg.numChunks > 1)
81  MPI_Allreduce(&rankTotal, &globalTotal, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
82  else
83  globalTotal = rankTotal;
84 
85  return globalTotal;
86 }
87 
89  // Implemented using Kahan summation for greater accuracy at a slight floating
90  // point operation overhead. For more details see https://en.wikipedia.org/wiki/Kahan_summation_algorithm
91  qreal pTotal=0;
92  qreal y, t, c;
93  qreal allRankTotals=0;
94  long long int index;
95  long long int numAmpsPerRank = qureg.numAmpsPerChunk;
96  c = 0.0;
97  for (index=0; index<numAmpsPerRank; index++){
98  // Perform pTotal+=qureg.stateVec.real[index]*qureg.stateVec.real[index]; by Kahan
99  y = qureg.stateVec.real[index]*qureg.stateVec.real[index] - c;
100  t = pTotal + y;
101  // Don't change the bracketing on the following line
102  c = ( t - pTotal ) - y;
103  pTotal = t;
104  // Perform pTotal+=qureg.stateVec.imag[index]*qureg.stateVec.imag[index]; by Kahan
105  y = qureg.stateVec.imag[index]*qureg.stateVec.imag[index] - c;
106  t = pTotal + y;
107  // Don't change the bracketing on the following line
108  c = ( t - pTotal ) - y;
109  pTotal = t;
110  }
111  if (qureg.numChunks>1)
112  MPI_Allreduce(&pTotal, &allRankTotals, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
113  else
114  allRankTotals=pTotal;
115 
116  return allRankTotals;
117 }
118 
119 
120 static int isChunkToSkipInFindPZero(int chunkId, long long int chunkSize, int measureQubit);
121 static int chunkIsUpper(int chunkId, long long int chunkSize, int targetQubit);
122 static int chunkIsUpperInOuterBlock(int chunkId, long long int chunkSize, int targetQubit, int numQubits);
123 static void getRotAngle(int chunkIsUpper, Complex *rot1, Complex *rot2, Complex alpha, Complex beta);
124 static int getChunkPairId(int chunkIsUpper, int chunkId, long long int chunkSize, int targetQubit);
125 static int getChunkOuterBlockPairId(int chunkIsUpper, int chunkId, long long int chunkSize, int targetQubit, int numQubits);
126 static int halfMatrixBlockFitsInChunk(long long int chunkSize, int targetQubit);
127 static int getChunkIdFromIndex(Qureg qureg, long long int index);
128 
130 
131  QuESTEnv env;
132 
133  // init MPI environment
134  int rank, numRanks, initialized;
135  MPI_Initialized(&initialized);
136  if (!initialized){
137  MPI_Init(NULL, NULL);
138  MPI_Comm_size(MPI_COMM_WORLD, &numRanks);
139  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
140 
141  env.rank=rank;
142  env.numRanks=numRanks;
143 
144  } else {
145 
146  printf("ERROR: Trying to initialize QuESTEnv multiple times. Ignoring...\n");
147 
148  // ensure env is initialised anyway, so the compiler is happy
149  MPI_Comm_size(MPI_COMM_WORLD, &numRanks);
150  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
151  env.rank=rank;
152  env.numRanks=numRanks;
153  }
154 
155  validateNumRanks(env.numRanks, __func__);
156 
157  env.seeds = NULL;
158  env.numSeeds = 0;
159  seedQuESTDefault(&env);
160 
161  return env;
162 }
163 
165  MPI_Barrier(MPI_COMM_WORLD);
166 }
167 
168 int syncQuESTSuccess(int successCode){
169  int totalSuccess;
170  MPI_Allreduce(&successCode, &totalSuccess, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD);
171  return totalSuccess;
172 }
173 
175  free(env.seeds);
176 
177  int finalized;
178  MPI_Finalized(&finalized);
179  if (!finalized) MPI_Finalize();
180  else printf("ERROR: Trying to close QuESTEnv multiple times. Ignoring\n");
181 }
182 
184  if (env.rank==0){
185  printf("EXECUTION ENVIRONMENT:\n");
186  printf("Running distributed (MPI) version\n");
187  printf("Number of ranks is %d\n", env.numRanks);
188 # ifdef _OPENMP
189  printf("OpenMP enabled\n");
190  printf("Number of threads available is %d\n", omp_get_max_threads());
191 # else
192  printf("OpenMP disabled\n");
193 # endif
194  printf("Precision: size of qreal is %ld bytes\n", sizeof(qreal) );
195  }
196 }
197 
198 void getEnvironmentString(QuESTEnv env, char str[200]){
199  int ompStatus=0;
200  int numThreads=1;
201 # ifdef _OPENMP
202  ompStatus=1;
203  numThreads=omp_get_max_threads();
204 # endif
205  sprintf(str, "CUDA=0 OpenMP=%d MPI=1 threads=%d ranks=%d", ompStatus, numThreads, env.numRanks);
206 }
207 
208 int getChunkIdFromIndex(Qureg qureg, long long int index){
209  return index/qureg.numAmpsPerChunk; // this is numAmpsPerChunk
210 }
211 
212 qreal statevec_getRealAmp(Qureg qureg, long long int index){
213  int chunkId = getChunkIdFromIndex(qureg, index);
214  qreal el;
215  if (qureg.chunkId==chunkId){
216  el = qureg.stateVec.real[index-chunkId*qureg.numAmpsPerChunk];
217  }
218  MPI_Bcast(&el, 1, MPI_QuEST_REAL, chunkId, MPI_COMM_WORLD);
219  return el;
220 }
221 
222 qreal statevec_getImagAmp(Qureg qureg, long long int index){
223  int chunkId = getChunkIdFromIndex(qureg, index);
224  qreal el;
225  if (qureg.chunkId==chunkId){
226  el = qureg.stateVec.imag[index-chunkId*qureg.numAmpsPerChunk];
227  }
228  MPI_Bcast(&el, 1, MPI_QuEST_REAL, chunkId, MPI_COMM_WORLD);
229  return el;
230 }
231 
240 static int chunkIsUpper(int chunkId, long long int chunkSize, int targetQubit)
242 {
243  long long int sizeHalfBlock = 1LL << (targetQubit);
244  long long int sizeBlock = sizeHalfBlock*2;
245  long long int posInBlock = (chunkId*chunkSize) % sizeBlock;
246  return posInBlock<sizeHalfBlock;
247 }
248 
250 static int chunkIsUpperInOuterBlock(int chunkId, long long int chunkSize, int targetQubit, int numQubits)
251 {
252  long long int sizeOuterHalfBlock = 1LL << (targetQubit+numQubits);
253  long long int sizeOuterBlock = sizeOuterHalfBlock*2;
254  long long int posInBlock = (chunkId*chunkSize) % sizeOuterBlock;
255  return posInBlock<sizeOuterHalfBlock;
256 }
257 
272 static void getRotAngle(int chunkIsUpper, Complex *rot1, Complex *rot2, Complex alpha, Complex beta)
273 {
274  if (chunkIsUpper){
275  *rot1=alpha;
276  rot2->real=-beta.real;
277  rot2->imag=-beta.imag;
278  } else {
279  *rot1=beta;
280  *rot2=alpha;
281  }
282 }
283 
298 {
299  if (chunkIsUpper){
300  *rot1=(Complex) {.real=u.real[0][0], .imag=u.imag[0][0]};
301  *rot2=(Complex) {.real=u.real[0][1], .imag=u.imag[0][1]};
302  } else {
303  *rot1=(Complex) {.real=u.real[1][0], .imag=u.imag[1][0]};
304  *rot2=(Complex) {.real=u.real[1][1], .imag=u.imag[1][1]};
305  }
306 }
307 
317 static int getChunkPairId(int chunkIsUpper, int chunkId, long long int chunkSize, int targetQubit)
318 {
319  long long int sizeHalfBlock = 1LL << (targetQubit);
320  int chunksPerHalfBlock = sizeHalfBlock/chunkSize;
321  if (chunkIsUpper){
322  return chunkId + chunksPerHalfBlock;
323  } else {
324  return chunkId - chunksPerHalfBlock;
325  }
326 }
327 
328 static int getChunkOuterBlockPairId(int chunkIsUpper, int chunkId, long long int chunkSize, int targetQubit, int numQubits)
329 {
330  long long int sizeOuterHalfBlock = 1LL << (targetQubit+numQubits);
331  int chunksPerOuterHalfBlock = sizeOuterHalfBlock/chunkSize;
332  if (chunkIsUpper){
333  return chunkId + chunksPerOuterHalfBlock;
334  } else {
335  return chunkId - chunksPerOuterHalfBlock;
336  }
337 }
338 
339 static int getChunkOuterBlockPairIdForPart3(int chunkIsUpperSmallerQubit, int chunkIsUpperBiggerQubit, int chunkId,
340  long long int chunkSize, int smallerQubit, int biggerQubit, int numQubits)
341 {
342  long long int sizeOuterHalfBlockBiggerQubit = 1LL << (biggerQubit+numQubits);
343  long long int sizeOuterHalfBlockSmallerQubit = 1LL << (smallerQubit+numQubits);
344  int chunksPerOuterHalfBlockSmallerQubit = sizeOuterHalfBlockSmallerQubit/chunkSize;
345  int chunksPerOuterHalfBlockBiggerQubit = sizeOuterHalfBlockBiggerQubit/chunkSize;
346  int rank;
347  if (chunkIsUpperBiggerQubit){
348  rank = chunkId + chunksPerOuterHalfBlockBiggerQubit;
349  } else {
350  rank = chunkId - chunksPerOuterHalfBlockBiggerQubit;
351  }
352 
353  if (chunkIsUpperSmallerQubit){
354  rank = rank + chunksPerOuterHalfBlockSmallerQubit;
355  } else {
356  rank = rank - chunksPerOuterHalfBlockSmallerQubit;
357  }
358 
359  return rank;
360 }
361 
369 static int halfMatrixBlockFitsInChunk(long long int chunkSize, int targetQubit)
371 {
372  long long int sizeHalfBlock = 1LL << (targetQubit);
373  if (chunkSize > sizeHalfBlock) return 1;
374  else return 0;
375 }
376 
377 static int densityMatrixBlockFitsInChunk(long long int chunkSize, int numQubits, int targetQubit) {
378  long long int sizeOuterHalfBlock = 1LL << (targetQubit+numQubits);
379  if (chunkSize > sizeOuterHalfBlock) return 1;
380  else return 0;
381 }
382 
386 
387  // Remember that for every amplitude that `vec` stores on the node,
388  // `matr` stores an entire column. Ergo there are always an integer
389  // number (in fact, a power of 2) number of `matr`s columns on each node.
390  // Since the total size of `vec` (between all nodes) is one column
391  // and each node stores (possibly) multiple columns (vec.numAmpsPerChunk as many),
392  // `vec` can be fit entirely inside a single node's matr.pairStateVec (with excess!)
393 
394  // copy this node's vec segment into this node's matr pairState (in the right spot)
395  long long int numLocalAmps = vec.numAmpsPerChunk;
396  long long int myOffset = vec.chunkId * numLocalAmps;
397  memcpy(&matr.pairStateVec.real[myOffset], vec.stateVec.real, numLocalAmps * sizeof(qreal));
398  memcpy(&matr.pairStateVec.imag[myOffset], vec.stateVec.imag, numLocalAmps * sizeof(qreal));
399 
400  // we now want to share this node's vec segment with other node, so that
401  // vec is cloned in every node's matr.pairStateVec
402 
403  // work out how many messages needed to send vec chunks (2GB limit)
404  long long int maxMsgSize = MPI_MAX_AMPS_IN_MSG;
405  if (numLocalAmps < maxMsgSize)
406  maxMsgSize = numLocalAmps;
407  // safely assume MPI_MAX... = 2^n, so division always exact:
408  int numMsgs = numLocalAmps / maxMsgSize;
409 
410  // every node gets a turn at being the broadcaster
411  for (int broadcaster=0; broadcaster < vec.numChunks; broadcaster++) {
412 
413  long long int otherOffset = broadcaster * numLocalAmps;
414 
415  // every node sends a slice of qureg's pairState to every other
416  for (int i=0; i< numMsgs; i++) {
417 
418  // by sending that slice in further slices (due to bandwidth limit)
419  MPI_Bcast(
420  &matr.pairStateVec.real[otherOffset + i*maxMsgSize],
421  maxMsgSize, MPI_QuEST_REAL, broadcaster, MPI_COMM_WORLD);
422  MPI_Bcast(
423  &matr.pairStateVec.imag[otherOffset + i*maxMsgSize],
424  maxMsgSize, MPI_QuEST_REAL, broadcaster, MPI_COMM_WORLD);
425  }
426  }
427 }
428 
430 
431  // set qureg's pairState is to be the full pureState (on every node)
432  copyVecIntoMatrixPairState(qureg, pureState);
433 
434  // collect calcFidelityLocal by every machine
435  qreal localSum = densmatr_calcFidelityLocal(qureg, pureState);
436 
437  // sum each localSum
438  qreal globalSum;
439  MPI_Allreduce(&localSum, &globalSum, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
440 
441  return globalSum;
442 }
443 
445 
447 
448  qreal globalSum;
449  MPI_Allreduce(&localSum, &globalSum, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
450 
451  qreal dist = sqrt(globalSum);
452  return dist;
453 }
454 
456 
457  qreal localSum = densmatr_calcInnerProductLocal(a, b);
458 
459  qreal globalSum;
460  MPI_Allreduce(&localSum, &globalSum, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
461 
462  qreal dist = globalSum;
463  return dist;
464 }
465 
466 void densmatr_initPureState(Qureg targetQureg, Qureg copyQureg) {
467 
468  if (targetQureg.numChunks==1){
469  // local version
470  // save pointers to qureg's pair state
471  qreal* quregPairRePtr = targetQureg.pairStateVec.real;
472  qreal* quregPairImPtr = targetQureg.pairStateVec.imag;
473 
474  // populate qureg pair state with pure state (by repointing)
475  targetQureg.pairStateVec.real = copyQureg.stateVec.real;
476  targetQureg.pairStateVec.imag = copyQureg.stateVec.imag;
477 
478  // populate density matrix via it's pairState
479  densmatr_initPureStateLocal(targetQureg, copyQureg);
480 
481  // restore pointers
482  targetQureg.pairStateVec.real = quregPairRePtr;
483  targetQureg.pairStateVec.imag = quregPairImPtr;
484  } else {
485  // set qureg's pairState is to be the full pure state (on every node)
486  copyVecIntoMatrixPairState(targetQureg, copyQureg);
487 
488  // update every density matrix chunk using pairState
489  densmatr_initPureStateLocal(targetQureg, copyQureg);
490  }
491 }
492 
493 void exchangeStateVectors(Qureg qureg, int pairRank){
494  // MPI send/receive vars
495  int TAG=100;
496  MPI_Status status;
497 
498  // Multiple messages are required as MPI uses int rather than long long int for count
499  // For openmpi, messages are further restricted to 2GB in size -- do this for all cases
500  // to be safe
501  long long int maxMessageCount = MPI_MAX_AMPS_IN_MSG;
502  if (qureg.numAmpsPerChunk < maxMessageCount)
503  maxMessageCount = qureg.numAmpsPerChunk;
504 
505  // safely assume MPI_MAX... = 2^n, so division always exact
506  int numMessages = qureg.numAmpsPerChunk/maxMessageCount;
507  int i;
508  long long int offset;
509  // send my state vector to pairRank's qureg.pairStateVec
510  // receive pairRank's state vector into qureg.pairStateVec
511  for (i=0; i<numMessages; i++){
512  offset = i*maxMessageCount;
513  MPI_Sendrecv(&qureg.stateVec.real[offset], maxMessageCount, MPI_QuEST_REAL, pairRank, TAG,
514  &qureg.pairStateVec.real[offset], maxMessageCount, MPI_QuEST_REAL,
515  pairRank, TAG, MPI_COMM_WORLD, &status);
516  //printf("rank: %d err: %d\n", qureg.rank, err);
517  MPI_Sendrecv(&qureg.stateVec.imag[offset], maxMessageCount, MPI_QuEST_REAL, pairRank, TAG,
518  &qureg.pairStateVec.imag[offset], maxMessageCount, MPI_QuEST_REAL,
519  pairRank, TAG, MPI_COMM_WORLD, &status);
520  }
521 }
522 
523 void exchangePairStateVectorHalves(Qureg qureg, int pairRank){
524  // MPI send/receive vars
525  int TAG=100;
526  MPI_Status status;
527  long long int numAmpsToSend = qureg.numAmpsPerChunk >> 1;
528 
529  // Multiple messages are required as MPI uses int rather than long long int for count
530  // For openmpi, messages are further restricted to 2GB in size -- do this for all cases
531  // to be safe
532  long long int maxMessageCount = MPI_MAX_AMPS_IN_MSG;
533  if (numAmpsToSend < maxMessageCount)
534  maxMessageCount = numAmpsToSend;
535 
536  // safely assume MPI_MAX... = 2^n, so division always exact
537  int numMessages = numAmpsToSend/maxMessageCount;
538  int i;
539  long long int offset;
540  // send the bottom half of my state vector to the top half of pairRank's qureg.pairStateVec
541  // receive pairRank's state vector into the top of qureg.pairStateVec
542  for (i=0; i<numMessages; i++){
543  offset = i*maxMessageCount;
544  MPI_Sendrecv(&qureg.pairStateVec.real[offset+numAmpsToSend], maxMessageCount,
545  MPI_QuEST_REAL, pairRank, TAG,
546  &qureg.pairStateVec.real[offset], maxMessageCount, MPI_QuEST_REAL,
547  pairRank, TAG, MPI_COMM_WORLD, &status);
548  //printf("rank: %d err: %d\n", qureg.rank, err);
549  MPI_Sendrecv(&qureg.pairStateVec.imag[offset+numAmpsToSend], maxMessageCount,
550  MPI_QuEST_REAL, pairRank, TAG,
551  &qureg.pairStateVec.imag[offset], maxMessageCount, MPI_QuEST_REAL,
552  pairRank, TAG, MPI_COMM_WORLD, &status);
553  }
554 }
555 
556 // @todo decide where this function should go. It is a preparation for MPI data transfer function
558  long long int sizeInnerBlock, sizeInnerHalfBlock;
559  long long int sizeOuterColumn, sizeOuterHalfColumn;
560  long long int thisInnerBlock, // current block
561  thisOuterColumn, // current column in density matrix
562  thisIndex, // current index in (density matrix representation) state vector
563  thisIndexInOuterColumn,
564  thisIndexInInnerBlock;
565 
566  int outerBit;
567 
568  long long int thisTask;
569  long long int numTasks=qureg.numAmpsPerChunk>>1;
570 
571  // set dimensions
572  sizeInnerHalfBlock = 1LL << targetQubit;
573  sizeInnerBlock = 2LL * sizeInnerHalfBlock;
574  sizeOuterHalfColumn = 1LL << qureg.numQubitsRepresented;
575  sizeOuterColumn = 2LL * sizeOuterHalfColumn;
576 
577 # ifdef _OPENMP
578 # pragma omp parallel \
579  default (none) \
580  shared (sizeInnerBlock,sizeInnerHalfBlock,sizeOuterColumn,sizeOuterHalfColumn, \
581  qureg,numTasks,targetQubit) \
582  private (thisTask,thisInnerBlock,thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
583  thisIndexInInnerBlock,outerBit)
584 # endif
585  {
586 # ifdef _OPENMP
587 # pragma omp for schedule (static)
588 # endif
589  // thisTask iterates over half the elements in this process' chunk of the density matrix
590  // treat this as iterating over all columns, then iterating over half the values
591  // within one column.
592  // If this function has been called, this process' chunk contains half an
593  // outer block or less
594  for (thisTask=0; thisTask<numTasks; thisTask++) {
595  // we want to process all columns in the density matrix,
596  // updating the values for half of each column (one half of each inner block)
597  thisOuterColumn = thisTask / sizeOuterHalfColumn;
598  thisIndexInOuterColumn = thisTask&(sizeOuterHalfColumn-1); // thisTask % sizeOuterHalfColumn
599  thisInnerBlock = thisIndexInOuterColumn/sizeInnerHalfBlock;
600  // get index in state vector corresponding to upper inner block
601  thisIndexInInnerBlock = thisTask&(sizeInnerHalfBlock-1); // thisTask % sizeInnerHalfBlock
602  thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlock*sizeInnerBlock
603  + thisIndexInInnerBlock;
604  // check if we are in the upper or lower half of an outer block
605  outerBit = extractBit(targetQubit, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
606  // if we are in the lower half of an outer block, shift to be in the lower half
607  // of the inner block as well (we want to dephase |0><0| and |1><1| only)
608  thisIndex += outerBit*(sizeInnerHalfBlock);
609 
610  // NOTE: at this point thisIndex should be the index of the element we want to
611  // dephase in the chunk of the state vector on this process, in the
612  // density matrix representation.
613  // thisTask is the index of the pair element in pairStateVec
614  // we will populate the second half of pairStateVec with this process'
615  // data to send
616 
617  qureg.pairStateVec.real[thisTask+numTasks] = qureg.stateVec.real[thisIndex];
618  qureg.pairStateVec.imag[thisTask+numTasks] = qureg.stateVec.imag[thisIndex];
619 
620  }
621  }
622 }
623 
624 void compressPairVectorForTwoQubitDepolarise(Qureg qureg, int targetQubit,
625  int qubit2) {
626 
627  long long int sizeInnerBlockQ1, sizeInnerHalfBlockQ1;
628  long long int sizeInnerBlockQ2, sizeInnerHalfBlockQ2, sizeInnerQuarterBlockQ2;
629  long long int sizeOuterColumn, sizeOuterQuarterColumn;
630  long long int
631  thisInnerBlockQ2,
632  thisOuterColumn, // current column in density matrix
633  thisIndex, // current index in (density matrix representation) state vector
634  thisIndexInOuterColumn,
635  thisIndexInInnerBlockQ1,
636  thisIndexInInnerBlockQ2,
637  thisInnerBlockQ1InInnerBlockQ2;
638  int outerBitQ1, outerBitQ2;
639 
640  long long int thisTask;
641  long long int numTasks=qureg.numAmpsPerChunk>>2;
642 
643  // set dimensions
644  sizeInnerHalfBlockQ1 = 1LL << targetQubit;
645  sizeInnerHalfBlockQ2 = 1LL << qubit2;
646  sizeInnerQuarterBlockQ2 = sizeInnerHalfBlockQ2 >> 1;
647  sizeInnerBlockQ2 = sizeInnerHalfBlockQ2 << 1;
648  sizeInnerBlockQ1 = 2LL * sizeInnerHalfBlockQ1;
649  sizeOuterColumn = 1LL << qureg.numQubitsRepresented;
650  sizeOuterQuarterColumn = sizeOuterColumn >> 2;
651 
652 # ifdef _OPENMP
653 # pragma omp parallel \
654  default (none) \
655  shared (sizeInnerBlockQ1,sizeInnerHalfBlockQ1,sizeInnerQuarterBlockQ2,sizeInnerHalfBlockQ2,sizeInnerBlockQ2, \
656  sizeOuterColumn,sizeOuterQuarterColumn,qureg,numTasks,targetQubit,qubit2) \
657  private (thisTask,thisInnerBlockQ2,thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
658  thisIndexInInnerBlockQ1,thisIndexInInnerBlockQ2,thisInnerBlockQ1InInnerBlockQ2,outerBitQ1,outerBitQ2)
659 # endif
660  {
661 # ifdef _OPENMP
662 # pragma omp for schedule (static)
663 # endif
664  // thisTask iterates over half the elements in this process' chunk of the density matrix
665  // treat this as iterating over all columns, then iterating over half the values
666  // within one column.
667  // If this function has been called, this process' chunk contains half an
668  // outer block or less
669  for (thisTask=0; thisTask<numTasks; thisTask++) {
670  // we want to process all columns in the density matrix,
671  // updating the values for half of each column (one half of each inner block)
672  thisOuterColumn = thisTask / sizeOuterQuarterColumn;
673  // thisTask % sizeOuterQuarterColumn
674  thisIndexInOuterColumn = thisTask&(sizeOuterQuarterColumn-1);
675  thisInnerBlockQ2 = thisIndexInOuterColumn / sizeInnerQuarterBlockQ2;
676  // thisTask % sizeInnerQuarterBlockQ2;
677  thisIndexInInnerBlockQ2 = thisTask&(sizeInnerQuarterBlockQ2-1);
678  thisInnerBlockQ1InInnerBlockQ2 = thisIndexInInnerBlockQ2 / sizeInnerHalfBlockQ1;
679  // thisTask % sizeInnerHalfBlockQ1;
680  thisIndexInInnerBlockQ1 = thisTask&(sizeInnerHalfBlockQ1-1);
681 
682  // get index in state vector corresponding to upper inner block
683  thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlockQ2*sizeInnerBlockQ2
684  + thisInnerBlockQ1InInnerBlockQ2*sizeInnerBlockQ1 + thisIndexInInnerBlockQ1;
685 
686  // check if we are in the upper or lower half of an outer block for Q1
687  outerBitQ1 = extractBit(targetQubit, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
688  // if we are in the lower half of an outer block, shift to be in the lower half
689  // of the inner block as well (we want to dephase |0><0| and |1><1| only)
690  thisIndex += outerBitQ1*(sizeInnerHalfBlockQ1);
691 
692  // check if we are in the upper or lower half of an outer block for Q2
693  outerBitQ2 = extractBit(qubit2, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
694  // if we are in the lower half of an outer block, shift to be in the lower half
695  // of the inner block as well (we want to dephase |0><0| and |1><1| only)
696  thisIndex += outerBitQ2*(sizeInnerQuarterBlockQ2<<1);
697 
698  // NOTE: at this point thisIndex should be the index of the element we want to
699  // dephase in the chunk of the state vector on this process, in the
700  // density matrix representation.
701  // thisTask is the index of the pair element in pairStateVec
702 
703  // state[thisIndex] = (1-depolLevel)*state[thisIndex] + depolLevel*(state[thisIndex]
704  // + pair[thisTask])/2
705  qureg.pairStateVec.real[thisTask+numTasks*2] = qureg.stateVec.real[thisIndex];
706  qureg.pairStateVec.imag[thisTask+numTasks*2] = qureg.stateVec.imag[thisIndex];
707  }
708  }
709 }
710 
711 
712 void densmatr_mixDepolarising(Qureg qureg, int targetQubit, qreal depolLevel) {
713  if (depolLevel == 0)
714  return;
715 
716  int rankIsUpper; // rank is in the upper half of an outer block
717  int pairRank; // rank of corresponding chunk
718 
719  int useLocalDataOnly = densityMatrixBlockFitsInChunk(qureg.numAmpsPerChunk,
720  qureg.numQubitsRepresented, targetQubit);
721 
722  if (useLocalDataOnly){
723  densmatr_mixDepolarisingLocal(qureg, targetQubit, depolLevel);
724  } else {
725  // pack data to send to my pair process into the first half of pairStateVec
726  compressPairVectorForSingleQubitDepolarise(qureg, targetQubit);
727 
728  rankIsUpper = chunkIsUpperInOuterBlock(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit,
729  qureg.numQubitsRepresented);
730  pairRank = getChunkOuterBlockPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk,
731  targetQubit, qureg.numQubitsRepresented);
732 
733  exchangePairStateVectorHalves(qureg, pairRank);
734  densmatr_mixDepolarisingDistributed(qureg, targetQubit, depolLevel);
735  }
736 
737 }
738 
739 void densmatr_mixDamping(Qureg qureg, int targetQubit, qreal damping) {
740  if (damping == 0)
741  return;
742 
743  int rankIsUpper; // rank is in the upper half of an outer block
744  int pairRank; // rank of corresponding chunk
745 
746  int useLocalDataOnly = densityMatrixBlockFitsInChunk(qureg.numAmpsPerChunk,
747  qureg.numQubitsRepresented, targetQubit);
748 
749  if (useLocalDataOnly){
750  densmatr_mixDampingLocal(qureg, targetQubit, damping);
751  } else {
752  // pack data to send to my pair process into the first half of pairStateVec
753  compressPairVectorForSingleQubitDepolarise(qureg, targetQubit);
754 
755  rankIsUpper = chunkIsUpperInOuterBlock(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit,
756  qureg.numQubitsRepresented);
757  pairRank = getChunkOuterBlockPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk,
758  targetQubit, qureg.numQubitsRepresented);
759 
760  exchangePairStateVectorHalves(qureg, pairRank);
761  densmatr_mixDampingDistributed(qureg, targetQubit, damping);
762  }
763 
764 }
765 
766 void densmatr_mixTwoQubitDepolarising(Qureg qureg, int qubit1, int qubit2, qreal depolLevel){
767  if (depolLevel == 0)
768  return;
769  int rankIsUpperBiggerQubit, rankIsUpperSmallerQubit;
770  int pairRank; // rank of corresponding chunk
771  int biggerQubit, smallerQubit;
772 
773  densmatr_mixTwoQubitDephasing(qureg, qubit1, qubit2, depolLevel);
774 
775  qreal eta = 2/depolLevel;
776  qreal delta = eta - 1 - sqrt( (eta-1)*(eta-1) - 1 );
777  qreal gamma = 1+delta;
778  gamma = 1/(gamma*gamma*gamma);
779  qreal GAMMA_PARTS_1_OR_2 = 1.0;
780  // TODO -- test delta too small
781  /*
782  if (fabs(4*delta*(1+delta)*gamma-depolLevel)>1e-5){
783  printf("Numerical error in delta; for small error rates try Taylor expansion.\n");
784  exit(1);
785  }
786  */
787 
788  biggerQubit = qubit1 > qubit2 ? qubit1 : qubit2;
789  smallerQubit = qubit1 < qubit2 ? qubit1 : qubit2;
790  int useLocalDataOnlyBigQubit, useLocalDataOnlySmallQubit;
791 
792  useLocalDataOnlyBigQubit = densityMatrixBlockFitsInChunk(qureg.numAmpsPerChunk,
793  qureg.numQubitsRepresented, biggerQubit);
794  if (useLocalDataOnlyBigQubit){
795  // does parts 1, 2 and 3 locally in one go
796  densmatr_mixTwoQubitDepolarisingLocal(qureg, qubit1, qubit2, delta, gamma);
797  } else {
798  useLocalDataOnlySmallQubit = densityMatrixBlockFitsInChunk(qureg.numAmpsPerChunk,
799  qureg.numQubitsRepresented, smallerQubit);
800  if (useLocalDataOnlySmallQubit){
801  // do part 1 locally
802  densmatr_mixTwoQubitDepolarisingLocalPart1(qureg, smallerQubit, biggerQubit, delta);
803 
804  // do parts 2 and 3 distributed (if part 2 is distributed part 3 is also distributed)
805  // part 2 will be distributed and the value of the small qubit won't matter
806  compressPairVectorForTwoQubitDepolarise(qureg, smallerQubit, biggerQubit);
807  rankIsUpperBiggerQubit = chunkIsUpperInOuterBlock(qureg.chunkId, qureg.numAmpsPerChunk, biggerQubit,
808  qureg.numQubitsRepresented);
809  pairRank = getChunkOuterBlockPairId(rankIsUpperBiggerQubit, qureg.chunkId, qureg.numAmpsPerChunk,
810  biggerQubit, qureg.numQubitsRepresented);
811 
812  exchangePairStateVectorHalves(qureg, pairRank);
813  densmatr_mixTwoQubitDepolarisingDistributed(qureg, smallerQubit, biggerQubit, delta, GAMMA_PARTS_1_OR_2);
814 
815  // part 3 will be distributed but involve rearranging for the smaller qubit
816  compressPairVectorForTwoQubitDepolarise(qureg, smallerQubit, biggerQubit);
817  rankIsUpperBiggerQubit = chunkIsUpperInOuterBlock(qureg.chunkId, qureg.numAmpsPerChunk, biggerQubit,
818  qureg.numQubitsRepresented);
819  pairRank = getChunkOuterBlockPairId(rankIsUpperBiggerQubit, qureg.chunkId, qureg.numAmpsPerChunk,
820  biggerQubit, qureg.numQubitsRepresented);
821 
822  exchangePairStateVectorHalves(qureg, pairRank);
823  densmatr_mixTwoQubitDepolarisingQ1LocalQ2DistributedPart3(qureg, smallerQubit, biggerQubit, delta, gamma);
824  } else {
825  // do part 1, 2 and 3 distributed
826  // part 1
827  compressPairVectorForTwoQubitDepolarise(qureg, smallerQubit, biggerQubit);
828  rankIsUpperSmallerQubit = chunkIsUpperInOuterBlock(qureg.chunkId, qureg.numAmpsPerChunk, smallerQubit,
829  qureg.numQubitsRepresented);
830  pairRank = getChunkOuterBlockPairId(rankIsUpperSmallerQubit, qureg.chunkId, qureg.numAmpsPerChunk,
831  smallerQubit, qureg.numQubitsRepresented);
832 
833  exchangePairStateVectorHalves(qureg, pairRank);
834  densmatr_mixTwoQubitDepolarisingDistributed(qureg, smallerQubit, biggerQubit, delta, GAMMA_PARTS_1_OR_2);
835 
836  // part 2
837  compressPairVectorForTwoQubitDepolarise(qureg, smallerQubit, biggerQubit);
838  rankIsUpperBiggerQubit = chunkIsUpperInOuterBlock(qureg.chunkId, qureg.numAmpsPerChunk, biggerQubit,
839  qureg.numQubitsRepresented);
840  pairRank = getChunkOuterBlockPairId(rankIsUpperBiggerQubit, qureg.chunkId, qureg.numAmpsPerChunk,
841  biggerQubit, qureg.numQubitsRepresented);
842 
843  exchangePairStateVectorHalves(qureg, pairRank);
844  densmatr_mixTwoQubitDepolarisingDistributed(qureg, smallerQubit, biggerQubit, delta, GAMMA_PARTS_1_OR_2);
845 
846  // part 3
847  compressPairVectorForTwoQubitDepolarise(qureg, smallerQubit, biggerQubit);
848  pairRank = getChunkOuterBlockPairIdForPart3(rankIsUpperSmallerQubit, rankIsUpperBiggerQubit,
849  qureg.chunkId, qureg.numAmpsPerChunk, smallerQubit, biggerQubit, qureg.numQubitsRepresented);
850  exchangePairStateVectorHalves(qureg, pairRank);
851  densmatr_mixTwoQubitDepolarisingDistributed(qureg, smallerQubit, biggerQubit, delta, gamma);
852 
853  }
854  }
855 
856 }
857 
858 void statevec_compactUnitary(Qureg qureg, int targetQubit, Complex alpha, Complex beta)
859 {
860  // flag to require memory exchange. 1: an entire block fits on one rank, 0: at most half a block fits on one rank
861  int useLocalDataOnly = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, targetQubit);
862  Complex rot1, rot2;
863 
864  // rank's chunk is in upper half of block
865  int rankIsUpper;
866  int pairRank; // rank of corresponding chunk
867 
868  if (useLocalDataOnly){
869  // all values required to update state vector lie in this rank
870  statevec_compactUnitaryLocal(qureg, targetQubit, alpha, beta);
871  } else {
872  // need to get corresponding chunk of state vector from other rank
873  rankIsUpper = chunkIsUpper(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
874  getRotAngle(rankIsUpper, &rot1, &rot2, alpha, beta);
875  pairRank = getChunkPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
876  // get corresponding values from my pair
877  exchangeStateVectors(qureg, pairRank);
878 
879  // this rank's values are either in the upper of lower half of the block.
880  // send values to compactUnitaryDistributed in the correct order
881  if (rankIsUpper){
882  statevec_compactUnitaryDistributed(qureg,rot1,rot2,
883  qureg.stateVec, //upper
884  qureg.pairStateVec, //lower
885  qureg.stateVec); //output
886  } else {
887  statevec_compactUnitaryDistributed(qureg,rot1,rot2,
888  qureg.pairStateVec, //upper
889  qureg.stateVec, //lower
890  qureg.stateVec); //output
891  }
892  }
893 }
894 
895 void statevec_unitary(Qureg qureg, int targetQubit, ComplexMatrix2 u)
896 {
897  // flag to require memory exchange. 1: an entire block fits on one rank, 0: at most half a block fits on one rank
898  int useLocalDataOnly = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, targetQubit);
899  Complex rot1, rot2;
900 
901  // rank's chunk is in upper half of block
902  int rankIsUpper;
903  int pairRank; // rank of corresponding chunk
904 
905  if (useLocalDataOnly){
906  // all values required to update state vector lie in this rank
907  statevec_unitaryLocal(qureg, targetQubit, u);
908  } else {
909  // need to get corresponding chunk of state vector from other rank
910  rankIsUpper = chunkIsUpper(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
911  getRotAngleFromUnitaryMatrix(rankIsUpper, &rot1, &rot2, u);
912  pairRank = getChunkPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
913  // get corresponding values from my pair
914  exchangeStateVectors(qureg, pairRank);
915 
916  // this rank's values are either in the upper of lower half of the block.
917  // send values to compactUnitaryDistributed in the correct order
918  if (rankIsUpper){
919  statevec_unitaryDistributed(qureg,rot1,rot2,
920  qureg.stateVec, //upper
921  qureg.pairStateVec, //lower
922  qureg.stateVec); //output
923  } else {
924  statevec_unitaryDistributed(qureg,rot1,rot2,
925  qureg.pairStateVec, //upper
926  qureg.stateVec, //lower
927  qureg.stateVec); //output
928  }
929  }
930 
931 
932 }
933 
934 void statevec_controlledCompactUnitary(Qureg qureg, int controlQubit, int targetQubit, Complex alpha, Complex beta)
935 {
936  // flag to require memory exchange. 1: an entire block fits on one rank, 0: at most half a block fits on one rank
937  int useLocalDataOnly = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, targetQubit);
938  Complex rot1, rot2;
939 
940  // rank's chunk is in upper half of block
941  int rankIsUpper;
942  int pairRank; // rank of corresponding chunk
943 
944  if (useLocalDataOnly){
945  // all values required to update state vector lie in this rank
946  statevec_controlledCompactUnitaryLocal(qureg, controlQubit, targetQubit, alpha, beta);
947  } else {
948  // need to get corresponding chunk of state vector from other rank
949  rankIsUpper = chunkIsUpper(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
950  getRotAngle(rankIsUpper, &rot1, &rot2, alpha, beta);
951  pairRank = getChunkPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
952  //printf("%d rank has pair rank: %d\n", qureg.rank, pairRank);
953  // get corresponding values from my pair
954  exchangeStateVectors(qureg, pairRank);
955 
956  // this rank's values are either in the upper of lower half of the block. send values to controlledCompactUnitaryDistributed
957  // in the correct order
958  if (rankIsUpper){
959  statevec_controlledCompactUnitaryDistributed(qureg,controlQubit,rot1,rot2,
960  qureg.stateVec, //upper
961  qureg.pairStateVec, //lower
962  qureg.stateVec); //output
963  } else {
964  statevec_controlledCompactUnitaryDistributed(qureg,controlQubit,rot1,rot2,
965  qureg.pairStateVec, //upper
966  qureg.stateVec, //lower
967  qureg.stateVec); //output
968  }
969  }
970 }
971 
972 void statevec_controlledUnitary(Qureg qureg, int controlQubit, int targetQubit,
973  ComplexMatrix2 u)
974 {
975  // flag to require memory exchange. 1: an entire block fits on one rank, 0: at most half a block fits on one rank
976  int useLocalDataOnly = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, targetQubit);
977  Complex rot1, rot2;
978 
979  // rank's chunk is in upper half of block
980  int rankIsUpper;
981  int pairRank; // rank of corresponding chunk
982 
983  if (useLocalDataOnly){
984  // all values required to update state vector lie in this rank
985  statevec_controlledUnitaryLocal(qureg, controlQubit, targetQubit, u);
986  } else {
987  // need to get corresponding chunk of state vector from other rank
988  rankIsUpper = chunkIsUpper(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
989  getRotAngleFromUnitaryMatrix(rankIsUpper, &rot1, &rot2, u);
990  pairRank = getChunkPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
991  //printf("%d rank has pair rank: %d\n", qureg.rank, pairRank);
992  // get corresponding values from my pair
993  exchangeStateVectors(qureg, pairRank);
994 
995  // this rank's values are either in the upper of lower half of the block. send values to controlledUnitaryDistributed
996  // in the correct order
997  if (rankIsUpper){
998  statevec_controlledUnitaryDistributed(qureg,controlQubit,rot1,rot2,
999  qureg.stateVec, //upper
1000  qureg.pairStateVec, //lower
1001  qureg.stateVec); //output
1002  } else {
1003  statevec_controlledUnitaryDistributed(qureg,controlQubit,rot1,rot2,
1004  qureg.pairStateVec, //upper
1005  qureg.stateVec, //lower
1006  qureg.stateVec); //output
1007  }
1008  }
1009 }
1010 
1011 void statevec_multiControlledUnitary(Qureg qureg, long long int ctrlQubitsMask, long long int ctrlFlipMask, int targetQubit, ComplexMatrix2 u)
1012 {
1013  // flag to require memory exchange. 1: an entire block fits on one rank, 0: at most half a block fits on one rank
1014  int useLocalDataOnly = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, targetQubit);
1015  Complex rot1, rot2;
1016 
1017  // rank's chunk is in upper half of block
1018  int rankIsUpper;
1019  int pairRank; // rank of corresponding chunk
1020 
1021  if (useLocalDataOnly){
1022  // all values required to update state vector lie in this rank
1023  statevec_multiControlledUnitaryLocal(qureg, targetQubit, ctrlQubitsMask, ctrlFlipMask, u);
1024  } else {
1025  // need to get corresponding chunk of state vector from other rank
1026  rankIsUpper = chunkIsUpper(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1027  getRotAngleFromUnitaryMatrix(rankIsUpper, &rot1, &rot2, u);
1028  pairRank = getChunkPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1029 
1030  // get corresponding values from my pair
1031  exchangeStateVectors(qureg, pairRank);
1032 
1033  // this rank's values are either in the upper of lower half of the block. send values to multiControlledUnitaryDistributed
1034  // in the correct order
1035  if (rankIsUpper){
1036  statevec_multiControlledUnitaryDistributed(qureg,targetQubit,ctrlQubitsMask,ctrlFlipMask,rot1,rot2,
1037  qureg.stateVec, //upper
1038  qureg.pairStateVec, //lower
1039  qureg.stateVec); //output
1040  } else {
1041  statevec_multiControlledUnitaryDistributed(qureg,targetQubit,ctrlQubitsMask,ctrlFlipMask,rot1,rot2,
1042  qureg.pairStateVec, //upper
1043  qureg.stateVec, //lower
1044  qureg.stateVec); //output
1045  }
1046  }
1047 }
1048 void statevec_pauliX(Qureg qureg, int targetQubit)
1049 {
1050  // flag to require memory exchange. 1: an entire block fits on one rank, 0: at most half a block fits on one rank
1051  int useLocalDataOnly = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, targetQubit);
1052 
1053  // rank's chunk is in upper half of block
1054  int rankIsUpper;
1055  int pairRank; // rank of corresponding chunk
1056 
1057  if (useLocalDataOnly){
1058  // all values required to update state vector lie in this rank
1059  statevec_pauliXLocal(qureg, targetQubit);
1060  } else {
1061  // need to get corresponding chunk of state vector from other rank
1062  rankIsUpper = chunkIsUpper(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1063  pairRank = getChunkPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1064  //printf("%d rank has pair rank: %d\n", qureg.rank, pairRank);
1065  // get corresponding values from my pair
1066  exchangeStateVectors(qureg, pairRank);
1067  // this rank's values are either in the upper of lower half of the block. pauliX just replaces
1068  // this rank's values with pair values
1070  qureg.pairStateVec, // in
1071  qureg.stateVec); // out
1072  }
1073 }
1074 
1075 void statevec_controlledNot(Qureg qureg, int controlQubit, int targetQubit)
1076 {
1077  // flag to require memory exchange. 1: an entire block fits on one rank, 0: at most half a block fits on one rank
1078  int useLocalDataOnly = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, targetQubit);
1079  int rankIsUpper; // rank's chunk is in upper half of block
1080  int pairRank; // rank of corresponding chunk
1081 
1082  if (useLocalDataOnly){
1083  // all values required to update state vector lie in this rank
1084  statevec_controlledNotLocal(qureg, controlQubit, targetQubit);
1085  } else {
1086  // need to get corresponding chunk of state vector from other rank
1087  rankIsUpper = chunkIsUpper(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1088  pairRank = getChunkPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1089  // get corresponding values from my pair
1090  exchangeStateVectors(qureg, pairRank);
1091  statevec_controlledNotDistributed(qureg,controlQubit,
1092  qureg.pairStateVec, //in
1093  qureg.stateVec); //out
1094  }
1095 }
1096 
1097 void statevec_multiControlledMultiQubitNot(Qureg qureg, int ctrlMask, int targMask)
1098 {
1099  /* operation is the same regardless of control and target ordering, hence
1100  * we accept only bitMasks (for convenience of caller, when shifting qubits
1101  * for density matrices)
1102  */
1103 
1104  // global index of the first basis state in this node
1105  long long int firstInd = qureg.chunkId * qureg.numAmpsPerChunk;
1106 
1107  /* optimisation: if this node doesn't contain any amplitudes for which {ctrls}={1}
1108  * (and hence, neither does the pair node), then these pair nodes have nothing to modify
1109  * nor any need to communicate, and can halt. No ctrls are contained in the node
1110  * if the distance from the first index, to the next index where {ctrls}=1, is
1111  * greater than the total contained amplitudes. This is a worthwhile optimisation,
1112  * since although we must still wait for the slowest node, we have potentially reduced
1113  * the network traffic and might avoid saturation.
1114  */
1115  if ((firstInd|ctrlMask) - firstInd >= qureg.numAmpsPerChunk)
1116  return;
1117 
1118  /* nodes communicate pairwise, and (ignoring ctrls) swap all their amplitudes with mate.
1119  * hence we find |pairState> = X_{targs}|firstStateInNode>, determine which node contains
1120  * |pairState>, and swap state-vector with it (unless it happens to be this node).
1121  */
1122 
1123  // global index of the corresponding NOT'd first basis state
1124  long long int pairInd = firstInd ^ targMask;
1125  int pairRank = pairInd / qureg.numAmpsPerChunk;
1126  int useLocalDataOnly = (pairRank == qureg.chunkId);
1127 
1128  if (useLocalDataOnly) {
1129  // swaps amplitudes locally, setting |a>=X|b>, and |b>=X|a>
1130  statevec_multiControlledMultiQubitNotLocal(qureg, ctrlMask, targMask);
1131  } else {
1132  // swaps amplitudes with pair node
1133  exchangeStateVectors(qureg, pairRank);
1134  // modifies only |a>=X|b> (pair node handles the converse)
1136  qureg, ctrlMask, targMask,
1137  qureg.pairStateVec, // in
1138  qureg.stateVec); // out
1139  }
1140 }
1141 
1142 void statevec_pauliY(Qureg qureg, int targetQubit)
1143 {
1144  int conjFac = 1;
1145 
1146  // flag to require memory exchange. 1: an entire block fits on one rank, 0: at most half a block fits on one rank
1147  int useLocalDataOnly = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, targetQubit);
1148  int rankIsUpper; // rank's chunk is in upper half of block
1149  int pairRank; // rank of corresponding chunk
1150 
1151  if (useLocalDataOnly){
1152  statevec_pauliYLocal(qureg, targetQubit, conjFac);
1153  } else {
1154  // need to get corresponding chunk of state vector from other rank
1155  rankIsUpper = chunkIsUpper(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1156  pairRank = getChunkPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1157  // get corresponding values from my pair
1158  exchangeStateVectors(qureg, pairRank);
1159  // this rank's values are either in the upper of lower half of the block
1161  qureg.pairStateVec, // in
1162  qureg.stateVec, // out
1163  rankIsUpper, conjFac);
1164  }
1165 }
1166 
1167 void statevec_pauliYConj(Qureg qureg, int targetQubit)
1168 {
1169  int conjFac = -1;
1170 
1171  // flag to require memory exchange. 1: an entire block fits on one rank, 0: at most half a block fits on one rank
1172  int useLocalDataOnly = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, targetQubit);
1173  int rankIsUpper; // rank's chunk is in upper half of block
1174  int pairRank; // rank of corresponding chunk
1175 
1176  if (useLocalDataOnly){
1177  statevec_pauliYLocal(qureg, targetQubit, conjFac);
1178  } else {
1179  // need to get corresponding chunk of state vector from other rank
1180  rankIsUpper = chunkIsUpper(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1181  pairRank = getChunkPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1182  // get corresponding values from my pair
1183  exchangeStateVectors(qureg, pairRank);
1184  // this rank's values are either in the upper of lower half of the block
1186  qureg.pairStateVec, // in
1187  qureg.stateVec, // out
1188  rankIsUpper, conjFac);
1189  }
1190 }
1191 
1192 void statevec_controlledPauliY(Qureg qureg, int controlQubit, int targetQubit)
1193 {
1194  int conjFac = 1;
1195 
1196  // flag to require memory exchange. 1: an entire block fits on one rank, 0: at most half a block fits on one rank
1197  int useLocalDataOnly = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, targetQubit);
1198  int rankIsUpper; // rank's chunk is in upper half of block
1199  int pairRank; // rank of corresponding chunk
1200 
1201  if (useLocalDataOnly){
1202  // all values required to update state vector lie in this rank
1203  statevec_controlledPauliYLocal(qureg, controlQubit, targetQubit, conjFac);
1204  } else {
1205  // need to get corresponding chunk of state vector from other rank
1206  rankIsUpper = chunkIsUpper(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1207  pairRank = getChunkPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1208  // get corresponding values from my pair
1209  exchangeStateVectors(qureg, pairRank);
1210  // this rank's values are either in the upper of lower half of the block
1211  if (rankIsUpper){
1212  statevec_controlledPauliYDistributed(qureg,controlQubit,
1213  qureg.pairStateVec, //in
1214  qureg.stateVec,
1215  conjFac); //out
1216  } else {
1217  statevec_controlledPauliYDistributed(qureg,controlQubit,
1218  qureg.pairStateVec, //in
1219  qureg.stateVec,
1220  - conjFac); //out
1221  }
1222  }
1223 }
1224 
1225 void statevec_controlledPauliYConj(Qureg qureg, int controlQubit, int targetQubit)
1226 {
1227  int conjFac = -1;
1228 
1229  // flag to require memory exchange. 1: an entire block fits on one rank, 0: at most half a block fits on one rank
1230  int useLocalDataOnly = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, targetQubit);
1231  int rankIsUpper; // rank's chunk is in upper half of block
1232  int pairRank; // rank of corresponding chunk
1233 
1234  if (useLocalDataOnly){
1235  // all values required to update state vector lie in this rank
1236  statevec_controlledPauliYLocal(qureg, controlQubit, targetQubit, conjFac);
1237  } else {
1238  // need to get corresponding chunk of state vector from other rank
1239  rankIsUpper = chunkIsUpper(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1240  pairRank = getChunkPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1241  // get corresponding values from my pair
1242  exchangeStateVectors(qureg, pairRank);
1243  // this rank's values are either in the upper of lower half of the block
1244  if (rankIsUpper){
1245  statevec_controlledPauliYDistributed(qureg,controlQubit,
1246  qureg.pairStateVec, //in
1247  qureg.stateVec,
1248  conjFac); //out
1249  } else {
1250  statevec_controlledPauliYDistributed(qureg,controlQubit,
1251  qureg.pairStateVec, //in
1252  qureg.stateVec,
1253  - conjFac); //out
1254  }
1255  }
1256 }
1257 
1258 void statevec_hadamard(Qureg qureg, int targetQubit)
1259 {
1260  // flag to require memory exchange. 1: an entire block fits on one rank, 0: at most half a block fits on one rank
1261  int useLocalDataOnly = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, targetQubit);
1262 
1263  // rank's chunk is in upper half of block
1264  int rankIsUpper;
1265  int pairRank; // rank of corresponding chunk
1266 
1267  if (useLocalDataOnly){
1268  // all values required to update state vector lie in this rank
1269  statevec_hadamardLocal(qureg, targetQubit);
1270  } else {
1271  // need to get corresponding chunk of state vector from other rank
1272  rankIsUpper = chunkIsUpper(qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1273  pairRank = getChunkPairId(rankIsUpper, qureg.chunkId, qureg.numAmpsPerChunk, targetQubit);
1274  //printf("%d rank has pair rank: %d\n", qureg.rank, pairRank);
1275  // get corresponding values from my pair
1276  exchangeStateVectors(qureg, pairRank);
1277  // this rank's values are either in the upper of lower half of the block. send values to hadamardDistributed
1278  // in the correct order
1279  if (rankIsUpper){
1281  qureg.stateVec, //upper
1282  qureg.pairStateVec, //lower
1283  qureg.stateVec, rankIsUpper); //output
1284  } else {
1286  qureg.pairStateVec, //upper
1287  qureg.stateVec, //lower
1288  qureg.stateVec, rankIsUpper); //output
1289  }
1290  }
1291 }
1292 
1303 static int isChunkToSkipInFindPZero(int chunkId, long long int chunkSize, int measureQubit)
1304 {
1305  long long int sizeHalfBlock = 1LL << (measureQubit);
1306  int numChunksToSkip = sizeHalfBlock/chunkSize;
1307  // calculate probability by summing over numChunksToSkip, then skipping numChunksToSkip, etc
1308  int bitToCheck = chunkId & numChunksToSkip;
1309  return bitToCheck;
1310 }
1311 
1312 qreal statevec_calcProbOfOutcome(Qureg qureg, int measureQubit, int outcome)
1313 {
1314  qreal stateProb=0, totalStateProb=0;
1315  int skipValuesWithinRank = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, measureQubit);
1316  if (skipValuesWithinRank) {
1317  stateProb = statevec_findProbabilityOfZeroLocal(qureg, measureQubit);
1318  } else {
1319  if (!isChunkToSkipInFindPZero(qureg.chunkId, qureg.numAmpsPerChunk, measureQubit)){
1320  stateProb = statevec_findProbabilityOfZeroDistributed(qureg);
1321  } else stateProb = 0;
1322  }
1323  MPI_Allreduce(&stateProb, &totalStateProb, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
1324  if (outcome==1) totalStateProb = 1.0 - totalStateProb;
1325  return totalStateProb;
1326 }
1327 
1328 qreal densmatr_calcProbOfOutcome(Qureg qureg, int measureQubit, int outcome) {
1329 
1330  qreal zeroProb = densmatr_findProbabilityOfZeroLocal(qureg, measureQubit);
1331 
1332  qreal outcomeProb;
1333  MPI_Allreduce(&zeroProb, &outcomeProb, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
1334  if (outcome == 1)
1335  outcomeProb = 1.0 - outcomeProb;
1336 
1337  return outcomeProb;
1338 }
1339 
1340 void statevec_calcProbOfAllOutcomes(qreal* retProbs, Qureg qureg, int* qubits, int numQubits) {
1341 
1342  // each node populates retProbs with contributions from the subset of amps in each
1343  statevec_calcProbOfAllOutcomesLocal(retProbs, qureg, qubits, numQubits);
1344 
1345  // then, retProbs are summed element-wise
1346  MPI_Allreduce(MPI_IN_PLACE, retProbs, 1LL<<numQubits, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
1347 }
1348 
1349 void densmatr_calcProbOfAllOutcomes(qreal* retProbs, Qureg qureg, int* qubits, int numQubits) {
1350 
1351  // each node populates retProbs with contributions from the subset of amps in each
1352  densmatr_calcProbOfAllOutcomesLocal(retProbs, qureg, qubits, numQubits);
1353 
1354  // then, retProbs are summed element-wise
1355  MPI_Allreduce(MPI_IN_PLACE, retProbs, 1LL<<numQubits, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
1356 }
1357 
1359 
1360  qreal localPurity = densmatr_calcPurityLocal(qureg);
1361 
1362  qreal globalPurity;
1363  MPI_Allreduce(&localPurity, &globalPurity, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
1364 
1365  return globalPurity;
1366 }
1367 
1368 void statevec_collapseToKnownProbOutcome(Qureg qureg, int measureQubit, int outcome, qreal totalStateProb)
1369 {
1370  int skipValuesWithinRank = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, measureQubit);
1371  if (skipValuesWithinRank) {
1372  statevec_collapseToKnownProbOutcomeLocal(qureg, measureQubit, outcome, totalStateProb);
1373  } else {
1374  if (!isChunkToSkipInFindPZero(qureg.chunkId, qureg.numAmpsPerChunk, measureQubit)){
1375  // chunk has amps for q=0
1376  if (outcome==0) statevec_collapseToKnownProbOutcomeDistributedRenorm(qureg, measureQubit,
1377  totalStateProb);
1379  } else {
1380  // chunk has amps for q=1
1381  if (outcome==1) statevec_collapseToKnownProbOutcomeDistributedRenorm(qureg, measureQubit,
1382  totalStateProb);
1384  }
1385  }
1386 }
1387 
1388 void seedQuEST(QuESTEnv *env, unsigned long int* seedArray, int numSeeds) {
1389 
1390  // it is imperative every node agrees on the seed, so that random decisions
1391  // agree on every node. Hence we use only the master node keys.
1392  MPI_Bcast(seedArray, numSeeds, MPI_UNSIGNED_LONG, 0, MPI_COMM_WORLD);
1393 
1394  // free existing seed array, if exists
1395  if (env->seeds != NULL)
1396  free(env->seeds);
1397 
1398  // record keys in permanent heap
1399  env->seeds = malloc(numSeeds * sizeof *(env->seeds));
1400  for (int i=0; i<numSeeds; i++)
1401  (env->seeds)[i] = seedArray[i];
1402  env->numSeeds = numSeeds;
1403 
1404  // pass keys to Mersenne Twister seeder
1405  init_by_array(seedArray, numSeeds);
1406 }
1407 
1412 long long int getGlobalIndOfOddParityInChunk(Qureg qureg, int qb1, int qb2) {
1413  long long int chunkStartInd = qureg.numAmpsPerChunk * qureg.chunkId;
1414  long long int chunkEndInd = chunkStartInd + qureg.numAmpsPerChunk; // exclusive
1415  long long int oddParityInd;
1416 
1417  if (extractBit(qb1, chunkStartInd) != extractBit(qb2, chunkStartInd))
1418  return chunkStartInd;
1419 
1420  oddParityInd = flipBit(chunkStartInd, qb1);
1421  if (oddParityInd >= chunkStartInd && oddParityInd < chunkEndInd)
1422  return oddParityInd;
1423 
1424  oddParityInd = flipBit(chunkStartInd, qb2);
1425  if (oddParityInd >= chunkStartInd && oddParityInd < chunkEndInd)
1426  return oddParityInd;
1427 
1428  return -1;
1429 }
1430 
1431 void statevec_swapQubitAmps(Qureg qureg, int qb1, int qb2) {
1432 
1433  // perform locally if possible
1434  int qbBig = (qb1 > qb2)? qb1 : qb2;
1435  if (halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, qbBig))
1436  return statevec_swapQubitAmpsLocal(qureg, qb1, qb2);
1437 
1438  // do nothing if this node contains no amplitudes to swap
1439  long long int oddParityGlobalInd = getGlobalIndOfOddParityInChunk(qureg, qb1, qb2);
1440  if (oddParityGlobalInd == -1)
1441  return;
1442 
1443  // determine and swap amps with pair node
1444  int pairRank = flipBit(flipBit(oddParityGlobalInd, qb1), qb2) / qureg.numAmpsPerChunk;
1445  exchangeStateVectors(qureg, pairRank);
1446  statevec_swapQubitAmpsDistributed(qureg, pairRank, qb1, qb2);
1447 }
1448 
1458 void statevec_multiControlledTwoQubitUnitary(Qureg qureg, long long int ctrlMask, int q1, int q2, ComplexMatrix4 u) {
1459  int q1FitsInNode = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, q1);
1460  int q2FitsInNode = halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, q2);
1461 
1462  if (q1FitsInNode && q2FitsInNode) {
1463  statevec_multiControlledTwoQubitUnitaryLocal(qureg, ctrlMask, q1, q2, u);
1464 
1465  } else if (q1FitsInNode) {
1466  int qSwap = (q1 > 0)? q1-1 : q1+1;
1467 
1468  // ensure ctrl == qSwap, ensure ctrlMask updates under the swap
1469  if (maskContainsBit(ctrlMask, qSwap))
1470  ctrlMask = flipBit(flipBit(ctrlMask, q2), qSwap);
1471 
1472  statevec_swapQubitAmps(qureg, q2, qSwap);
1473  statevec_multiControlledTwoQubitUnitaryLocal(qureg, ctrlMask, q1, qSwap, u);
1474  statevec_swapQubitAmps(qureg, q2, qSwap);
1475 
1476  } else if (q2FitsInNode) {
1477  int qSwap = (q2 > 0)? q2-1 : q2+1;
1478 
1479  // ensure ctrl == qSwap, ensure ctrlMask updates under the swap
1480  if (maskContainsBit(ctrlMask, qSwap))
1481  ctrlMask = flipBit(flipBit(ctrlMask, q1), qSwap);
1482 
1483  statevec_swapQubitAmps(qureg, q1, qSwap);
1484  statevec_multiControlledTwoQubitUnitaryLocal(qureg, ctrlMask, qSwap, q2, u);
1485  statevec_swapQubitAmps(qureg, q1, qSwap);
1486 
1487  } else {
1488  // we know with certainty that both q1 and q2 >= 2
1489  int swap1 = 0;
1490  int swap2 = 1;
1491 
1492  // if ctrl == swap1 or swap2, ensure ctrlMask updates under the swap
1493  if (maskContainsBit(ctrlMask, swap1))
1494  ctrlMask = flipBit(flipBit(ctrlMask, swap1), q1);
1495  if (maskContainsBit(ctrlMask, swap2))
1496  ctrlMask = flipBit(flipBit(ctrlMask, swap2), q2);
1497 
1498  statevec_swapQubitAmps(qureg, q1, swap1);
1499  statevec_swapQubitAmps(qureg, q2, swap2);
1500  statevec_multiControlledTwoQubitUnitaryLocal(qureg, ctrlMask, swap1, swap2, u);
1501  statevec_swapQubitAmps(qureg, q1, swap1);
1502  statevec_swapQubitAmps(qureg, q2, swap2);
1503  }
1504 }
1505 
1514 void statevec_multiControlledMultiQubitUnitary(Qureg qureg, long long int ctrlMask, int* targs, int numTargs, ComplexMatrixN u) {
1515 
1516  // bit mask of target qubits (for quick collision checking)
1517  long long int targMask = getQubitBitMask(targs, numTargs);
1518 
1519  // find lowest qubit available for swapping (isn't in targs)
1520  int freeQb=0;
1521  while (maskContainsBit(targMask, freeQb))
1522  freeQb++;
1523 
1524  // assign indices of where each target will be swapped to (else itself)
1525  int swapTargs[numTargs];
1526  for (int t=0; t<numTargs; t++) {
1527  if (halfMatrixBlockFitsInChunk(qureg.numAmpsPerChunk, targs[t]))
1528  swapTargs[t] = targs[t];
1529  else {
1530  // mark swap
1531  swapTargs[t] = freeQb;
1532 
1533  // update ctrlMask if swapped-out qubit was a control
1534  if (maskContainsBit(ctrlMask, swapTargs[t]))
1535  ctrlMask = flipBit(flipBit(ctrlMask, swapTargs[t]), targs[t]); // swap targ and ctrl
1536 
1537  // locate next available on-chunk qubit
1538  freeQb++;
1539  while (maskContainsBit(targMask, freeQb))
1540  freeQb++;
1541  }
1542  }
1543 
1544  // perform swaps as necessary
1545  for (int t=0; t<numTargs; t++)
1546  if (swapTargs[t] != targs[t])
1547  statevec_swapQubitAmps(qureg, targs[t], swapTargs[t]);
1548 
1549  // all target qubits have now been swapped into local memory
1550  statevec_multiControlledMultiQubitUnitaryLocal(qureg, ctrlMask, swapTargs, numTargs, u);
1551 
1552  // undo swaps
1553  for (int t=0; t<numTargs; t++)
1554  if (swapTargs[t] != targs[t])
1555  statevec_swapQubitAmps(qureg, targs[t], swapTargs[t]);
1556 }
1557 
1558 
1560 
1561  /* since, for every elem in 2^N op, there is a column in 2^N x 2^N qureg,
1562  * we know immediately (by each node containing at least 1 element of op)
1563  * that every node contains at least 1 column. Hence, we know that pairStateVec
1564  * of qureg can fit the entirety of op.
1565  */
1566 
1567  // load up our local contribution
1568  long long int localOffset = qureg.chunkId * op.numElemsPerChunk;
1569  memcpy(&qureg.pairStateVec.real[localOffset], op.real, op.numElemsPerChunk * sizeof(qreal));
1570  memcpy(&qureg.pairStateVec.imag[localOffset], op.imag, op.numElemsPerChunk * sizeof(qreal));
1571 
1572  // work out how many messages are needed to send op chunks (2GB limit)
1573  long long int maxMsgSize = MPI_MAX_AMPS_IN_MSG;
1574  if (op.numElemsPerChunk < maxMsgSize)
1575  maxMsgSize = op.numElemsPerChunk;
1576  int numMsgs = op.numElemsPerChunk / maxMsgSize; // since MPI_MAX... = 2^n, division is exact
1577 
1578  // each node has a turn at broadcasting its contribution of op
1579  for (int broadcaster=0; broadcaster < qureg.numChunks; broadcaster++) {
1580  long long int broadOffset = broadcaster * op.numElemsPerChunk;
1581 
1582  // (while keeping each message smaller than MPI max)
1583  for (int i=0; i<numMsgs; i++) {
1584  MPI_Bcast(
1585  &qureg.pairStateVec.real[broadOffset + i*maxMsgSize],
1586  maxMsgSize, MPI_QuEST_REAL, broadcaster, MPI_COMM_WORLD);
1587  MPI_Bcast(
1588  &qureg.pairStateVec.imag[broadOffset + i*maxMsgSize],
1589  maxMsgSize, MPI_QuEST_REAL, broadcaster, MPI_COMM_WORLD);
1590  }
1591  }
1592 }
1593 
1595 
1596  copyDiagOpIntoMatrixPairState(qureg, op);
1597  densmatr_applyDiagonalOpLocal(qureg, op);
1598 }
1599 
1601 
1602  Complex localExpec = statevec_calcExpecDiagonalOpLocal(qureg, op);
1603  if (qureg.numChunks == 1)
1604  return localExpec;
1605 
1606  qreal localReal = localExpec.real;
1607  qreal localImag = localExpec.imag;
1608  qreal globalReal, globalImag;
1609  MPI_Allreduce(&localReal, &globalReal, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
1610  MPI_Allreduce(&localImag, &globalImag, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
1611 
1612  Complex globalExpec;
1613  globalExpec.real = globalReal;
1614  globalExpec.imag = globalImag;
1615  return globalExpec;
1616 }
1617 
1619 
1620  Complex localVal = densmatr_calcExpecDiagonalOpLocal(qureg, op);
1621  if (qureg.numChunks == 1)
1622  return localVal;
1623 
1624  qreal localRe = localVal.real;
1625  qreal localIm = localVal.imag;
1626  qreal globalRe, globalIm;
1627 
1628  MPI_Allreduce(&localRe, &globalRe, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
1629  MPI_Allreduce(&localIm, &globalIm, 1, MPI_QuEST_REAL, MPI_SUM, MPI_COMM_WORLD);
1630 
1631  Complex globalVal;
1632  globalVal.real = globalRe;
1633  globalVal.imag = globalIm;
1634  return globalVal;
1635 }
void densmatr_mixDepolarising(Qureg qureg, int targetQubit, qreal depolLevel)
void destroyQuESTEnv(QuESTEnv env)
Destroy the QuEST environment.
void init_by_array(unsigned long init_key[], int key_length)
Definition: mt19937ar.c:80
qreal statevec_calcProbOfOutcome(Qureg qureg, int measureQubit, int outcome)
qreal densmatr_calcProbOfOutcome(Qureg qureg, int measureQubit, int outcome)
qreal statevec_getImagAmp(Qureg qureg, long long int index)
void statevec_hadamard(Qureg qureg, int targetQubit)
void syncQuESTEnv(QuESTEnv env)
Guarantees that all code up to the given point has been executed on all nodes (if running in distribu...
void statevec_controlledNotLocal(Qureg qureg, int controlQubit, int targetQubit)
Definition: QuEST_cpu.c:2679
static void getRotAngle(int chunkIsUpper, Complex *rot1, Complex *rot2, Complex alpha, Complex beta)
Get rotation values for a given chunk.
void statevec_pauliYLocal(Qureg qureg, int targetQubit, int conjFac)
Definition: QuEST_cpu.c:2887
qreal densmatr_calcInnerProduct(Qureg a, Qureg b)
qreal densmatr_calcHilbertSchmidtDistanceSquaredLocal(Qureg a, Qureg b)
computes Tr((a-b) conjTrans(a-b)) = sum of abs values of (a-b)
Definition: QuEST_cpu.c:934
void densmatr_calcProbOfAllOutcomes(qreal *retProbs, Qureg qureg, int *qubits, int numQubits)
int rank
Definition: QuEST.h:364
void seedQuESTDefault(QuESTEnv *env)
Seeds the random number generator with the (master node) current time and process ID.
Definition: QuEST.c:1614
void densmatr_calcProbOfAllOutcomesLocal(qreal *outcomeProbs, Qureg qureg, int *qubits, int numQubits)
Definition: QuEST_cpu.c:3616
static int isChunkToSkipInFindPZero(int chunkId, long long int chunkSize, int measureQubit)
Find chunks to skip when calculating probability of qubit being zero.
ComplexArray pairStateVec
Temporary storage for a chunk of the state vector received from another process in the MPI version.
Definition: QuEST.h:343
qreal statevec_findProbabilityOfZeroDistributed(Qureg qureg)
Measure the probability of a specified qubit being in the zero state across all amplitudes held in th...
Definition: QuEST_cpu.c:3513
void densmatr_mixDepolarisingDistributed(Qureg qureg, int targetQubit, qreal depolLevel)
Definition: QuEST_cpu.c:230
void densmatr_mixTwoQubitDepolarisingQ1LocalQ2DistributedPart3(Qureg qureg, int targetQubit, int qubit2, qreal delta, qreal gamma)
Definition: QuEST_cpu.c:638
void getEnvironmentString(QuESTEnv env, char str[200])
Sets str to a string containing information about the runtime environment, including whether simulati...
int numSeeds
Definition: QuEST.h:367
int numChunks
Number of chunks the state vector is broken up into – the number of MPI processes used.
Definition: QuEST.h:338
qreal densmatr_calcFidelity(Qureg qureg, Qureg pureState)
void statevec_swapQubitAmpsLocal(Qureg qureg, int qb1, int qb2)
It is ensured that all amplitudes needing to be swapped are on this node.
Definition: QuEST_cpu.c:3922
void statevec_multiControlledUnitaryDistributed(Qureg qureg, int targetQubit, long long int ctrlQubitsMask, long long int ctrlFlipMask, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Apply a unitary operation to a single qubit in the state vector of probability amplitudes,...
Definition: QuEST_cpu.c:2542
void statevec_multiControlledMultiQubitNotLocal(Qureg qureg, int ctrlMask, int targMask)
Definition: QuEST_cpu.c:2778
void statevec_controlledUnitaryDistributed(Qureg qureg, int controlQubit, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Rotate a single qubit in the state vector of probability amplitudes, given two complex numbers alpha ...
Definition: QuEST_cpu.c:2476
void statevec_collapseToKnownProbOutcomeLocal(Qureg qureg, int measureQubit, int outcome, qreal totalProbability)
Update the state vector to be consistent with measuring measureQubit=0 if outcome=0 and measureQubit=...
Definition: QuEST_cpu.c:3767
Complex statevec_calcExpecDiagonalOp(Qureg qureg, DiagonalOp op)
Complex densmatr_calcExpecDiagonalOp(Qureg qureg, DiagonalOp op)
void compressPairVectorForTwoQubitDepolarise(Qureg qureg, int targetQubit, int qubit2)
void statevec_compactUnitaryLocal(Qureg qureg, int targetQubit, Complex alpha, Complex beta)
Definition: QuEST_cpu.c:1754
qreal densmatr_calcPurityLocal(Qureg qureg)
Definition: QuEST_cpu.c:872
Complex statevec_calcExpecDiagonalOpLocal(Qureg qureg, DiagonalOp op)
Definition: QuEST_cpu.c:4124
Represents a 4x4 matrix of complex numbers.
Definition: QuEST.h:175
Information about the environment the program is running in.
Definition: QuEST.h:362
void statevec_multiControlledMultiQubitUnitaryLocal(Qureg qureg, long long int ctrlMask, int *targs, int numTargs, ComplexMatrixN u)
Definition: QuEST_cpu.c:1912
void statevec_multiControlledTwoQubitUnitaryLocal(Qureg qureg, long long int ctrlMask, int q1, int q2, ComplexMatrix4 u)
Definition: QuEST_cpu.c:1813
Represents a general 2^N by 2^N matrix of complex numbers.
Definition: QuEST.h:186
void statevec_pauliXDistributed(Qureg qureg, ComplexArray stateVecIn, ComplexArray stateVecOut)
Rotate a single qubit by {{0,1},{1,0}.
Definition: QuEST_cpu.c:2651
static int getChunkOuterBlockPairId(int chunkIsUpper, int chunkId, long long int chunkSize, int targetQubit, int numQubits)
void statevec_controlledNot(Qureg qureg, int controlQubit, int targetQubit)
void copyVecIntoMatrixPairState(Qureg matr, Qureg vec)
This copies/clones vec (a statevector) into every node's matr pairState.
Complex statevec_calcInnerProduct(Qureg bra, Qureg ket)
#define qreal
void statevec_multiControlledUnitary(Qureg qureg, long long int ctrlQubitsMask, long long int ctrlFlipMask, int targetQubit, ComplexMatrix2 u)
void statevec_pauliYConj(Qureg qureg, int targetQubit)
void statevec_calcProbOfAllOutcomesLocal(qreal *outcomeProbs, Qureg qureg, int *qubits, int numQubits)
Definition: QuEST_cpu.c:3549
void exchangePairStateVectorHalves(Qureg qureg, int pairRank)
__forceinline__ __device__ long long int flipBit(const long long int number, const int bitInd)
Definition: QuEST_gpu.cu:95
void statevec_controlledPauliY(Qureg qureg, int controlQubit, int targetQubit)
void statevec_swapQubitAmps(Qureg qureg, int qb1, int qb2)
static int getChunkPairId(int chunkIsUpper, int chunkId, long long int chunkSize, int targetQubit)
get position of corresponding chunk, holding values required to update values in my chunk (with chunk...
qreal densmatr_calcPurity(Qureg qureg)
void statevec_collapseToOutcomeDistributedSetZero(Qureg qureg)
Definition: QuEST_cpu.c:3887
int chunkId
The position of the chunk of the state vector held by this process in the full state vector.
Definition: QuEST.h:336
qreal imag[2][2]
Definition: QuEST.h:140
qreal * imag
The imaginary values of the 2^numQubits complex elements.
Definition: QuEST.h:310
void statevec_multiControlledTwoQubitUnitary(Qureg qureg, long long int ctrlMask, int q1, int q2, ComplexMatrix4 u)
This calls swapQubitAmps only when it would involve a distributed communication; if the qubit chunks ...
void compressPairVectorForSingleQubitDepolarise(Qureg qureg, int targetQubit)
long long int numAmpsPerChunk
Number of probability amplitudes held in stateVec by this process In the non-MPI version,...
Definition: QuEST.h:332
void densmatr_mixTwoQubitDepolarisingLocal(Qureg qureg, int qubit1, int qubit2, qreal delta, qreal gamma)
Definition: QuEST_cpu.c:393
static void getRotAngleFromUnitaryMatrix(int chunkIsUpper, Complex *rot1, Complex *rot2, ComplexMatrix2 u)
Get rotation values for a given chunk given a unitary matrix.
void statevec_controlledPauliYLocal(Qureg qureg, int controlQubit, int targetQubit, int conjFac)
Definition: QuEST_cpu.c:2982
int numRanks
Definition: QuEST.h:365
void densmatr_mixTwoQubitDephasing(Qureg qureg, int qubit1, int qubit2, qreal dephase)
Definition: QuEST_cpu.c:90
void statevec_compactUnitaryDistributed(Qureg qureg, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Rotate a single qubit in the state vector of probability amplitudes, given two complex numbers alpha ...
Definition: QuEST_cpu.c:2095
qreal densmatr_calcTotalProb(Qureg qureg)
void exchangeStateVectors(Qureg qureg, int pairRank)
void statevec_multiControlledUnitaryLocal(Qureg qureg, int targetQubit, long long int ctrlQubitsMask, long long int ctrlFlipMask, ComplexMatrix2 u)
Definition: QuEST_cpu.c:2268
long long int getQubitBitMask(int *qubits, int numQubits)
Definition: QuEST_common.c:50
Represents a diagonal complex operator on the full Hilbert state of a Qureg.
Definition: QuEST.h:297
void statevec_pauliX(Qureg qureg, int targetQubit)
void statevec_pauliYDistributed(Qureg qureg, ComplexArray stateVecIn, ComplexArray stateVecOut, int updateUpper, int conjFac)
Rotate a single qubit by +-{{0,-i},{i,0}.
Definition: QuEST_cpu.c:2945
void densmatr_mixDampingDistributed(Qureg qureg, int targetQubit, qreal damping)
Definition: QuEST_cpu.c:306
Complex densmatr_calcExpecDiagonalOpLocal(Qureg qureg, DiagonalOp op)
Definition: QuEST_cpu.c:4167
void statevec_pauliXLocal(Qureg qureg, int targetQubit)
Definition: QuEST_cpu.c:2593
void densmatr_initPureStateLocal(Qureg targetQureg, Qureg copyQureg)
Definition: QuEST_cpu.c:1195
__forceinline__ __device__ int extractBit(const int locationOfBitFromRight, const long long int theEncodedNumber)
Definition: QuEST_gpu.cu:82
unsigned long int * seeds
Definition: QuEST.h:366
void densmatr_mixDampingLocal(Qureg qureg, int targetQubit, qreal damping)
Definition: QuEST_cpu.c:180
Represents a system of qubits.
Definition: QuEST.h:322
void densmatr_mixDamping(Qureg qureg, int targetQubit, qreal damping)
qreal densmatr_calcInnerProductLocal(Qureg a, Qureg b)
computes Tr(conjTrans(a) b) = sum of (a_ij^* b_ij)
Definition: QuEST_cpu.c:969
static int getChunkIdFromIndex(Qureg qureg, long long int index)
void statevec_multiControlledMultiQubitNotDistributed(Qureg qureg, int ctrlMask, int targMask, ComplexArray stateVecIn, ComplexArray stateVecOut)
Definition: QuEST_cpu.c:2834
void densmatr_mixTwoQubitDepolarising(Qureg qureg, int qubit1, int qubit2, qreal depolLevel)
void statevec_unitaryDistributed(Qureg qureg, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Apply a unitary operation to a single qubit given a subset of the state vector with upper and lower b...
Definition: QuEST_cpu.c:2151
void statevec_controlledCompactUnitaryDistributed(Qureg qureg, int controlQubit, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Rotate a single qubit in the state vector of probability amplitudes, given two complex numbers alpha ...
Definition: QuEST_cpu.c:2414
void densmatr_initPureState(Qureg targetQureg, Qureg copyQureg)
static int densityMatrixBlockFitsInChunk(long long int chunkSize, int numQubits, int targetQubit)
ComplexArray stateVec
Computational state amplitudes - a subset thereof in the MPI version.
Definition: QuEST.h:341
qreal real[2][2]
Definition: QuEST.h:139
void statevec_compactUnitary(Qureg qureg, int targetQubit, Complex alpha, Complex beta)
void densmatr_mixDepolarisingLocal(Qureg qureg, int targetQubit, qreal depolLevel)
Definition: QuEST_cpu.c:131
void statevec_collapseToKnownProbOutcomeDistributedRenorm(Qureg qureg, int measureQubit, qreal totalProbability)
Renormalise parts of the state vector where measureQubit=0 or 1, based on the total probability of th...
Definition: QuEST_cpu.c:3849
void statevec_multiControlledMultiQubitUnitary(Qureg qureg, long long int ctrlMask, int *targs, int numTargs, ComplexMatrixN u)
This calls swapQubitAmps only when it would involve a distributed communication; if the qubit chunks ...
long long int numElemsPerChunk
The number of the 2^numQubits amplitudes stored on each distributed node.
Definition: QuEST.h:302
static int halfMatrixBlockFitsInChunk(long long int chunkSize, int targetQubit)
return whether the current qubit rotation will use blocks that fit within a single chunk.
void statevec_calcProbOfAllOutcomes(qreal *retProbs, Qureg qureg, int *qubits, int numQubits)
void statevec_hadamardLocal(Qureg qureg, int targetQubit)
Definition: QuEST_cpu.c:3078
void reportQuESTEnv(QuESTEnv env)
Report information about the QuEST environment.
void densmatr_applyDiagonalOpLocal(Qureg qureg, DiagonalOp op)
Definition: QuEST_cpu.c:4082
void statevec_controlledUnitaryLocal(Qureg qureg, int controlQubit, int targetQubit, ComplexMatrix2 u)
Definition: QuEST_cpu.c:2336
void densmatr_applyDiagonalOp(Qureg qureg, DiagonalOp op)
long long int getGlobalIndOfOddParityInChunk(Qureg qureg, int qb1, int qb2)
returns -1 if this node contains no amplitudes where qb1 and qb2 have opposite parity,...
void densmatr_mixTwoQubitDepolarisingLocalPart1(Qureg qureg, int qubit1, int qubit2, qreal delta)
Definition: QuEST_cpu.c:494
void copyDiagOpIntoMatrixPairState(Qureg qureg, DiagonalOp op)
void statevec_controlledNotDistributed(Qureg qureg, int controlQubit, ComplexArray stateVecIn, ComplexArray stateVecOut)
Rotate a single qubit by {{0,1},{1,0}.
Definition: QuEST_cpu.c:2742
void statevec_controlledCompactUnitary(Qureg qureg, int controlQubit, int targetQubit, Complex alpha, Complex beta)
int numQubitsRepresented
The number of qubits represented in either the state-vector or density matrix.
Definition: QuEST.h:327
int syncQuESTSuccess(int successCode)
Performs a logical AND on all successCodes held by all processes.
qreal * real
The real values of the 2^numQubits complex elements.
Definition: QuEST.h:308
qreal real
Definition: QuEST.h:105
void statevec_swapQubitAmpsDistributed(Qureg qureg, int pairRank, int qb1, int qb2)
qureg.pairStateVec contains the entire set of amplitudes of the paired node which includes the set of...
Definition: QuEST_cpu.c:3965
qreal imag
Definition: QuEST.h:106
static int getChunkOuterBlockPairIdForPart3(int chunkIsUpperSmallerQubit, int chunkIsUpperBiggerQubit, int chunkId, long long int chunkSize, int smallerQubit, int biggerQubit, int numQubits)
void densmatr_mixTwoQubitDepolarisingDistributed(Qureg qureg, int targetQubit, int qubit2, qreal delta, qreal gamma)
Definition: QuEST_cpu.c:547
void statevec_unitaryLocal(Qureg qureg, int targetQubit, ComplexMatrix2 u)
Definition: QuEST_cpu.c:2026
qreal statevec_calcTotalProb(Qureg qureg)
qreal densmatr_findProbabilityOfZeroLocal(Qureg qureg, int measureQubit)
Definition: QuEST_cpu.c:3402
static int chunkIsUpperInOuterBlock(int chunkId, long long int chunkSize, int targetQubit, int numQubits)
fix – do with masking instead
void validateNumRanks(int numRanks, const char *caller)
void seedQuEST(QuESTEnv *env, unsigned long int *seedArray, int numSeeds)
Seeds the random number generator with a custom array of key(s), overriding the default keys.
qreal densmatr_calcHilbertSchmidtDistance(Qureg a, Qureg b)
Represents one complex number.
Definition: QuEST.h:103
QuESTEnv createQuESTEnv(void)
Create the QuEST execution environment.
void statevec_hadamardDistributed(Qureg qureg, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut, int updateUpper)
Rotate a single qubit by {{1,1},{1,-1}}/sqrt2.
Definition: QuEST_cpu.c:3139
void statevec_multiControlledMultiQubitNot(Qureg qureg, int ctrlMask, int targMask)
static int maskContainsBit(const long long int mask, const int bitInd)
qreal statevec_getRealAmp(Qureg qureg, long long int index)
void statevec_pauliY(Qureg qureg, int targetQubit)
void statevec_controlledUnitary(Qureg qureg, int controlQubit, int targetQubit, ComplexMatrix2 u)
void statevec_controlledPauliYDistributed(Qureg qureg, int controlQubit, ComplexArray stateVecIn, ComplexArray stateVecOut, int conjFac)
Definition: QuEST_cpu.c:3036
void statevec_controlledCompactUnitaryLocal(Qureg qureg, int controlQubit, int targetQubit, Complex alpha, Complex beta)
Definition: QuEST_cpu.c:2196
static int chunkIsUpper(int chunkId, long long int chunkSize, int targetQubit)
Returns whether a given chunk in position chunkId is in the upper or lower half of a block.
void statevec_controlledPauliYConj(Qureg qureg, int controlQubit, int targetQubit)
Complex statevec_calcInnerProductLocal(Qureg bra, Qureg ket)
Definition: QuEST_cpu.c:1082
void statevec_collapseToKnownProbOutcome(Qureg qureg, int measureQubit, int outcome, qreal totalStateProb)
Represents a 2x2 matrix of complex numbers.
Definition: QuEST.h:137
void statevec_unitary(Qureg qureg, int targetQubit, ComplexMatrix2 u)
qreal densmatr_calcFidelityLocal(Qureg qureg, Qureg pureState)
computes a few dens-columns-worth of (vec^*T) dens * vec
Definition: QuEST_cpu.c:1001
qreal statevec_findProbabilityOfZeroLocal(Qureg qureg, int measureQubit)
Measure the total probability of a specified qubit being in the zero state across all amplitudes in t...
Definition: QuEST_cpu.c:3457