QuEST_cpu.c
Go to the documentation of this file.
1 // Distributed under MIT licence. See https://github.com/QuEST-Kit/QuEST/blob/master/LICENCE.txt for details
2 
13 # include "QuEST.h"
14 # include "QuEST_internal.h"
15 # include "QuEST_precision.h"
16 # include "mt19937ar.h"
17 
18 # include "QuEST_cpu_internal.h"
19 
20 # include <math.h>
21 # include <stdio.h>
22 # include <stdlib.h>
23 # include <stdint.h>
24 # include <assert.h>
25 
26 # ifdef _OPENMP
27 # include <omp.h>
28 # endif
29 
30 /* to support MSVC, we must remove the use of VLA in multiQubtUnitary.
31  * We'll instead create stack arrays use _malloca
32  */
33 #ifdef _WIN32
34  #include <malloc.h>
35 #endif
36 
37 
38 /*
39  * overloads for consistent API with GPU
40  */
41 
42 void copyStateToGPU(Qureg qureg) {
43 }
44 
45 void copyStateFromGPU(Qureg qureg) {
46 }
47 
48 
49 
50 /*
51  * state vector and density matrix operations
52  */
53 
54 void densmatr_oneQubitDegradeOffDiagonal(Qureg qureg, int targetQubit, qreal retain){
55  long long int numTasks = qureg.numAmpsPerChunk;
56  long long int innerMask = 1LL << targetQubit;
57  long long int outerMask = 1LL << (targetQubit + (qureg.numQubitsRepresented));
58 
59  long long int thisTask;
60  long long int thisPattern;
61  long long int totMask = innerMask|outerMask;
62 
63 # ifdef _OPENMP
64 # pragma omp parallel \
65  default (none) \
66  shared (innerMask,outerMask,totMask,qureg,retain,numTasks, targetQubit) \
67  private (thisTask,thisPattern)
68 # endif
69  {
70 # ifdef _OPENMP
71 # pragma omp for schedule (static)
72 # endif
73  for (thisTask=0; thisTask<numTasks; thisTask++){
74  thisPattern = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMask;
75  if ((thisPattern==innerMask) || (thisPattern==outerMask)){
76  // do dephase
77  // the lines below will degrade the off-diagonal terms |..0..><..1..| and |..1..><..0..|
78  qureg.stateVec.real[thisTask] = retain*qureg.stateVec.real[thisTask];
79  qureg.stateVec.imag[thisTask] = retain*qureg.stateVec.imag[thisTask];
80  }
81  }
82  }
83 }
84 
85 void densmatr_mixDephasing(Qureg qureg, int targetQubit, qreal dephase) {
86  qreal retain=1-dephase;
87  densmatr_oneQubitDegradeOffDiagonal(qureg, targetQubit, retain);
88 }
89 
90 void densmatr_mixTwoQubitDephasing(Qureg qureg, int qubit1, int qubit2, qreal dephase) {
91  qreal retain=1-dephase;
92 
93  long long int numTasks = qureg.numAmpsPerChunk;
94  long long int innerMaskQubit1 = 1LL << qubit1;
95  long long int outerMaskQubit1 = 1LL << (qubit1 + (qureg.numQubitsRepresented));
96  long long int innerMaskQubit2 = 1LL << qubit2;
97  long long int outerMaskQubit2 = 1LL << (qubit2 + (qureg.numQubitsRepresented));
98  long long int totMaskQubit1 = innerMaskQubit1|outerMaskQubit1;
99  long long int totMaskQubit2 = innerMaskQubit2|outerMaskQubit2;
100 
101  long long int thisTask;
102  long long int thisPatternQubit1, thisPatternQubit2;
103 
104 # ifdef _OPENMP
105 # pragma omp parallel \
106  default (none) \
107  shared (innerMaskQubit1,outerMaskQubit1,totMaskQubit1,innerMaskQubit2,outerMaskQubit2, \
108  totMaskQubit2,qureg,retain,numTasks) \
109  private (thisTask,thisPatternQubit1,thisPatternQubit2)
110 # endif
111  {
112 # ifdef _OPENMP
113 # pragma omp for schedule (static)
114 # endif
115  for (thisTask=0; thisTask<numTasks; thisTask++){
116  thisPatternQubit1 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit1;
117  thisPatternQubit2 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit2;
118 
119  // any mismatch |...0...><...1...| etc
120  if ( (thisPatternQubit1==innerMaskQubit1) || (thisPatternQubit1==outerMaskQubit1) ||
121  (thisPatternQubit2==innerMaskQubit2) || (thisPatternQubit2==outerMaskQubit2) ){
122  // do dephase
123  // the lines below will degrade the off-diagonal terms |..0..><..1..| and |..1..><..0..|
124  qureg.stateVec.real[thisTask] = retain*qureg.stateVec.real[thisTask];
125  qureg.stateVec.imag[thisTask] = retain*qureg.stateVec.imag[thisTask];
126  }
127  }
128  }
129 }
130 
131 void densmatr_mixDepolarisingLocal(Qureg qureg, int targetQubit, qreal depolLevel) {
132  qreal retain=1-depolLevel;
133 
134  long long int numTasks = qureg.numAmpsPerChunk;
135  long long int innerMask = 1LL << targetQubit;
136  long long int outerMask = 1LL << (targetQubit + (qureg.numQubitsRepresented));
137  long long int totMask = innerMask|outerMask;
138 
139  long long int thisTask;
140  long long int partner;
141  long long int thisPattern;
142 
143  qreal realAv, imagAv;
144 
145 # ifdef _OPENMP
146 # pragma omp parallel \
147  default (none) \
148  shared (innerMask,outerMask,totMask,qureg,retain,depolLevel,numTasks) \
149  private (thisTask,partner,thisPattern,realAv,imagAv)
150 # endif
151  {
152 # ifdef _OPENMP
153 # pragma omp for schedule (static)
154 # endif
155  for (thisTask=0; thisTask<numTasks; thisTask++){
156  thisPattern = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMask;
157  if ((thisPattern==innerMask) || (thisPattern==outerMask)){
158  // do dephase
159  // the lines below will degrade the off-diagonal terms |..0..><..1..| and |..1..><..0..|
160  qureg.stateVec.real[thisTask] = retain*qureg.stateVec.real[thisTask];
161  qureg.stateVec.imag[thisTask] = retain*qureg.stateVec.imag[thisTask];
162  } else {
163  if ((thisTask&totMask)==0){ //this element relates to targetQubit in state 0
164  // do depolarise
165  partner = thisTask | totMask;
166  realAv = (qureg.stateVec.real[thisTask] + qureg.stateVec.real[partner]) /2 ;
167  imagAv = (qureg.stateVec.imag[thisTask] + qureg.stateVec.imag[partner]) /2 ;
168 
169  qureg.stateVec.real[thisTask] = retain*qureg.stateVec.real[thisTask] + depolLevel*realAv;
170  qureg.stateVec.imag[thisTask] = retain*qureg.stateVec.imag[thisTask] + depolLevel*imagAv;
171 
172  qureg.stateVec.real[partner] = retain*qureg.stateVec.real[partner] + depolLevel*realAv;
173  qureg.stateVec.imag[partner] = retain*qureg.stateVec.imag[partner] + depolLevel*imagAv;
174  }
175  }
176  }
177  }
178 }
179 
180 void densmatr_mixDampingLocal(Qureg qureg, int targetQubit, qreal damping) {
181  qreal retain=1-damping;
182  qreal dephase=sqrt(retain);
183 
184  long long int numTasks = qureg.numAmpsPerChunk;
185  long long int innerMask = 1LL << targetQubit;
186  long long int outerMask = 1LL << (targetQubit + (qureg.numQubitsRepresented));
187  long long int totMask = innerMask|outerMask;
188 
189  long long int thisTask;
190  long long int partner;
191  long long int thisPattern;
192 
193  //qreal realAv, imagAv;
194 
195 # ifdef _OPENMP
196 # pragma omp parallel \
197  default (none) \
198  shared (innerMask,outerMask,totMask,qureg,retain,damping,dephase,numTasks) \
199  private (thisTask,partner,thisPattern)
200 # endif
201  {
202 # ifdef _OPENMP
203 # pragma omp for schedule (static)
204 # endif
205  for (thisTask=0; thisTask<numTasks; thisTask++){
206  thisPattern = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMask;
207  if ((thisPattern==innerMask) || (thisPattern==outerMask)){
208  // do dephase
209  // the lines below will degrade the off-diagonal terms |..0..><..1..| and |..1..><..0..|
210  qureg.stateVec.real[thisTask] = dephase*qureg.stateVec.real[thisTask];
211  qureg.stateVec.imag[thisTask] = dephase*qureg.stateVec.imag[thisTask];
212  } else {
213  if ((thisTask&totMask)==0){ //this element relates to targetQubit in state 0
214  // do depolarise
215  partner = thisTask | totMask;
216  //realAv = (qureg.stateVec.real[thisTask] + qureg.stateVec.real[partner]) /2 ;
217  //imagAv = (qureg.stateVec.imag[thisTask] + qureg.stateVec.imag[partner]) /2 ;
218 
219  qureg.stateVec.real[thisTask] = qureg.stateVec.real[thisTask] + damping*qureg.stateVec.real[partner];
220  qureg.stateVec.imag[thisTask] = qureg.stateVec.imag[thisTask] + damping*qureg.stateVec.imag[partner];
221 
222  qureg.stateVec.real[partner] = retain*qureg.stateVec.real[partner];
223  qureg.stateVec.imag[partner] = retain*qureg.stateVec.imag[partner];
224  }
225  }
226  }
227  }
228 }
229 
230 void densmatr_mixDepolarisingDistributed(Qureg qureg, int targetQubit, qreal depolLevel) {
231 
232  // first do dephase part.
233  // TODO -- this might be more efficient to do at the same time as the depolarise if we move to
234  // iterating over all elements in the state vector for the purpose of vectorisation
235  // TODO -- if we keep this split, move this function to densmatr_mixDepolarising()
236  densmatr_mixDephasing(qureg, targetQubit, depolLevel);
237 
238  long long int sizeInnerBlock, sizeInnerHalfBlock;
239  long long int sizeOuterColumn, sizeOuterHalfColumn;
240  long long int thisInnerBlock, // current block
241  thisOuterColumn, // current column in density matrix
242  thisIndex, // current index in (density matrix representation) state vector
243  thisIndexInOuterColumn,
244  thisIndexInInnerBlock;
245  int outerBit;
246 
247  long long int thisTask;
248  long long int numTasks=qureg.numAmpsPerChunk>>1;
249 
250  // set dimensions
251  sizeInnerHalfBlock = 1LL << targetQubit;
252  sizeInnerBlock = 2LL * sizeInnerHalfBlock;
253  sizeOuterColumn = 1LL << qureg.numQubitsRepresented;
254  sizeOuterHalfColumn = sizeOuterColumn >> 1;
255 
256 # ifdef _OPENMP
257 # pragma omp parallel \
258  default (none) \
259  shared (sizeInnerBlock,sizeInnerHalfBlock,sizeOuterColumn,sizeOuterHalfColumn, \
260  qureg,depolLevel,numTasks,targetQubit) \
261  private (thisTask,thisInnerBlock,thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
262  thisIndexInInnerBlock,outerBit)
263 # endif
264  {
265 # ifdef _OPENMP
266 # pragma omp for schedule (static)
267 # endif
268  // thisTask iterates over half the elements in this process' chunk of the density matrix
269  // treat this as iterating over all columns, then iterating over half the values
270  // within one column.
271  // If this function has been called, this process' chunk contains half an
272  // outer block or less
273  for (thisTask=0; thisTask<numTasks; thisTask++) {
274  // we want to process all columns in the density matrix,
275  // updating the values for half of each column (one half of each inner block)
276  thisOuterColumn = thisTask / sizeOuterHalfColumn;
277  thisIndexInOuterColumn = thisTask&(sizeOuterHalfColumn-1); // thisTask % sizeOuterHalfColumn
278  thisInnerBlock = thisIndexInOuterColumn/sizeInnerHalfBlock;
279  // get index in state vector corresponding to upper inner block
280  thisIndexInInnerBlock = thisTask&(sizeInnerHalfBlock-1); // thisTask % sizeInnerHalfBlock
281  thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlock*sizeInnerBlock
282  + thisIndexInInnerBlock;
283  // check if we are in the upper or lower half of an outer block
284  outerBit = extractBit(targetQubit, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
285  // if we are in the lower half of an outer block, shift to be in the lower half
286  // of the inner block as well (we want to dephase |0><0| and |1><1| only)
287  thisIndex += outerBit*(sizeInnerHalfBlock);
288 
289  // NOTE: at this point thisIndex should be the index of the element we want to
290  // dephase in the chunk of the state vector on this process, in the
291  // density matrix representation.
292  // thisTask is the index of the pair element in pairStateVec
293 
294 
295  // state[thisIndex] = (1-depolLevel)*state[thisIndex] + depolLevel*(state[thisIndex]
296  // + pair[thisTask])/2
297  qureg.stateVec.real[thisIndex] = (1-depolLevel)*qureg.stateVec.real[thisIndex] +
298  depolLevel*(qureg.stateVec.real[thisIndex] + qureg.pairStateVec.real[thisTask])/2;
299 
300  qureg.stateVec.imag[thisIndex] = (1-depolLevel)*qureg.stateVec.imag[thisIndex] +
301  depolLevel*(qureg.stateVec.imag[thisIndex] + qureg.pairStateVec.imag[thisTask])/2;
302  }
303  }
304 }
305 
306 void densmatr_mixDampingDistributed(Qureg qureg, int targetQubit, qreal damping) {
307  qreal retain=1-damping;
308  qreal dephase=sqrt(1-damping);
309 
310  // multiply the off-diagonal (|0><1| and |1><0|) terms by sqrt(1-damping)
311  densmatr_oneQubitDegradeOffDiagonal(qureg, targetQubit, dephase);
312 
313  // below, we modify the diagonals terms which require |1><1| to |0><0| communication
314 
315  long long int sizeInnerBlock, sizeInnerHalfBlock;
316  long long int sizeOuterColumn, sizeOuterHalfColumn;
317  long long int thisInnerBlock, // current block
318  thisOuterColumn, // current column in density matrix
319  thisIndex, // current index in (density matrix representation) state vector
320  thisIndexInOuterColumn,
321  thisIndexInInnerBlock;
322  int outerBit;
323  int stateBit;
324 
325  long long int thisTask;
326  long long int numTasks=qureg.numAmpsPerChunk>>1;
327 
328  // set dimensions
329  sizeInnerHalfBlock = 1LL << targetQubit;
330  sizeInnerBlock = 2LL * sizeInnerHalfBlock;
331  sizeOuterColumn = 1LL << qureg.numQubitsRepresented;
332  sizeOuterHalfColumn = sizeOuterColumn >> 1;
333 
334 # ifdef _OPENMP
335 # pragma omp parallel \
336  default (none) \
337  shared (sizeInnerBlock,sizeInnerHalfBlock,sizeOuterColumn,sizeOuterHalfColumn, \
338  qureg,damping, retain, dephase, numTasks,targetQubit) \
339  private (thisTask,thisInnerBlock,thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
340  thisIndexInInnerBlock,outerBit, stateBit)
341 # endif
342  {
343 # ifdef _OPENMP
344 # pragma omp for schedule (static)
345 # endif
346  // thisTask iterates over half the elements in this process' chunk of the density matrix
347  // treat this as iterating over all columns, then iterating over half the values
348  // within one column.
349  // If this function has been called, this process' chunk contains half an
350  // outer block or less
351  for (thisTask=0; thisTask<numTasks; thisTask++) {
352  // we want to process all columns in the density matrix,
353  // updating the values for half of each column (one half of each inner block)
354  thisOuterColumn = thisTask / sizeOuterHalfColumn;
355  thisIndexInOuterColumn = thisTask&(sizeOuterHalfColumn-1); // thisTask % sizeOuterHalfColumn
356  thisInnerBlock = thisIndexInOuterColumn/sizeInnerHalfBlock;
357  // get index in state vector corresponding to upper inner block
358  thisIndexInInnerBlock = thisTask&(sizeInnerHalfBlock-1); // thisTask % sizeInnerHalfBlock
359  thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlock*sizeInnerBlock
360  + thisIndexInInnerBlock;
361  // check if we are in the upper or lower half of an outer block
362  outerBit = extractBit(targetQubit, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
363  // if we are in the lower half of an outer block, shift to be in the lower half
364  // of the inner block as well (we want to dephase |0><0| and |1><1| only)
365  thisIndex += outerBit*(sizeInnerHalfBlock);
366 
367  // NOTE: at this point thisIndex should be the index of the element we want to
368  // dephase in the chunk of the state vector on this process, in the
369  // density matrix representation.
370  // thisTask is the index of the pair element in pairStateVec
371 
372  // Extract state bit, is 0 if thisIndex corresponds to a state with 0 in the target qubit
373  // and is 1 if thisIndex corresponds to a state with 1 in the target qubit
374  stateBit = extractBit(targetQubit, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId));
375 
376  // state[thisIndex] = (1-depolLevel)*state[thisIndex] + depolLevel*(state[thisIndex]
377  // + pair[thisTask])/2
378  if(stateBit == 0){
379  qureg.stateVec.real[thisIndex] = qureg.stateVec.real[thisIndex] +
380  damping*( qureg.pairStateVec.real[thisTask]);
381 
382  qureg.stateVec.imag[thisIndex] = qureg.stateVec.imag[thisIndex] +
383  damping*( qureg.pairStateVec.imag[thisTask]);
384  } else{
385  qureg.stateVec.real[thisIndex] = retain*qureg.stateVec.real[thisIndex];
386 
387  qureg.stateVec.imag[thisIndex] = retain*qureg.stateVec.imag[thisIndex];
388  }
389  }
390  }
391 }
392 
393 void densmatr_mixTwoQubitDepolarisingLocal(Qureg qureg, int qubit1, int qubit2, qreal delta, qreal gamma) {
394  long long int numTasks = qureg.numAmpsPerChunk;
395  long long int innerMaskQubit1 = 1LL << qubit1;
396  long long int outerMaskQubit1= 1LL << (qubit1 + qureg.numQubitsRepresented);
397  long long int totMaskQubit1 = innerMaskQubit1 | outerMaskQubit1;
398  long long int innerMaskQubit2 = 1LL << qubit2;
399  long long int outerMaskQubit2 = 1LL << (qubit2 + qureg.numQubitsRepresented);
400  long long int totMaskQubit2 = innerMaskQubit2 | outerMaskQubit2;
401 
402  long long int thisTask;
403  long long int partner;
404  long long int thisPatternQubit1, thisPatternQubit2;
405 
406  qreal real00, imag00;
407 
408 # ifdef _OPENMP
409 # pragma omp parallel \
410  default (none) \
411  shared (totMaskQubit1,totMaskQubit2,qureg,delta,gamma,numTasks) \
412  private (thisTask,partner,thisPatternQubit1,thisPatternQubit2,real00,imag00)
413 # endif
414  {
415 # ifdef _OPENMP
416 # pragma omp for schedule (static)
417 # endif
418  //--------------------------------------- STEP ONE ---------------------
419  for (thisTask=0; thisTask<numTasks; thisTask++){
420  thisPatternQubit1 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit1;
421  thisPatternQubit2 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit2;
422  if ((thisPatternQubit1==0) && ((thisPatternQubit2==0)
423  || (thisPatternQubit2==totMaskQubit2))){
424  //this element of form |...X...0...><...X...0...| for X either 0 or 1.
425  partner = thisTask | totMaskQubit1;
426  real00 = qureg.stateVec.real[thisTask];
427  imag00 = qureg.stateVec.imag[thisTask];
428 
429  qureg.stateVec.real[thisTask] = qureg.stateVec.real[thisTask]
430  + delta*qureg.stateVec.real[partner];
431  qureg.stateVec.imag[thisTask] = qureg.stateVec.imag[thisTask]
432  + delta*qureg.stateVec.imag[partner];
433 
434  qureg.stateVec.real[partner] = qureg.stateVec.real[partner] + delta*real00;
435  qureg.stateVec.imag[partner] = qureg.stateVec.imag[partner] + delta*imag00;
436 
437  }
438  }
439 # ifdef _OPENMP
440 # pragma omp for schedule (static)
441 # endif
442  //--------------------------------------- STEP TWO ---------------------
443  for (thisTask=0; thisTask<numTasks; thisTask++){
444  thisPatternQubit1 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit1;
445  thisPatternQubit2 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit2;
446  if ((thisPatternQubit2==0) && ((thisPatternQubit1==0)
447  || (thisPatternQubit1==totMaskQubit1))){
448  //this element of form |...0...X...><...0...X...| for X either 0 or 1.
449  partner = thisTask | totMaskQubit2;
450  real00 = qureg.stateVec.real[thisTask];
451  imag00 = qureg.stateVec.imag[thisTask];
452 
453  qureg.stateVec.real[thisTask] = qureg.stateVec.real[thisTask]
454  + delta*qureg.stateVec.real[partner];
455  qureg.stateVec.imag[thisTask] = qureg.stateVec.imag[thisTask]
456  + delta*qureg.stateVec.imag[partner];
457 
458  qureg.stateVec.real[partner] = qureg.stateVec.real[partner] + delta*real00;
459  qureg.stateVec.imag[partner] = qureg.stateVec.imag[partner] + delta*imag00;
460 
461  }
462  }
463 
464 # ifdef _OPENMP
465 # pragma omp for schedule (static)
466 # endif
467  //--------------------------------------- STEP THREE ---------------------
468  for (thisTask=0; thisTask<numTasks; thisTask++){
469  thisPatternQubit1 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit1;
470  thisPatternQubit2 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit2;
471  if ((thisPatternQubit2==0) && ((thisPatternQubit1==0)
472  || (thisPatternQubit1==totMaskQubit1))){
473  //this element of form |...0...X...><...0...X...| for X either 0 or 1.
474  partner = thisTask | totMaskQubit2;
475  partner = partner ^ totMaskQubit1;
476  real00 = qureg.stateVec.real[thisTask];
477  imag00 = qureg.stateVec.imag[thisTask];
478 
479  qureg.stateVec.real[thisTask] = gamma * (qureg.stateVec.real[thisTask]
480  + delta*qureg.stateVec.real[partner]);
481  qureg.stateVec.imag[thisTask] = gamma * (qureg.stateVec.imag[thisTask]
482  + delta*qureg.stateVec.imag[partner]);
483 
484  qureg.stateVec.real[partner] = gamma * (qureg.stateVec.real[partner]
485  + delta*real00);
486  qureg.stateVec.imag[partner] = gamma * (qureg.stateVec.imag[partner]
487  + delta*imag00);
488 
489  }
490  }
491  }
492 }
493 
494 void densmatr_mixTwoQubitDepolarisingLocalPart1(Qureg qureg, int qubit1, int qubit2, qreal delta) {
495  long long int numTasks = qureg.numAmpsPerChunk;
496  long long int innerMaskQubit1 = 1LL << qubit1;
497  long long int outerMaskQubit1= 1LL << (qubit1 + qureg.numQubitsRepresented);
498  long long int totMaskQubit1 = innerMaskQubit1 | outerMaskQubit1;
499  long long int innerMaskQubit2 = 1LL << qubit2;
500  long long int outerMaskQubit2 = 1LL << (qubit2 + qureg.numQubitsRepresented);
501  long long int totMaskQubit2 = innerMaskQubit2 | outerMaskQubit2;
502  // correct for being in a particular chunk
503  //totMaskQubit2 = totMaskQubit2&(qureg.numAmpsPerChunk-1); // totMaskQubit2 % numAmpsPerChunk
504 
505 
506  long long int thisTask;
507  long long int partner;
508  long long int thisPatternQubit1, thisPatternQubit2;
509 
510  qreal real00, imag00;
511 
512 # ifdef _OPENMP
513 # pragma omp parallel \
514  default (none) \
515  shared (totMaskQubit1,totMaskQubit2,qureg,delta,numTasks) \
516  private (thisTask,partner,thisPatternQubit1,thisPatternQubit2,real00,imag00)
517 # endif
518  {
519 
520 # ifdef _OPENMP
521 # pragma omp for schedule (static)
522 # endif
523  //--------------------------------------- STEP ONE ---------------------
524  for (thisTask=0; thisTask<numTasks; thisTask ++){
525  thisPatternQubit1 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit1;
526  thisPatternQubit2 = (thisTask+qureg.numAmpsPerChunk*qureg.chunkId)&totMaskQubit2;
527  if ((thisPatternQubit1==0) && ((thisPatternQubit2==0)
528  || (thisPatternQubit2==totMaskQubit2))){
529  //this element of form |...X...0...><...X...0...| for X either 0 or 1.
530  partner = thisTask | totMaskQubit1;
531  real00 = qureg.stateVec.real[thisTask];
532  imag00 = qureg.stateVec.imag[thisTask];
533 
534  qureg.stateVec.real[thisTask] = qureg.stateVec.real[thisTask]
535  + delta*qureg.stateVec.real[partner];
536  qureg.stateVec.imag[thisTask] = qureg.stateVec.imag[thisTask]
537  + delta*qureg.stateVec.imag[partner];
538 
539  qureg.stateVec.real[partner] = qureg.stateVec.real[partner] + delta*real00;
540  qureg.stateVec.imag[partner] = qureg.stateVec.imag[partner] + delta*imag00;
541 
542  }
543  }
544  }
545 }
546 
548  int qubit2, qreal delta, qreal gamma) {
549 
550  long long int sizeInnerBlockQ1, sizeInnerHalfBlockQ1;
551  long long int sizeInnerBlockQ2, sizeInnerHalfBlockQ2, sizeInnerQuarterBlockQ2;
552  long long int sizeOuterColumn, sizeOuterQuarterColumn;
553  long long int thisInnerBlockQ2,
554  thisOuterColumn, // current column in density matrix
555  thisIndex, // current index in (density matrix representation) state vector
556  thisIndexInOuterColumn,
557  thisIndexInInnerBlockQ1,
558  thisIndexInInnerBlockQ2,
559  thisInnerBlockQ1InInnerBlockQ2;
560  int outerBitQ1, outerBitQ2;
561 
562  long long int thisTask;
563  long long int numTasks=qureg.numAmpsPerChunk>>2;
564 
565  // set dimensions
566  sizeInnerHalfBlockQ1 = 1LL << targetQubit;
567  sizeInnerHalfBlockQ2 = 1LL << qubit2;
568  sizeInnerQuarterBlockQ2 = sizeInnerHalfBlockQ2 >> 1;
569  sizeInnerBlockQ2 = sizeInnerHalfBlockQ2 << 1;
570  sizeInnerBlockQ1 = 2LL * sizeInnerHalfBlockQ1;
571  sizeOuterColumn = 1LL << qureg.numQubitsRepresented;
572  sizeOuterQuarterColumn = sizeOuterColumn >> 2;
573 
574 # ifdef _OPENMP
575 # pragma omp parallel \
576  default (none) \
577  shared (sizeInnerBlockQ1,sizeInnerHalfBlockQ1,sizeInnerBlockQ2,sizeInnerHalfBlockQ2,sizeInnerQuarterBlockQ2,\
578  sizeOuterColumn,sizeOuterQuarterColumn,qureg,delta,gamma,numTasks,targetQubit,qubit2) \
579  private (thisTask,thisInnerBlockQ2,thisInnerBlockQ1InInnerBlockQ2, \
580  thisOuterColumn,thisIndex,thisIndexInOuterColumn, \
581  thisIndexInInnerBlockQ1,thisIndexInInnerBlockQ2,outerBitQ1,outerBitQ2)
582 # endif
583  {
584 # ifdef _OPENMP
585 # pragma omp for schedule (static)
586 # endif
587  // thisTask iterates over half the elements in this process' chunk of the density matrix
588  // treat this as iterating over all columns, then iterating over half the values
589  // within one column.
590  // If this function has been called, this process' chunk contains half an
591  // outer block or less
592  for (thisTask=0; thisTask<numTasks; thisTask++) {
593  // we want to process all columns in the density matrix,
594  // updating the values for half of each column (one half of each inner block)
595  thisOuterColumn = thisTask / sizeOuterQuarterColumn;
596  // thisTask % sizeOuterQuarterColumn
597  thisIndexInOuterColumn = thisTask&(sizeOuterQuarterColumn-1);
598  thisInnerBlockQ2 = thisIndexInOuterColumn / sizeInnerQuarterBlockQ2;
599  // thisTask % sizeInnerQuarterBlockQ2;
600  thisIndexInInnerBlockQ2 = thisTask&(sizeInnerQuarterBlockQ2-1);
601  thisInnerBlockQ1InInnerBlockQ2 = thisIndexInInnerBlockQ2 / sizeInnerHalfBlockQ1;
602  // thisTask % sizeInnerHalfBlockQ1;
603  thisIndexInInnerBlockQ1 = thisTask&(sizeInnerHalfBlockQ1-1);
604 
605  // get index in state vector corresponding to upper inner block
606  thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlockQ2*sizeInnerBlockQ2
607  + thisInnerBlockQ1InInnerBlockQ2*sizeInnerBlockQ1 + thisIndexInInnerBlockQ1;
608 
609  // check if we are in the upper or lower half of an outer block for Q1
610  outerBitQ1 = extractBit(targetQubit, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
611  // if we are in the lower half of an outer block, shift to be in the lower half
612  // of the inner block as well (we want to dephase |0><0| and |1><1| only)
613  thisIndex += outerBitQ1*(sizeInnerHalfBlockQ1);
614 
615  // check if we are in the upper or lower half of an outer block for Q2
616  outerBitQ2 = extractBit(qubit2, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
617  // if we are in the lower half of an outer block, shift to be in the lower half
618  // of the inner block as well (we want to dephase |0><0| and |1><1| only)
619  thisIndex += outerBitQ2*(sizeInnerQuarterBlockQ2<<1);
620 
621  // NOTE: at this point thisIndex should be the index of the element we want to
622  // dephase in the chunk of the state vector on this process, in the
623  // density matrix representation.
624  // thisTask is the index of the pair element in pairStateVec
625 
626 
627  // state[thisIndex] = (1-depolLevel)*state[thisIndex] + depolLevel*(state[thisIndex]
628  // + pair[thisTask])/2
629  // NOTE: must set gamma=1 if using this function for steps 1 or 2
630  qureg.stateVec.real[thisIndex] = gamma*(qureg.stateVec.real[thisIndex] +
631  delta*qureg.pairStateVec.real[thisTask]);
632  qureg.stateVec.imag[thisIndex] = gamma*(qureg.stateVec.imag[thisIndex] +
633  delta*qureg.pairStateVec.imag[thisTask]);
634  }
635  }
636 }
637 
639  int qubit2, qreal delta, qreal gamma) {
640 
641  long long int sizeInnerBlockQ1, sizeInnerHalfBlockQ1;
642  long long int sizeInnerBlockQ2, sizeInnerHalfBlockQ2, sizeInnerQuarterBlockQ2;
643  long long int sizeOuterColumn, sizeOuterQuarterColumn;
644  long long int thisInnerBlockQ2,
645  thisOuterColumn, // current column in density matrix
646  thisIndex, // current index in (density matrix representation) state vector
647  thisIndexInPairVector,
648  thisIndexInOuterColumn,
649  thisIndexInInnerBlockQ1,
650  thisIndexInInnerBlockQ2,
651  thisInnerBlockQ1InInnerBlockQ2;
652  int outerBitQ1, outerBitQ2;
653 
654  long long int thisTask;
655  long long int numTasks=qureg.numAmpsPerChunk>>2;
656 
657  // set dimensions
658  sizeInnerHalfBlockQ1 = 1LL << targetQubit;
659  sizeInnerHalfBlockQ2 = 1LL << qubit2;
660  sizeInnerQuarterBlockQ2 = sizeInnerHalfBlockQ2 >> 1;
661  sizeInnerBlockQ2 = sizeInnerHalfBlockQ2 << 1;
662  sizeInnerBlockQ1 = 2LL * sizeInnerHalfBlockQ1;
663  sizeOuterColumn = 1LL << qureg.numQubitsRepresented;
664  sizeOuterQuarterColumn = sizeOuterColumn >> 2;
665 
666 //# if 0
667 # ifdef _OPENMP
668 # pragma omp parallel \
669  default (none) \
670  shared (sizeInnerBlockQ1,sizeInnerHalfBlockQ1,sizeInnerBlockQ2,sizeInnerHalfBlockQ2,sizeInnerQuarterBlockQ2,\
671  sizeOuterColumn,sizeOuterQuarterColumn,qureg,delta,gamma, numTasks,targetQubit,qubit2) \
672  private (thisTask,thisInnerBlockQ2,thisInnerBlockQ1InInnerBlockQ2, \
673  thisOuterColumn,thisIndex,thisIndexInPairVector,thisIndexInOuterColumn, \
674  thisIndexInInnerBlockQ1,thisIndexInInnerBlockQ2,outerBitQ1,outerBitQ2)
675 # endif
676  {
677 # ifdef _OPENMP
678 # pragma omp for schedule (static)
679 # endif
680 //# endif
681  // thisTask iterates over half the elements in this process' chunk of the density matrix
682  // treat this as iterating over all columns, then iterating over half the values
683  // within one column.
684  // If this function has been called, this process' chunk contains half an
685  // outer block or less
686  for (thisTask=0; thisTask<numTasks; thisTask++) {
687  // we want to process all columns in the density matrix,
688  // updating the values for half of each column (one half of each inner block)
689  thisOuterColumn = thisTask / sizeOuterQuarterColumn;
690  // thisTask % sizeOuterQuarterColumn
691  thisIndexInOuterColumn = thisTask&(sizeOuterQuarterColumn-1);
692  thisInnerBlockQ2 = thisIndexInOuterColumn / sizeInnerQuarterBlockQ2;
693  // thisTask % sizeInnerQuarterBlockQ2;
694  thisIndexInInnerBlockQ2 = thisTask&(sizeInnerQuarterBlockQ2-1);
695  thisInnerBlockQ1InInnerBlockQ2 = thisIndexInInnerBlockQ2 / sizeInnerHalfBlockQ1;
696  // thisTask % sizeInnerHalfBlockQ1;
697  thisIndexInInnerBlockQ1 = thisTask&(sizeInnerHalfBlockQ1-1);
698 
699  // get index in state vector corresponding to upper inner block
700  thisIndex = thisOuterColumn*sizeOuterColumn + thisInnerBlockQ2*sizeInnerBlockQ2
701  + thisInnerBlockQ1InInnerBlockQ2*sizeInnerBlockQ1 + thisIndexInInnerBlockQ1;
702 
703  // check if we are in the upper or lower half of an outer block for Q1
704  outerBitQ1 = extractBit(targetQubit, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
705  // if we are in the lower half of an outer block, shift to be in the lower half
706  // of the inner block as well (we want to dephase |0><0| and |1><1| only)
707  thisIndex += outerBitQ1*(sizeInnerHalfBlockQ1);
708 
709  // For part 3 we need to match elements such that (my Q1 != pair Q1) AND (my Q2 != pair Q2)
710  // Find correct index in pairStateVector
711  thisIndexInPairVector = thisTask + (1-outerBitQ1)*sizeInnerHalfBlockQ1*sizeOuterQuarterColumn -
712  outerBitQ1*sizeInnerHalfBlockQ1*sizeOuterQuarterColumn;
713 
714  // check if we are in the upper or lower half of an outer block for Q2
715  outerBitQ2 = extractBit(qubit2, (thisIndex+qureg.numAmpsPerChunk*qureg.chunkId)>>qureg.numQubitsRepresented);
716  // if we are in the lower half of an outer block, shift to be in the lower half
717  // of the inner block as well (we want to dephase |0><0| and |1><1| only)
718  thisIndex += outerBitQ2*(sizeInnerQuarterBlockQ2<<1);
719 
720 
721  // NOTE: at this point thisIndex should be the index of the element we want to
722  // dephase in the chunk of the state vector on this process, in the
723  // density matrix representation.
724 
725 
726  // state[thisIndex] = (1-depolLevel)*state[thisIndex] + depolLevel*(state[thisIndex]
727  // + pair[thisIndexInPairVector])/2
728  qureg.stateVec.real[thisIndex] = gamma*(qureg.stateVec.real[thisIndex] +
729  delta*qureg.pairStateVec.real[thisIndexInPairVector]);
730 
731  qureg.stateVec.imag[thisIndex] = gamma*(qureg.stateVec.imag[thisIndex] +
732  delta*qureg.pairStateVec.imag[thisIndexInPairVector]);
733  }
734  }
735 
736 }
737 
738 
739 /* Without nested parallelisation, only the outer most loops which call below are parallelised */
740 void zeroSomeAmps(Qureg qureg, long long int startInd, long long int numAmps) {
741  long long int i;
742 # ifdef _OPENMP
743 # pragma omp parallel for schedule (static)
744 # endif
745  for (i=startInd; i < startInd+numAmps; i++) {
746  qureg.stateVec.real[i] = 0;
747  qureg.stateVec.imag[i] = 0;
748  }
749 }
750 void normaliseSomeAmps(Qureg qureg, qreal norm, long long int startInd, long long int numAmps) {
751  long long int i;
752 # ifdef _OPENMP
753 # pragma omp parallel for schedule (static)
754 # endif
755  for (i=startInd; i < startInd+numAmps; i++) {
756  qureg.stateVec.real[i] /= norm;
757  qureg.stateVec.imag[i] /= norm;
758  }
759 }
761  Qureg qureg, qreal norm, int normFirst,
762  long long int startAmpInd, long long int numAmps, long long int blockSize
763 ) {
764  long long int numDubBlocks = numAmps / (2*blockSize);
765  long long int blockStartInd;
766 
767  if (normFirst) {
768  long long int dubBlockInd;
769 # ifdef _OPENMP
770 # pragma omp parallel for schedule (static) private (blockStartInd)
771 # endif
772  for (dubBlockInd=0; dubBlockInd < numDubBlocks; dubBlockInd++) {
773  blockStartInd = startAmpInd + dubBlockInd*2*blockSize;
774  normaliseSomeAmps(qureg, norm, blockStartInd, blockSize); // |0><0|
775  zeroSomeAmps( qureg, blockStartInd + blockSize, blockSize);
776  }
777  } else {
778  long long int dubBlockInd;
779 # ifdef _OPENMP
780 # pragma omp parallel for schedule (static) private (blockStartInd)
781 # endif
782  for (dubBlockInd=0; dubBlockInd < numDubBlocks; dubBlockInd++) {
783  blockStartInd = startAmpInd + dubBlockInd*2*blockSize;
784  zeroSomeAmps( qureg, blockStartInd, blockSize);
785  normaliseSomeAmps(qureg, norm, blockStartInd + blockSize, blockSize); // |1><1|
786  }
787  }
788 }
789 
791 void densmatr_collapseToKnownProbOutcome(Qureg qureg, int measureQubit, int outcome, qreal totalStateProb) {
792 
793  // only (global) indices (as bit sequence): '* outcome *(n+q) outcome *q are spared
794  // where n = measureQubit, q = qureg.numQubitsRepresented.
795  // We can thus step in blocks of 2^q+n, killing every second, and inside the others,
796  // stepping in sub-blocks of 2^q, killing every second.
797  // When outcome=1, we offset the start of these blocks by their size.
798  long long int innerBlockSize = (1LL << measureQubit);
799  long long int outerBlockSize = (1LL << (measureQubit + qureg.numQubitsRepresented));
800 
801  // Because there are 2^a number of nodes(/chunks), each node will contain 2^b number of blocks,
802  // or each block will span 2^c number of nodes. Similarly for the innerblocks.
803  long long int locNumAmps = qureg.numAmpsPerChunk;
804  long long int globalStartInd = qureg.chunkId * locNumAmps;
805  int innerBit = extractBit(measureQubit, globalStartInd);
806  int outerBit = extractBit(measureQubit + qureg.numQubitsRepresented, globalStartInd);
807 
808  // If this chunk's amps are entirely inside an outer block
809  if (locNumAmps <= outerBlockSize) {
810 
811  // if this is an undesired outer block, kill all elems
812  if (outerBit != outcome) {
813  zeroSomeAmps(qureg, 0, qureg.numAmpsPerChunk);
814  return;
815  }
816 
817  // othwerwise, if this is a desired outer block, and also entirely an inner block
818  if (locNumAmps <= innerBlockSize) {
819 
820  // and that inner block is undesired, kill all elems
821  if (innerBit != outcome)
822  zeroSomeAmps(qureg, 0, qureg.numAmpsPerChunk);
823  // otherwise normalise all elems
824  else
825  normaliseSomeAmps(qureg, totalStateProb, 0, qureg.numAmpsPerChunk);
826 
827  return;
828  }
829 
830  // otherwise this is a desired outer block which contains 2^a inner blocks; kill/renorm every second inner block
832  qureg, totalStateProb, innerBit==outcome, 0, qureg.numAmpsPerChunk, innerBlockSize);
833  return;
834  }
835 
836  // Otherwise, this chunk's amps contain multiple outer blocks (and hence multiple inner blocks)
837  long long int numOuterDoubleBlocks = locNumAmps / (2*outerBlockSize);
838  long long int firstBlockInd;
839 
840  // alternate norming* and zeroing the outer blocks (with order based on the desired outcome)
841  // These loops aren't parallelised, since they could have 1 or 2 iterations and will prevent
842  // inner parallelisation
843  if (outerBit == outcome) {
844 
845  for (long long int outerDubBlockInd = 0; outerDubBlockInd < numOuterDoubleBlocks; outerDubBlockInd++) {
846  firstBlockInd = outerDubBlockInd*2*outerBlockSize;
847 
848  // *norm only the desired inner blocks in the desired outer block
850  qureg, totalStateProb, innerBit==outcome,
851  firstBlockInd, outerBlockSize, innerBlockSize);
852 
853  // zero the undesired outer block
854  zeroSomeAmps(qureg, firstBlockInd + outerBlockSize, outerBlockSize);
855  }
856 
857  } else {
858 
859  for (long long int outerDubBlockInd = 0; outerDubBlockInd < numOuterDoubleBlocks; outerDubBlockInd++) {
860  firstBlockInd = outerDubBlockInd*2*outerBlockSize;
861 
862  // same thing but undesired outer blocks come first
863  zeroSomeAmps(qureg, firstBlockInd, outerBlockSize);
865  qureg, totalStateProb, innerBit==outcome,
866  firstBlockInd + outerBlockSize, outerBlockSize, innerBlockSize);
867  }
868  }
869 
870 }
871 
873 
874  /* sum of qureg^2, which is sum_i |qureg[i]|^2 */
875  long long int index;
876  long long int numAmps = qureg.numAmpsPerChunk;
877 
878  qreal trace = 0;
879  qreal *vecRe = qureg.stateVec.real;
880  qreal *vecIm = qureg.stateVec.imag;
881 
882 # ifdef _OPENMP
883 # pragma omp parallel \
884  shared (vecRe, vecIm, numAmps) \
885  private (index) \
886  reduction ( +:trace )
887 # endif
888  {
889 # ifdef _OPENMP
890 # pragma omp for schedule (static)
891 # endif
892  for (index=0LL; index<numAmps; index++) {
893 
894  trace += vecRe[index]*vecRe[index] + vecIm[index]*vecIm[index];
895  }
896  }
897 
898  return trace;
899 }
900 
901 void densmatr_mixDensityMatrix(Qureg combineQureg, qreal otherProb, Qureg otherQureg) {
902 
903  /* corresponding amplitudes live on the same node (same dimensions) */
904 
905  // unpack vars for OpenMP
906  qreal* combineVecRe = combineQureg.stateVec.real;
907  qreal* combineVecIm = combineQureg.stateVec.imag;
908  qreal* otherVecRe = otherQureg.stateVec.real;
909  qreal* otherVecIm = otherQureg.stateVec.imag;
910  long long int numAmps = combineQureg.numAmpsPerChunk;
911  long long int index;
912 
913 # ifdef _OPENMP
914 # pragma omp parallel \
915  default (none) \
916  shared (combineVecRe,combineVecIm,otherVecRe,otherVecIm, otherProb, numAmps) \
917  private (index)
918 # endif
919  {
920 # ifdef _OPENMP
921 # pragma omp for schedule (static)
922 # endif
923  for (index=0; index < numAmps; index++) {
924  combineVecRe[index] *= 1-otherProb;
925  combineVecIm[index] *= 1-otherProb;
926 
927  combineVecRe[index] += otherProb * otherVecRe[index];
928  combineVecIm[index] += otherProb * otherVecIm[index];
929  }
930  }
931 }
932 
935 
936  long long int index;
937  long long int numAmps = a.numAmpsPerChunk;
938 
939  qreal *aRe = a.stateVec.real;
940  qreal *aIm = a.stateVec.imag;
941  qreal *bRe = b.stateVec.real;
942  qreal *bIm = b.stateVec.imag;
943 
944  qreal trace = 0;
945  qreal difRe, difIm;
946 
947 # ifdef _OPENMP
948 # pragma omp parallel \
949  shared (aRe,aIm, bRe,bIm, numAmps) \
950  private (index,difRe,difIm) \
951  reduction ( +:trace )
952 # endif
953  {
954 # ifdef _OPENMP
955 # pragma omp for schedule (static)
956 # endif
957  for (index=0LL; index<numAmps; index++) {
958 
959  difRe = aRe[index] - bRe[index];
960  difIm = aIm[index] - bIm[index];
961  trace += difRe*difRe + difIm*difIm;
962  }
963  }
964 
965  return trace;
966 }
967 
970 
971  long long int index;
972  long long int numAmps = a.numAmpsPerChunk;
973 
974  qreal *aRe = a.stateVec.real;
975  qreal *aIm = a.stateVec.imag;
976  qreal *bRe = b.stateVec.real;
977  qreal *bIm = b.stateVec.imag;
978 
979  qreal trace = 0;
980 
981 # ifdef _OPENMP
982 # pragma omp parallel \
983  shared (aRe,aIm, bRe,bIm, numAmps) \
984  private (index) \
985  reduction ( +:trace )
986 # endif
987  {
988 # ifdef _OPENMP
989 # pragma omp for schedule (static)
990 # endif
991  for (index=0LL; index<numAmps; index++) {
992  trace += aRe[index]*bRe[index] + aIm[index]*bIm[index];
993  }
994  }
995 
996  return trace;
997 }
998 
999 
1002 
1003  /* Here, elements of pureState are not accessed (instead grabbed from qureg.pair).
1004  * We only consult the attributes.
1005  *
1006  * qureg is a density matrix, and pureState is a statevector.
1007  * Every node contains as many columns of qureg as amps by pureState.
1008  * (each node contains an integer, exponent-of-2 number of whole columns of qureg)
1009  * Ergo, this node contains columns:
1010  * qureg.chunkID * pureState.numAmpsPerChunk to
1011  * (qureg.chunkID + 1) * pureState.numAmpsPerChunk
1012  *
1013  * The first pureState.numAmpsTotal elements of qureg.pairStateVec are the
1014  * entire pureState state-vector
1015  */
1016 
1017  // unpack everything for OPENMP
1018  qreal* vecRe = qureg.pairStateVec.real;
1019  qreal* vecIm = qureg.pairStateVec.imag;
1020  qreal* densRe = qureg.stateVec.real;
1021  qreal* densIm = qureg.stateVec.imag;
1022 
1023  int row, col;
1024  int dim = (int) pureState.numAmpsTotal;
1025  int colsPerNode = (int) pureState.numAmpsPerChunk;
1026  // using only int, because density matrix has squared as many amps so its
1027  // iteration would be impossible if the pureStates numAmpsTotal didn't fit into int
1028 
1029  // starting GLOBAL column index of the qureg columns on this node
1030  int startCol = (int) (qureg.chunkId * pureState.numAmpsPerChunk);
1031 
1032  qreal densElemRe, densElemIm;
1033  qreal prefacRe, prefacIm;
1034  qreal rowSumRe, rowSumIm;
1035  qreal vecElemRe, vecElemIm;
1036 
1037  // quantity computed by this node
1038  qreal globalSumRe = 0; // imag-component is assumed zero
1039 
1040 # ifdef _OPENMP
1041 # pragma omp parallel \
1042  shared (vecRe,vecIm,densRe,densIm, dim,colsPerNode,startCol) \
1043  private (row,col, prefacRe,prefacIm, rowSumRe,rowSumIm, densElemRe,densElemIm, vecElemRe,vecElemIm) \
1044  reduction ( +:globalSumRe )
1045 # endif
1046  {
1047 # ifdef _OPENMP
1048 # pragma omp for schedule (static)
1049 # endif
1050  // indices of my GLOBAL row
1051  for (row=0; row < dim; row++) {
1052 
1053  // single element of conj(pureState)
1054  prefacRe = vecRe[row];
1055  prefacIm = - vecIm[row];
1056 
1057  rowSumRe = 0;
1058  rowSumIm = 0;
1059 
1060  // indices of my LOCAL column
1061  for (col=0; col < colsPerNode; col++) {
1062 
1063  // my local density element
1064  densElemRe = densRe[row + dim*col];
1065  densElemIm = densIm[row + dim*col];
1066 
1067  // state-vector element
1068  vecElemRe = vecRe[startCol + col];
1069  vecElemIm = vecIm[startCol + col];
1070 
1071  rowSumRe += densElemRe*vecElemRe - densElemIm*vecElemIm;
1072  rowSumIm += densElemRe*vecElemIm + densElemIm*vecElemRe;
1073  }
1074 
1075  globalSumRe += rowSumRe*prefacRe - rowSumIm*prefacIm;
1076  }
1077  }
1078 
1079  return globalSumRe;
1080 }
1081 
1083 
1084  qreal innerProdReal = 0;
1085  qreal innerProdImag = 0;
1086 
1087  long long int index;
1088  long long int numAmps = bra.numAmpsPerChunk;
1089  qreal *braVecReal = bra.stateVec.real;
1090  qreal *braVecImag = bra.stateVec.imag;
1091  qreal *ketVecReal = ket.stateVec.real;
1092  qreal *ketVecImag = ket.stateVec.imag;
1093 
1094  qreal braRe, braIm, ketRe, ketIm;
1095 
1096 # ifdef _OPENMP
1097 # pragma omp parallel \
1098  shared (braVecReal, braVecImag, ketVecReal, ketVecImag, numAmps) \
1099  private (index, braRe, braIm, ketRe, ketIm) \
1100  reduction ( +:innerProdReal, innerProdImag )
1101 # endif
1102  {
1103 # ifdef _OPENMP
1104 # pragma omp for schedule (static)
1105 # endif
1106  for (index=0; index < numAmps; index++) {
1107  braRe = braVecReal[index];
1108  braIm = braVecImag[index];
1109  ketRe = ketVecReal[index];
1110  ketIm = ketVecImag[index];
1111 
1112  // conj(bra_i) * ket_i
1113  innerProdReal += braRe*ketRe + braIm*ketIm;
1114  innerProdImag += braRe*ketIm - braIm*ketRe;
1115  }
1116  }
1117 
1118  Complex innerProd;
1119  innerProd.real = innerProdReal;
1120  innerProd.imag = innerProdImag;
1121  return innerProd;
1122 }
1123 
1124 
1125 
1126 void densmatr_initClassicalState (Qureg qureg, long long int stateInd)
1127 {
1128  // dimension of the state vector
1129  long long int densityNumElems = qureg.numAmpsPerChunk;
1130 
1131  // Can't use qureg->stateVec as a private OMP var
1132  qreal *densityReal = qureg.stateVec.real;
1133  qreal *densityImag = qureg.stateVec.imag;
1134 
1135  // initialise the state to all zeros
1136  long long int index;
1137 # ifdef _OPENMP
1138 # pragma omp parallel \
1139  default (none) \
1140  shared (densityNumElems, densityReal, densityImag) \
1141  private (index)
1142 # endif
1143  {
1144 # ifdef _OPENMP
1145 # pragma omp for schedule (static)
1146 # endif
1147  for (index=0; index<densityNumElems; index++) {
1148  densityReal[index] = 0.0;
1149  densityImag[index] = 0.0;
1150  }
1151  }
1152 
1153  // index of the single density matrix elem to set non-zero
1154  long long int densityDim = 1LL << qureg.numQubitsRepresented;
1155  long long int densityInd = (densityDim + 1)*stateInd;
1156 
1157  // give the specified classical state prob 1
1158  if (qureg.chunkId == densityInd / densityNumElems){
1159  densityReal[densityInd % densityNumElems] = 1.0;
1160  densityImag[densityInd % densityNumElems] = 0.0;
1161  }
1162 }
1163 
1164 
1166 {
1167  // |+><+| = sum_i 1/sqrt(2^N) |i> 1/sqrt(2^N) <j| = sum_ij 1/2^N |i><j|
1168  long long int dim = (1LL << qureg.numQubitsRepresented);
1169  qreal probFactor = 1.0/((qreal) dim);
1170 
1171  // Can't use qureg->stateVec as a private OMP var
1172  qreal *densityReal = qureg.stateVec.real;
1173  qreal *densityImag = qureg.stateVec.imag;
1174 
1175  long long int index;
1176  long long int chunkSize = qureg.numAmpsPerChunk;
1177  // initialise the state to |+++..+++> = 1/normFactor {1, 1, 1, ...}
1178 # ifdef _OPENMP
1179 # pragma omp parallel \
1180  default (none) \
1181  shared (chunkSize, densityReal, densityImag, probFactor) \
1182  private (index)
1183 # endif
1184  {
1185 # ifdef _OPENMP
1186 # pragma omp for schedule (static)
1187 # endif
1188  for (index=0; index<chunkSize; index++) {
1189  densityReal[index] = probFactor;
1190  densityImag[index] = 0.0;
1191  }
1192  }
1193 }
1194 
1195 void densmatr_initPureStateLocal(Qureg targetQureg, Qureg copyQureg) {
1196 
1197  /* copyQureg amps aren't explicitly used - they're accessed through targetQureg.pair,
1198  * which contains the full pure statevector.
1199  * targetQureg has as many columns on node as copyQureg has amps
1200  */
1201 
1202  long long int colOffset = targetQureg.chunkId * copyQureg.numAmpsPerChunk;
1203  long long int colsPerNode = copyQureg.numAmpsPerChunk;
1204  long long int rowsPerNode = copyQureg.numAmpsTotal;
1205 
1206  // unpack vars for OpenMP
1207  qreal* vecRe = targetQureg.pairStateVec.real;
1208  qreal* vecIm = targetQureg.pairStateVec.imag;
1209  qreal* densRe = targetQureg.stateVec.real;
1210  qreal* densIm = targetQureg.stateVec.imag;
1211 
1212  long long int col, row, index;
1213 
1214  // a_i conj(a_j) |i><j|
1215  qreal ketRe, ketIm, braRe, braIm;
1216 
1217 # ifdef _OPENMP
1218 # pragma omp parallel \
1219  default (none) \
1220  shared (colOffset, colsPerNode,rowsPerNode, vecRe,vecIm,densRe,densIm) \
1221  private (col,row, ketRe,ketIm,braRe,braIm, index)
1222 # endif
1223  {
1224 # ifdef _OPENMP
1225 # pragma omp for schedule (static)
1226 # endif
1227  // local column
1228  for (col=0; col < colsPerNode; col++) {
1229 
1230  // global row
1231  for (row=0; row < rowsPerNode; row++) {
1232 
1233  // get pure state amps
1234  ketRe = vecRe[row];
1235  ketIm = vecIm[row];
1236  braRe = vecRe[col + colOffset];
1237  braIm = - vecIm[col + colOffset]; // minus for conjugation
1238 
1239  // update density matrix
1240  index = row + col*rowsPerNode; // local ind
1241  densRe[index] = ketRe*braRe - ketIm*braIm;
1242  densIm[index] = ketRe*braIm + ketIm*braRe;
1243  }
1244  }
1245  }
1246 }
1247 
1248 void statevec_setAmps(Qureg qureg, long long int startInd, qreal* reals, qreal* imags, long long int numAmps) {
1249 
1250  /* this is actually distributed, since the user's code runs on every node */
1251 
1252  // local start/end indices of the given amplitudes, assuming they fit in this chunk
1253  // these may be negative or above qureg.numAmpsPerChunk
1254  long long int localStartInd = startInd - qureg.chunkId*qureg.numAmpsPerChunk;
1255  long long int localEndInd = localStartInd + numAmps; // exclusive
1256 
1257  // add this to a local index to get corresponding elem in reals & imags
1258  long long int offset = qureg.chunkId*qureg.numAmpsPerChunk - startInd;
1259 
1260  // restrict these indices to fit into this chunk
1261  if (localStartInd < 0)
1262  localStartInd = 0;
1263  if (localEndInd > qureg.numAmpsPerChunk)
1264  localEndInd = qureg.numAmpsPerChunk;
1265  // they may now be out of order = no iterations
1266 
1267  // unpacking OpenMP vars
1268  long long int index;
1269  qreal* vecRe = qureg.stateVec.real;
1270  qreal* vecIm = qureg.stateVec.imag;
1271 
1272 # ifdef _OPENMP
1273 # pragma omp parallel \
1274  default (none) \
1275  shared (localStartInd,localEndInd, vecRe,vecIm, reals,imags, offset) \
1276  private (index)
1277 # endif
1278  {
1279 # ifdef _OPENMP
1280 # pragma omp for schedule (static)
1281 # endif
1282  // iterate these local inds - this might involve no iterations
1283  for (index=localStartInd; index < localEndInd; index++) {
1284  vecRe[index] = reals[index + offset];
1285  vecIm[index] = imags[index + offset];
1286  }
1287  }
1288 }
1289 
1290 void statevec_createQureg(Qureg *qureg, int numQubits, QuESTEnv env)
1291 {
1292  long long int numAmps = 1LL << numQubits;
1293  long long int numAmpsPerRank = numAmps/env.numRanks;
1294 
1295  if (numAmpsPerRank > SIZE_MAX) {
1296  printf("Could not allocate memory (cannot fit numAmps into size_t)!");
1297  exit (EXIT_FAILURE);
1298  }
1299 
1300  size_t arrSize = (size_t) (numAmpsPerRank * sizeof(*(qureg->stateVec.real)));
1301  qureg->stateVec.real = malloc(arrSize);
1302  qureg->stateVec.imag = malloc(arrSize);
1303  if (env.numRanks>1){
1304  qureg->pairStateVec.real = malloc(arrSize);
1305  qureg->pairStateVec.imag = malloc(arrSize);
1306  }
1307 
1308  if ( (!(qureg->stateVec.real) || !(qureg->stateVec.imag))
1309  && numAmpsPerRank ) {
1310  printf("Could not allocate memory!");
1311  exit (EXIT_FAILURE);
1312  }
1313 
1314  if ( env.numRanks>1 && (!(qureg->pairStateVec.real) || !(qureg->pairStateVec.imag))
1315  && numAmpsPerRank ) {
1316  printf("Could not allocate memory!");
1317  exit (EXIT_FAILURE);
1318  }
1319 
1320  qureg->numQubitsInStateVec = numQubits;
1321  qureg->numAmpsTotal = numAmps;
1322  qureg->numAmpsPerChunk = numAmpsPerRank;
1323  qureg->chunkId = env.rank;
1324  qureg->numChunks = env.numRanks;
1325  qureg->isDensityMatrix = 0;
1326 }
1327 
1329 
1330  qureg.numQubitsInStateVec = 0;
1331  qureg.numAmpsTotal = 0;
1332  qureg.numAmpsPerChunk = 0;
1333 
1334  free(qureg.stateVec.real);
1335  free(qureg.stateVec.imag);
1336  if (env.numRanks>1){
1337  free(qureg.pairStateVec.real);
1338  free(qureg.pairStateVec.imag);
1339  }
1340  qureg.stateVec.real = NULL;
1341  qureg.stateVec.imag = NULL;
1342  qureg.pairStateVec.real = NULL;
1343  qureg.pairStateVec.imag = NULL;
1344 }
1345 
1347 
1348  // the 2^numQubits values will be evenly split between the env.numRanks nodes
1349  DiagonalOp op;
1350  op.numQubits = numQubits;
1351  op.numElemsPerChunk = (1LL << numQubits) / env.numRanks;
1352  op.chunkId = env.rank;
1353  op.numChunks = env.numRanks;
1354 
1355  // allocate CPU memory (initialised to zero)
1356  op.real = (qreal*) calloc(op.numElemsPerChunk, sizeof(qreal));
1357  op.imag = (qreal*) calloc(op.numElemsPerChunk, sizeof(qreal));
1358 
1359  // check cpu memory allocation was successful
1360  if ( !op.real || !op.imag ) {
1361  printf("Could not allocate memory!\n");
1362  exit(EXIT_FAILURE);
1363  }
1364 
1365  return op;
1366 }
1367 
1369  free(op.real);
1370  free(op.imag);
1371 }
1372 
1374  // nothing to do on CPU
1375 }
1376 
1378 
1379  /* each node modifies its op sub-partition, evaluating the full hamil
1380  * for every element in the sub-partition
1381  */
1382 
1383  // unpack op
1384  long long int offset = op.chunkId * op.numElemsPerChunk;
1385  long long int numElems = op.numElemsPerChunk;
1386  qreal* opRe = op.real;
1387  qreal* opIm = op.imag;
1388 
1389  // unpack hamil
1390  int numTerms = hamil.numSumTerms;
1391  int numQubits = hamil.numQubits;
1392  qreal* coeffs = hamil.termCoeffs;
1393  enum pauliOpType* codes = hamil.pauliCodes;
1394 
1395  // private OpenMP vars
1396  long long int i, globalInd;
1397  qreal elem;
1398  int t, q, isOddNumOnes, sign;
1399 
1400 # ifdef _OPENMP
1401 # pragma omp parallel \
1402  default (none) \
1403  shared (offset,numElems, opRe,opIm, numTerms,numQubits,coeffs,codes) \
1404  private (i,globalInd, elem, isOddNumOnes,t,q,sign)
1405 # endif
1406  {
1407 # ifdef _OPENMP
1408 # pragma omp for schedule (static)
1409 # endif
1410  for (i=0; i<numElems; i++) {
1411 
1412  globalInd = i + offset;
1413  elem = 0;
1414 
1415  // add every Hamiltonian coefficient to this element, either + or -
1416  for (t=0; t<numTerms; t++) {
1417 
1418  // determine parity of ones (in globalInd basis state) of the current term's targets
1419  isOddNumOnes = 0;
1420  for (q=0; q<numQubits; q++)
1421  if (codes[q + t*numQubits] == PAULI_Z)
1422  if (extractBit(q, globalInd))
1423  isOddNumOnes = !isOddNumOnes;
1424 
1425  // add +- term coeff (avoiding thread divergence)
1426  sign = 1 - 2*isOddNumOnes; // (-1 if isOddNumOnes, else +1)
1427  elem += coeffs[t] * sign;
1428  }
1429 
1430  opRe[i] = elem;
1431  opIm[i] = 0;
1432  }
1433  }
1434 
1435  // we don't synch to GPU, because in GPU mode, the GPU populates and synchs to RAM
1436  // agnostic_syncDiagonalOp(op);
1437 }
1438 
1439 void statevec_reportStateToScreen(Qureg qureg, QuESTEnv env, int reportRank){
1440  long long int index;
1441  int rank;
1442  if (qureg.numQubitsInStateVec<=5){
1443  for (rank=0; rank<qureg.numChunks; rank++){
1444  if (qureg.chunkId==rank){
1445  if (reportRank) {
1446  printf("Reporting state from rank %d [\n", qureg.chunkId);
1447  printf("real, imag\n");
1448  } else if (rank==0) {
1449  printf("Reporting state [\n");
1450  printf("real, imag\n");
1451  }
1452 
1453  for(index=0; index<qureg.numAmpsPerChunk; index++){
1454  //printf(REAL_STRING_FORMAT ", " REAL_STRING_FORMAT "\n", qureg.pairStateVec.real[index], qureg.pairStateVec.imag[index]);
1455  printf(REAL_STRING_FORMAT ", " REAL_STRING_FORMAT "\n", qureg.stateVec.real[index], qureg.stateVec.imag[index]);
1456  }
1457  if (reportRank || rank==qureg.numChunks-1) printf("]\n");
1458  }
1459  syncQuESTEnv(env);
1460  }
1461  } else printf("Error: reportStateToScreen will not print output for systems of more than 5 qubits.\n");
1462 }
1463 
1465 {
1466  long long int stateVecSize;
1467  long long int index;
1468 
1469  // dimension of the state vector
1470  stateVecSize = qureg.numAmpsPerChunk;
1471 
1472  // Can't use qureg->stateVec as a private OMP var
1473  qreal *stateVecReal = qureg.stateVec.real;
1474  qreal *stateVecImag = qureg.stateVec.imag;
1475 
1476  // initialise the state-vector to all-zeroes
1477 # ifdef _OPENMP
1478 # pragma omp parallel \
1479  default (none) \
1480  shared (stateVecSize, stateVecReal, stateVecImag) \
1481  private (index)
1482 # endif
1483  {
1484 # ifdef _OPENMP
1485 # pragma omp for schedule (static)
1486 # endif
1487  for (index=0; index<stateVecSize; index++) {
1488  stateVecReal[index] = 0.0;
1489  stateVecImag[index] = 0.0;
1490  }
1491  }
1492 }
1493 
1495 {
1496  statevec_initBlankState(qureg);
1497  if (qureg.chunkId==0){
1498  // zero state |0000..0000> has probability 1
1499  qureg.stateVec.real[0] = 1.0;
1500  qureg.stateVec.imag[0] = 0.0;
1501  }
1502 }
1503 
1505 {
1506  long long int chunkSize, stateVecSize;
1507  long long int index;
1508 
1509  // dimension of the state vector
1510  chunkSize = qureg.numAmpsPerChunk;
1511  stateVecSize = chunkSize*qureg.numChunks;
1512  qreal normFactor = 1.0/sqrt((qreal)stateVecSize);
1513 
1514  // Can't use qureg->stateVec as a private OMP var
1515  qreal *stateVecReal = qureg.stateVec.real;
1516  qreal *stateVecImag = qureg.stateVec.imag;
1517 
1518  // initialise the state to |+++..+++> = 1/normFactor {1, 1, 1, ...}
1519 # ifdef _OPENMP
1520 # pragma omp parallel \
1521  default (none) \
1522  shared (chunkSize, stateVecReal, stateVecImag, normFactor) \
1523  private (index)
1524 # endif
1525  {
1526 # ifdef _OPENMP
1527 # pragma omp for schedule (static)
1528 # endif
1529  for (index=0; index<chunkSize; index++) {
1530  stateVecReal[index] = normFactor;
1531  stateVecImag[index] = 0.0;
1532  }
1533  }
1534 }
1535 
1536 void statevec_initClassicalState (Qureg qureg, long long int stateInd)
1537 {
1538  long long int stateVecSize;
1539  long long int index;
1540 
1541  // dimension of the state vector
1542  stateVecSize = qureg.numAmpsPerChunk;
1543 
1544  // Can't use qureg->stateVec as a private OMP var
1545  qreal *stateVecReal = qureg.stateVec.real;
1546  qreal *stateVecImag = qureg.stateVec.imag;
1547 
1548  // initialise the state to vector to all zeros
1549 # ifdef _OPENMP
1550 # pragma omp parallel \
1551  default (none) \
1552  shared (stateVecSize, stateVecReal, stateVecImag) \
1553  private (index)
1554 # endif
1555  {
1556 # ifdef _OPENMP
1557 # pragma omp for schedule (static)
1558 # endif
1559  for (index=0; index<stateVecSize; index++) {
1560  stateVecReal[index] = 0.0;
1561  stateVecImag[index] = 0.0;
1562  }
1563  }
1564 
1565  // give the specified classical state prob 1
1566  if (qureg.chunkId == stateInd/stateVecSize){
1567  stateVecReal[stateInd % stateVecSize] = 1.0;
1568  stateVecImag[stateInd % stateVecSize] = 0.0;
1569  }
1570 }
1571 
1572 void statevec_cloneQureg(Qureg targetQureg, Qureg copyQureg) {
1573 
1574  // registers are equal sized, so nodes hold the same state-vector partitions
1575  long long int stateVecSize;
1576  long long int index;
1577 
1578  // dimension of the state vector
1579  stateVecSize = targetQureg.numAmpsPerChunk;
1580 
1581  // Can't use qureg->stateVec as a private OMP var
1582  qreal *targetStateVecReal = targetQureg.stateVec.real;
1583  qreal *targetStateVecImag = targetQureg.stateVec.imag;
1584  qreal *copyStateVecReal = copyQureg.stateVec.real;
1585  qreal *copyStateVecImag = copyQureg.stateVec.imag;
1586 
1587  // initialise the state to |0000..0000>
1588 # ifdef _OPENMP
1589 # pragma omp parallel \
1590  default (none) \
1591  shared (stateVecSize, targetStateVecReal, targetStateVecImag, copyStateVecReal, copyStateVecImag) \
1592  private (index)
1593 # endif
1594  {
1595 # ifdef _OPENMP
1596 # pragma omp for schedule (static)
1597 # endif
1598  for (index=0; index<stateVecSize; index++) {
1599  targetStateVecReal[index] = copyStateVecReal[index];
1600  targetStateVecImag[index] = copyStateVecImag[index];
1601  }
1602  }
1603 }
1604 
1611 void statevec_initStateOfSingleQubit(Qureg *qureg, int qubitId, int outcome)
1612 {
1613  long long int chunkSize, stateVecSize;
1614  long long int index;
1615  int bit;
1616  long long int chunkId=qureg->chunkId;
1617 
1618  // dimension of the state vector
1619  chunkSize = qureg->numAmpsPerChunk;
1620  stateVecSize = chunkSize*qureg->numChunks;
1621  qreal normFactor = 1.0/sqrt((qreal)stateVecSize/2.0);
1622 
1623  // Can't use qureg->stateVec as a private OMP var
1624  qreal *stateVecReal = qureg->stateVec.real;
1625  qreal *stateVecImag = qureg->stateVec.imag;
1626 
1627  // initialise the state to |0000..0000>
1628 # ifdef _OPENMP
1629 # pragma omp parallel \
1630  default (none) \
1631  shared (chunkSize, stateVecReal, stateVecImag, normFactor, qubitId, outcome, chunkId) \
1632  private (index, bit)
1633 # endif
1634  {
1635 # ifdef _OPENMP
1636 # pragma omp for schedule (static)
1637 # endif
1638  for (index=0; index<chunkSize; index++) {
1639  bit = extractBit(qubitId, index+chunkId*chunkSize);
1640  if (bit==outcome) {
1641  stateVecReal[index] = normFactor;
1642  stateVecImag[index] = 0.0;
1643  } else {
1644  stateVecReal[index] = 0.0;
1645  stateVecImag[index] = 0.0;
1646  }
1647  }
1648  }
1649 }
1650 
1651 
1658 {
1659  long long int chunkSize;
1660  long long int index;
1661  long long int indexOffset;
1662 
1663  // dimension of the state vector
1664  chunkSize = qureg.numAmpsPerChunk;
1665 
1666  // Can't use qureg->stateVec as a private OMP var
1667  qreal *stateVecReal = qureg.stateVec.real;
1668  qreal *stateVecImag = qureg.stateVec.imag;
1669 
1670  indexOffset = chunkSize * qureg.chunkId;
1671 
1672  // initialise the state to |0000..0000>
1673 # ifdef _OPENMP
1674 # pragma omp parallel \
1675  default (none) \
1676  shared (chunkSize, stateVecReal, stateVecImag, indexOffset) \
1677  private (index)
1678 # endif
1679  {
1680 # ifdef _OPENMP
1681 # pragma omp for schedule (static)
1682 # endif
1683  for (index=0; index<chunkSize; index++) {
1684  stateVecReal[index] = ((indexOffset + index)*2.0)/10.0;
1685  stateVecImag[index] = ((indexOffset + index)*2.0+1.0)/10.0;
1686  }
1687  }
1688 }
1689 
1690 // returns 1 if successful, else 0
1691 int statevec_initStateFromSingleFile(Qureg *qureg, char filename[200], QuESTEnv env){
1692  long long int chunkSize, stateVecSize;
1693  long long int indexInChunk, totalIndex;
1694 
1695  chunkSize = qureg->numAmpsPerChunk;
1696  stateVecSize = chunkSize*qureg->numChunks;
1697 
1698  qreal *stateVecReal = qureg->stateVec.real;
1699  qreal *stateVecImag = qureg->stateVec.imag;
1700 
1701  FILE *fp;
1702  char line[200];
1703 
1704  for (int rank=0; rank<(qureg->numChunks); rank++){
1705  if (rank==qureg->chunkId){
1706  fp = fopen(filename, "r");
1707 
1708  // indicate file open failure
1709  if (fp == NULL)
1710  return 0;
1711 
1712  indexInChunk = 0; totalIndex = 0;
1713  while (fgets(line, sizeof(char)*200, fp) != NULL && totalIndex<stateVecSize){
1714  if (line[0]!='#'){
1715  int chunkId = (int) (totalIndex/chunkSize);
1716  if (chunkId==qureg->chunkId){
1717  # if QuEST_PREC==1
1718  sscanf(line, "%f, %f", &(stateVecReal[indexInChunk]),
1719  &(stateVecImag[indexInChunk]));
1720  # elif QuEST_PREC==2
1721  sscanf(line, "%lf, %lf", &(stateVecReal[indexInChunk]),
1722  &(stateVecImag[indexInChunk]));
1723  # elif QuEST_PREC==4
1724  sscanf(line, "%Lf, %Lf", &(stateVecReal[indexInChunk]),
1725  &(stateVecImag[indexInChunk]));
1726  # endif
1727  indexInChunk += 1;
1728  }
1729  totalIndex += 1;
1730  }
1731  }
1732  fclose(fp);
1733  }
1734  syncQuESTEnv(env);
1735  }
1736 
1737  // indicate success
1738  return 1;
1739 }
1740 
1741 int statevec_compareStates(Qureg mq1, Qureg mq2, qreal precision){
1742  qreal diff;
1743  long long int chunkSize = mq1.numAmpsPerChunk;
1744 
1745  for (long long int i=0; i<chunkSize; i++){
1746  diff = absReal(mq1.stateVec.real[i] - mq2.stateVec.real[i]);
1747  if (diff>precision) return 0;
1748  diff = absReal(mq1.stateVec.imag[i] - mq2.stateVec.imag[i]);
1749  if (diff>precision) return 0;
1750  }
1751  return 1;
1752 }
1753 
1754 void statevec_compactUnitaryLocal (Qureg qureg, int targetQubit, Complex alpha, Complex beta)
1755 {
1756  long long int sizeBlock, sizeHalfBlock;
1757  long long int thisBlock, // current block
1758  indexUp,indexLo; // current index and corresponding index in lower half block
1759 
1760  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
1761  long long int thisTask;
1762  long long int numTasks=qureg.numAmpsPerChunk>>1;
1763 
1764  // set dimensions
1765  sizeHalfBlock = 1LL << targetQubit;
1766  sizeBlock = 2LL * sizeHalfBlock;
1767 
1768  // Can't use qureg.stateVec as a private OMP var
1769  qreal *stateVecReal = qureg.stateVec.real;
1770  qreal *stateVecImag = qureg.stateVec.imag;
1771  qreal alphaImag=alpha.imag, alphaReal=alpha.real;
1772  qreal betaImag=beta.imag, betaReal=beta.real;
1773 
1774 # ifdef _OPENMP
1775 # pragma omp parallel \
1776  default (none) \
1777  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, alphaReal,alphaImag, betaReal,betaImag, numTasks) \
1778  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
1779 # endif
1780  {
1781 # ifdef _OPENMP
1782 # pragma omp for schedule (static)
1783 # endif
1784  for (thisTask=0; thisTask<numTasks; thisTask++) {
1785 
1786  thisBlock = thisTask / sizeHalfBlock;
1787  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
1788  indexLo = indexUp + sizeHalfBlock;
1789 
1790  // store current state vector values in temp variables
1791  stateRealUp = stateVecReal[indexUp];
1792  stateImagUp = stateVecImag[indexUp];
1793 
1794  stateRealLo = stateVecReal[indexLo];
1795  stateImagLo = stateVecImag[indexLo];
1796 
1797  // state[indexUp] = alpha * state[indexUp] - conj(beta) * state[indexLo]
1798  stateVecReal[indexUp] = alphaReal*stateRealUp - alphaImag*stateImagUp
1799  - betaReal*stateRealLo - betaImag*stateImagLo;
1800  stateVecImag[indexUp] = alphaReal*stateImagUp + alphaImag*stateRealUp
1801  - betaReal*stateImagLo + betaImag*stateRealLo;
1802 
1803  // state[indexLo] = beta * state[indexUp] + conj(alpha) * state[indexLo]
1804  stateVecReal[indexLo] = betaReal*stateRealUp - betaImag*stateImagUp
1805  + alphaReal*stateRealLo + alphaImag*stateImagLo;
1806  stateVecImag[indexLo] = betaReal*stateImagUp + betaImag*stateRealUp
1807  + alphaReal*stateImagLo - alphaImag*stateRealLo;
1808  }
1809  }
1810 
1811 }
1812 
1813 void statevec_multiControlledTwoQubitUnitaryLocal(Qureg qureg, long long int ctrlMask, int q1, int q2, ComplexMatrix4 u) {
1814 
1815  // can't use qureg.stateVec as a private OMP var
1816  qreal *reVec = qureg.stateVec.real;
1817  qreal *imVec = qureg.stateVec.imag;
1818 
1819  // the global (between all nodes) index of this node's start index
1820  long long int globalIndStart = qureg.chunkId*qureg.numAmpsPerChunk;
1821 
1822  long long int numTasks = qureg.numAmpsPerChunk >> 2; // each iteration updates 4 amplitudes
1823  long long int thisTask;
1824  long long int thisGlobalInd00;
1825  long long int ind00, ind01, ind10, ind11;
1826  qreal re00, re01, re10, re11;
1827  qreal im00, im01, im10, im11;
1828 
1829 # ifdef _OPENMP
1830 # pragma omp parallel \
1831  default (none) \
1832  shared (reVec,imVec,globalIndStart,numTasks,ctrlMask,u,q2,q1) \
1833  private (thisTask, thisGlobalInd00, ind00,ind01,ind10,ind11, re00,re01,re10,re11, im00,im01,im10,im11)
1834 # endif
1835  {
1836 # ifdef _OPENMP
1837 # pragma omp for schedule (static)
1838 # endif
1839  for (thisTask=0; thisTask<numTasks; thisTask++) {
1840 
1841  // determine ind00 of |..0..0..>
1842  ind00 = insertTwoZeroBits(thisTask, q1, q2);
1843 
1844  // skip amplitude if controls aren't in 1 state (overloaded for speed)
1845  thisGlobalInd00 = ind00 + globalIndStart;
1846  if (ctrlMask && ((ctrlMask & thisGlobalInd00) != ctrlMask))
1847  continue;
1848 
1849  // inds of |..0..1..>, |..1..0..> and |..1..1..>
1850  ind01 = flipBit(ind00, q1);
1851  ind10 = flipBit(ind00, q2);
1852  ind11 = flipBit(ind01, q2);
1853 
1854  // extract statevec amplitudes
1855  re00 = reVec[ind00]; im00 = imVec[ind00];
1856  re01 = reVec[ind01]; im01 = imVec[ind01];
1857  re10 = reVec[ind10]; im10 = imVec[ind10];
1858  re11 = reVec[ind11]; im11 = imVec[ind11];
1859 
1860  // apply u * {amp00, amp01, amp10, amp11}
1861  reVec[ind00] =
1862  u.real[0][0]*re00 - u.imag[0][0]*im00 +
1863  u.real[0][1]*re01 - u.imag[0][1]*im01 +
1864  u.real[0][2]*re10 - u.imag[0][2]*im10 +
1865  u.real[0][3]*re11 - u.imag[0][3]*im11;
1866  imVec[ind00] =
1867  u.imag[0][0]*re00 + u.real[0][0]*im00 +
1868  u.imag[0][1]*re01 + u.real[0][1]*im01 +
1869  u.imag[0][2]*re10 + u.real[0][2]*im10 +
1870  u.imag[0][3]*re11 + u.real[0][3]*im11;
1871 
1872  reVec[ind01] =
1873  u.real[1][0]*re00 - u.imag[1][0]*im00 +
1874  u.real[1][1]*re01 - u.imag[1][1]*im01 +
1875  u.real[1][2]*re10 - u.imag[1][2]*im10 +
1876  u.real[1][3]*re11 - u.imag[1][3]*im11;
1877  imVec[ind01] =
1878  u.imag[1][0]*re00 + u.real[1][0]*im00 +
1879  u.imag[1][1]*re01 + u.real[1][1]*im01 +
1880  u.imag[1][2]*re10 + u.real[1][2]*im10 +
1881  u.imag[1][3]*re11 + u.real[1][3]*im11;
1882 
1883  reVec[ind10] =
1884  u.real[2][0]*re00 - u.imag[2][0]*im00 +
1885  u.real[2][1]*re01 - u.imag[2][1]*im01 +
1886  u.real[2][2]*re10 - u.imag[2][2]*im10 +
1887  u.real[2][3]*re11 - u.imag[2][3]*im11;
1888  imVec[ind10] =
1889  u.imag[2][0]*re00 + u.real[2][0]*im00 +
1890  u.imag[2][1]*re01 + u.real[2][1]*im01 +
1891  u.imag[2][2]*re10 + u.real[2][2]*im10 +
1892  u.imag[2][3]*re11 + u.real[2][3]*im11;
1893 
1894  reVec[ind11] =
1895  u.real[3][0]*re00 - u.imag[3][0]*im00 +
1896  u.real[3][1]*re01 - u.imag[3][1]*im01 +
1897  u.real[3][2]*re10 - u.imag[3][2]*im10 +
1898  u.real[3][3]*re11 - u.imag[3][3]*im11;
1899  imVec[ind11] =
1900  u.imag[3][0]*re00 + u.real[3][0]*im00 +
1901  u.imag[3][1]*re01 + u.real[3][1]*im01 +
1902  u.imag[3][2]*re10 + u.real[3][2]*im10 +
1903  u.imag[3][3]*re11 + u.real[3][3]*im11;
1904  }
1905  }
1906 }
1907 
1908 int qsortComp(const void *a, const void *b) {
1909  return *(int*)a - *(int*)b;
1910 }
1911 
1912 void statevec_multiControlledMultiQubitUnitaryLocal(Qureg qureg, long long int ctrlMask, int* targs, int numTargs, ComplexMatrixN u)
1913 {
1914  // can't use qureg.stateVec as a private OMP var
1915  qreal *reVec = qureg.stateVec.real;
1916  qreal *imVec = qureg.stateVec.imag;
1917 
1918  long long int numTasks = qureg.numAmpsPerChunk >> numTargs; // kernel called on every 1 in 2^numTargs amplitudes
1919  long long int numTargAmps = 1 << u.numQubits; // num amps to be modified by each task
1920 
1921  // the global (between all nodes) index of this node's start index
1922  long long int globalIndStart = qureg.chunkId*qureg.numAmpsPerChunk;
1923 
1924  long long int thisTask;
1925  long long int thisInd00; // this thread's index of |..0..0..> (target qubits = 0)
1926  long long int thisGlobalInd00; // the global (between all nodes) index of this thread's |..0..0..> state
1927  long long int ind; // each thread's iteration of amplitudes to modify
1928  int i, t, r, c; // each thread's iteration of amps and targets
1929  qreal reElem, imElem; // each thread's iteration of u elements
1930 
1931  // each thread/task will record and modify numTargAmps amplitudes, privately
1932  // (of course, tasks eliminated by the ctrlMask won't edit their allocation)
1933  //
1934  // If we're NOT on windows, we can fortunately use the stack directly
1935  #ifndef _WIN32
1936  long long int ampInds[numTargAmps];
1937  qreal reAmps[numTargAmps];
1938  qreal imAmps[numTargAmps];
1939 
1940  int sortedTargs[numTargs];
1941  // on Windows, with no VLA, we can use _malloca to allocate on stack (must free)
1942  #else
1943  long long int* ampInds;
1944  qreal* reAmps;
1945  qreal* imAmps;
1946  int* sortedTargs = (int*) _malloca(numTargs * sizeof *sortedTargs);
1947  #endif
1948 
1949  // we need a sorted targets list to find thisInd00 for each task.
1950  // we can't modify targets, because the user-ordering of targets matters in u
1951  for (int t=0; t < numTargs; t++)
1952  sortedTargs[t] = targs[t];
1953  qsort(sortedTargs, numTargs, sizeof(int), qsortComp);
1954 
1955 # ifdef _OPENMP
1956 # pragma omp parallel \
1957  default (none) \
1958  shared (reVec,imVec, numTasks,numTargAmps,globalIndStart, ctrlMask,targs,sortedTargs,u,numTargs) \
1959  private (thisTask,thisInd00,thisGlobalInd00,ind,i,t,r,c,reElem,imElem, ampInds,reAmps,imAmps)
1960 # endif
1961  {
1962  // when manually allocating array memory (on Windows), this must be done in each thread
1963  // separately and is not performed automatically by declaring a var as omp-private
1964  # ifdef _WIN32
1965  ampInds = (long long int*) _malloca(numTargAmps * sizeof *ampInds);
1966  reAmps = (qreal*) _malloca(numTargAmps * sizeof *reAmps);
1967  imAmps = (qreal*) _malloca(numTargAmps * sizeof *imAmps);
1968  # endif
1969 # ifdef _OPENMP
1970 # pragma omp for schedule (static)
1971 # endif
1972  for (thisTask=0; thisTask<numTasks; thisTask++) {
1973 
1974  // find this task's start index (where all targs are 0)
1975  thisInd00 = thisTask;
1976  for (t=0; t < numTargs; t++)
1977  thisInd00 = insertZeroBit(thisInd00, sortedTargs[t]);
1978 
1979  // this task only modifies amplitudes if control qubits are 1 for this state
1980  thisGlobalInd00 = thisInd00 + globalIndStart;
1981  if (ctrlMask && ((ctrlMask & thisGlobalInd00) != ctrlMask))
1982  continue;
1983 
1984  // determine the indices and record values of this tasks's target amps
1985  for (i=0; i < numTargAmps; i++) {
1986 
1987  // get statevec index of current target qubit assignment
1988  ind = thisInd00;
1989  for (t=0; t < numTargs; t++)
1990  if (extractBit(t, i))
1991  ind = flipBit(ind, targs[t]);
1992 
1993  // update this tasks's private arrays
1994  ampInds[i] = ind;
1995  reAmps [i] = reVec[ind];
1996  imAmps [i] = imVec[ind];
1997  }
1998 
1999  // modify this tasks's target amplitudes
2000  for (r=0; r < numTargAmps; r++) {
2001  ind = ampInds[r];
2002  reVec[ind] = 0;
2003  imVec[ind] = 0;
2004 
2005  for (c=0; c < numTargAmps; c++) {
2006  reElem = u.real[r][c];
2007  imElem = u.imag[r][c];
2008  reVec[ind] += reAmps[c]*reElem - imAmps[c]*imElem;
2009  imVec[ind] += reAmps[c]*imElem + imAmps[c]*reElem;
2010  }
2011  }
2012  }
2013  // on Windows, we must explicitly free the stack structures
2014  #ifdef _WIN32
2015  _freea(ampInds);
2016  _freea(reAmps);
2017  _freea(imAmps);
2018  #endif
2019  }
2020 
2021  #ifdef _WIN32
2022  _freea(sortedTargs);
2023  #endif
2024 }
2025 
2026 void statevec_unitaryLocal(Qureg qureg, int targetQubit, ComplexMatrix2 u)
2027 {
2028  long long int sizeBlock, sizeHalfBlock;
2029  long long int thisBlock, // current block
2030  indexUp,indexLo; // current index and corresponding index in lower half block
2031 
2032  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2033  long long int thisTask;
2034  long long int numTasks=qureg.numAmpsPerChunk>>1;
2035 
2036  // set dimensions
2037  sizeHalfBlock = 1LL << targetQubit;
2038  sizeBlock = 2LL * sizeHalfBlock;
2039 
2040  // Can't use qureg.stateVec as a private OMP var
2041  qreal *stateVecReal = qureg.stateVec.real;
2042  qreal *stateVecImag = qureg.stateVec.imag;
2043 
2044 # ifdef _OPENMP
2045 # pragma omp parallel \
2046  default (none) \
2047  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, u,numTasks) \
2048  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2049 # endif
2050  {
2051 # ifdef _OPENMP
2052 # pragma omp for schedule (static)
2053 # endif
2054  for (thisTask=0; thisTask<numTasks; thisTask++) {
2055 
2056  thisBlock = thisTask / sizeHalfBlock;
2057  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2058  indexLo = indexUp + sizeHalfBlock;
2059 
2060  // store current state vector values in temp variables
2061  stateRealUp = stateVecReal[indexUp];
2062  stateImagUp = stateVecImag[indexUp];
2063 
2064  stateRealLo = stateVecReal[indexLo];
2065  stateImagLo = stateVecImag[indexLo];
2066 
2067 
2068  // state[indexUp] = u00 * state[indexUp] + u01 * state[indexLo]
2069  stateVecReal[indexUp] = u.real[0][0]*stateRealUp - u.imag[0][0]*stateImagUp
2070  + u.real[0][1]*stateRealLo - u.imag[0][1]*stateImagLo;
2071  stateVecImag[indexUp] = u.real[0][0]*stateImagUp + u.imag[0][0]*stateRealUp
2072  + u.real[0][1]*stateImagLo + u.imag[0][1]*stateRealLo;
2073 
2074  // state[indexLo] = u10 * state[indexUp] + u11 * state[indexLo]
2075  stateVecReal[indexLo] = u.real[1][0]*stateRealUp - u.imag[1][0]*stateImagUp
2076  + u.real[1][1]*stateRealLo - u.imag[1][1]*stateImagLo;
2077  stateVecImag[indexLo] = u.real[1][0]*stateImagUp + u.imag[1][0]*stateRealUp
2078  + u.real[1][1]*stateImagLo + u.imag[1][1]*stateRealLo;
2079 
2080  }
2081  }
2082 }
2083 
2096  Complex rot1, Complex rot2,
2097  ComplexArray stateVecUp,
2098  ComplexArray stateVecLo,
2099  ComplexArray stateVecOut)
2100 {
2101 
2102  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2103  long long int thisTask;
2104  long long int numTasks=qureg.numAmpsPerChunk;
2105 
2106  qreal rot1Real=rot1.real, rot1Imag=rot1.imag;
2107  qreal rot2Real=rot2.real, rot2Imag=rot2.imag;
2108  qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2109  qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2110  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2111 
2112 # ifdef _OPENMP
2113 # pragma omp parallel \
2114  default (none) \
2115  shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2116  rot1Real,rot1Imag, rot2Real,rot2Imag,numTasks) \
2117  private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2118 # endif
2119  {
2120 # ifdef _OPENMP
2121 # pragma omp for schedule (static)
2122 # endif
2123  for (thisTask=0; thisTask<numTasks; thisTask++) {
2124  // store current state vector values in temp variables
2125  stateRealUp = stateVecRealUp[thisTask];
2126  stateImagUp = stateVecImagUp[thisTask];
2127 
2128  stateRealLo = stateVecRealLo[thisTask];
2129  stateImagLo = stateVecImagLo[thisTask];
2130 
2131  // state[indexUp] = alpha * state[indexUp] - conj(beta) * state[indexLo]
2132  stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp + rot2Real*stateRealLo + rot2Imag*stateImagLo;
2133  stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp + rot2Real*stateImagLo - rot2Imag*stateRealLo;
2134  }
2135  }
2136 }
2137 
2152  Complex rot1, Complex rot2,
2153  ComplexArray stateVecUp,
2154  ComplexArray stateVecLo,
2155  ComplexArray stateVecOut)
2156 {
2157 
2158  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2159  long long int thisTask;
2160  long long int numTasks=qureg.numAmpsPerChunk;
2161 
2162  qreal rot1Real=rot1.real, rot1Imag=rot1.imag;
2163  qreal rot2Real=rot2.real, rot2Imag=rot2.imag;
2164  qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2165  qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2166  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2167 
2168 
2169 # ifdef _OPENMP
2170 # pragma omp parallel \
2171  default (none) \
2172  shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2173  rot1Real, rot1Imag, rot2Real, rot2Imag,numTasks) \
2174  private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2175 # endif
2176  {
2177 # ifdef _OPENMP
2178 # pragma omp for schedule (static)
2179 # endif
2180  for (thisTask=0; thisTask<numTasks; thisTask++) {
2181  // store current state vector values in temp variables
2182  stateRealUp = stateVecRealUp[thisTask];
2183  stateImagUp = stateVecImagUp[thisTask];
2184 
2185  stateRealLo = stateVecRealLo[thisTask];
2186  stateImagLo = stateVecImagLo[thisTask];
2187 
2188  stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp
2189  + rot2Real*stateRealLo - rot2Imag*stateImagLo;
2190  stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp
2191  + rot2Real*stateImagLo + rot2Imag*stateRealLo;
2192  }
2193  }
2194 }
2195 
2196 void statevec_controlledCompactUnitaryLocal (Qureg qureg, int controlQubit, int targetQubit,
2197  Complex alpha, Complex beta)
2198 {
2199  long long int sizeBlock, sizeHalfBlock;
2200  long long int thisBlock, // current block
2201  indexUp,indexLo; // current index and corresponding index in lower half block
2202 
2203  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2204  long long int thisTask;
2205  long long int numTasks=qureg.numAmpsPerChunk>>1;
2206  long long int chunkSize=qureg.numAmpsPerChunk;
2207  long long int chunkId=qureg.chunkId;
2208 
2209  int controlBit;
2210 
2211  // set dimensions
2212  sizeHalfBlock = 1LL << targetQubit;
2213  sizeBlock = 2LL * sizeHalfBlock;
2214 
2215  // Can't use qureg.stateVec as a private OMP var
2216  qreal *stateVecReal = qureg.stateVec.real;
2217  qreal *stateVecImag = qureg.stateVec.imag;
2218  qreal alphaImag=alpha.imag, alphaReal=alpha.real;
2219  qreal betaImag=beta.imag, betaReal=beta.real;
2220 
2221 # ifdef _OPENMP
2222 # pragma omp parallel \
2223  default (none) \
2224  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, alphaReal,alphaImag, betaReal,betaImag, \
2225  numTasks,chunkId,chunkSize,controlQubit) \
2226  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2227 # endif
2228  {
2229 # ifdef _OPENMP
2230 # pragma omp for schedule (static)
2231 # endif
2232  for (thisTask=0; thisTask<numTasks; thisTask++) {
2233 
2234  thisBlock = thisTask / sizeHalfBlock;
2235  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2236  indexLo = indexUp + sizeHalfBlock;
2237 
2238  controlBit = extractBit (controlQubit, indexUp+chunkId*chunkSize);
2239  if (controlBit){
2240  // store current state vector values in temp variables
2241  stateRealUp = stateVecReal[indexUp];
2242  stateImagUp = stateVecImag[indexUp];
2243 
2244  stateRealLo = stateVecReal[indexLo];
2245  stateImagLo = stateVecImag[indexLo];
2246 
2247  // state[indexUp] = alpha * state[indexUp] - conj(beta) * state[indexLo]
2248  stateVecReal[indexUp] = alphaReal*stateRealUp - alphaImag*stateImagUp
2249  - betaReal*stateRealLo - betaImag*stateImagLo;
2250  stateVecImag[indexUp] = alphaReal*stateImagUp + alphaImag*stateRealUp
2251  - betaReal*stateImagLo + betaImag*stateRealLo;
2252 
2253  // state[indexLo] = beta * state[indexUp] + conj(alpha) * state[indexLo]
2254  stateVecReal[indexLo] = betaReal*stateRealUp - betaImag*stateImagUp
2255  + alphaReal*stateRealLo + alphaImag*stateImagLo;
2256  stateVecImag[indexLo] = betaReal*stateImagUp + betaImag*stateRealUp
2257  + alphaReal*stateImagLo - alphaImag*stateRealLo;
2258  }
2259  }
2260  }
2261 
2262 }
2263 
2264 /* ctrlQubitsMask is a bit mask indicating which qubits are control Qubits
2265  * ctrlFlipMask is a bit mask indicating which control qubits should be 'flipped'
2266  * in the condition, i.e. they should have value 0 when the unitary is applied
2267  */
2269  Qureg qureg, int targetQubit,
2270  long long int ctrlQubitsMask, long long int ctrlFlipMask,
2271  ComplexMatrix2 u)
2272 {
2273  long long int sizeBlock, sizeHalfBlock;
2274  long long int thisBlock, // current block
2275  indexUp,indexLo; // current index and corresponding index in lower half block
2276 
2277  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2278  long long int thisTask;
2279  long long int numTasks=qureg.numAmpsPerChunk>>1;
2280  long long int chunkSize=qureg.numAmpsPerChunk;
2281  long long int chunkId=qureg.chunkId;
2282 
2283  // set dimensions
2284  sizeHalfBlock = 1LL << targetQubit;
2285  sizeBlock = 2LL * sizeHalfBlock;
2286 
2287  // Can't use qureg.stateVec as a private OMP var
2288  qreal *stateVecReal = qureg.stateVec.real;
2289  qreal *stateVecImag = qureg.stateVec.imag;
2290 
2291 # ifdef _OPENMP
2292 # pragma omp parallel \
2293  default (none) \
2294  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, u, ctrlQubitsMask,ctrlFlipMask, \
2295  numTasks,chunkId,chunkSize) \
2296  private (thisTask,thisBlock, indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2297 # endif
2298  {
2299 # ifdef _OPENMP
2300 # pragma omp for schedule (static)
2301 # endif
2302  for (thisTask=0; thisTask<numTasks; thisTask++) {
2303 
2304  thisBlock = thisTask / sizeHalfBlock;
2305  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2306  indexLo = indexUp + sizeHalfBlock;
2307 
2308 
2309  // take the basis index, flip the designated (XOR) 'control' bits, AND with the controls.
2310  // if this equals the control mask, the control qubits have the desired values in the basis index
2311  if (ctrlQubitsMask == (ctrlQubitsMask & ((indexUp+chunkId*chunkSize) ^ ctrlFlipMask))) {
2312  // store current state vector values in temp variables
2313  stateRealUp = stateVecReal[indexUp];
2314  stateImagUp = stateVecImag[indexUp];
2315 
2316  stateRealLo = stateVecReal[indexLo];
2317  stateImagLo = stateVecImag[indexLo];
2318 
2319  // state[indexUp] = u00 * state[indexUp] + u01 * state[indexLo]
2320  stateVecReal[indexUp] = u.real[0][0]*stateRealUp - u.imag[0][0]*stateImagUp
2321  + u.real[0][1]*stateRealLo - u.imag[0][1]*stateImagLo;
2322  stateVecImag[indexUp] = u.real[0][0]*stateImagUp + u.imag[0][0]*stateRealUp
2323  + u.real[0][1]*stateImagLo + u.imag[0][1]*stateRealLo;
2324 
2325  // state[indexLo] = u10 * state[indexUp] + u11 * state[indexLo]
2326  stateVecReal[indexLo] = u.real[1][0]*stateRealUp - u.imag[1][0]*stateImagUp
2327  + u.real[1][1]*stateRealLo - u.imag[1][1]*stateImagLo;
2328  stateVecImag[indexLo] = u.real[1][0]*stateImagUp + u.imag[1][0]*stateRealUp
2329  + u.real[1][1]*stateImagLo + u.imag[1][1]*stateRealLo;
2330  }
2331  }
2332  }
2333 
2334 }
2335 
2336 void statevec_controlledUnitaryLocal(Qureg qureg, int controlQubit, int targetQubit,
2337  ComplexMatrix2 u)
2338 {
2339  long long int sizeBlock, sizeHalfBlock;
2340  long long int thisBlock, // current block
2341  indexUp,indexLo; // current index and corresponding index in lower half block
2342 
2343  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2344  long long int thisTask;
2345  long long int numTasks=qureg.numAmpsPerChunk>>1;
2346  long long int chunkSize=qureg.numAmpsPerChunk;
2347  long long int chunkId=qureg.chunkId;
2348 
2349  int controlBit;
2350 
2351  // set dimensions
2352  sizeHalfBlock = 1LL << targetQubit;
2353  sizeBlock = 2LL * sizeHalfBlock;
2354 
2355  // Can't use qureg.stateVec as a private OMP var
2356  qreal *stateVecReal = qureg.stateVec.real;
2357  qreal *stateVecImag = qureg.stateVec.imag;
2358 
2359 # ifdef _OPENMP
2360 # pragma omp parallel \
2361  default (none) \
2362  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, u,numTasks,chunkId,chunkSize,controlQubit) \
2363  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2364 # endif
2365  {
2366 # ifdef _OPENMP
2367 # pragma omp for schedule (static)
2368 # endif
2369  for (thisTask=0; thisTask<numTasks; thisTask++) {
2370 
2371  thisBlock = thisTask / sizeHalfBlock;
2372  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2373  indexLo = indexUp + sizeHalfBlock;
2374 
2375  controlBit = extractBit (controlQubit, indexUp+chunkId*chunkSize);
2376  if (controlBit){
2377  // store current state vector values in temp variables
2378  stateRealUp = stateVecReal[indexUp];
2379  stateImagUp = stateVecImag[indexUp];
2380 
2381  stateRealLo = stateVecReal[indexLo];
2382  stateImagLo = stateVecImag[indexLo];
2383 
2384 
2385  // state[indexUp] = u00 * state[indexUp] + u01 * state[indexLo]
2386  stateVecReal[indexUp] = u.real[0][0]*stateRealUp - u.imag[0][0]*stateImagUp
2387  + u.real[0][1]*stateRealLo - u.imag[0][1]*stateImagLo;
2388  stateVecImag[indexUp] = u.real[0][0]*stateImagUp + u.imag[0][0]*stateRealUp
2389  + u.real[0][1]*stateImagLo + u.imag[0][1]*stateRealLo;
2390 
2391  // state[indexLo] = u10 * state[indexUp] + u11 * state[indexLo]
2392  stateVecReal[indexLo] = u.real[1][0]*stateRealUp - u.imag[1][0]*stateImagUp
2393  + u.real[1][1]*stateRealLo - u.imag[1][1]*stateImagLo;
2394  stateVecImag[indexLo] = u.real[1][0]*stateImagUp + u.imag[1][0]*stateRealUp
2395  + u.real[1][1]*stateImagLo + u.imag[1][1]*stateRealLo;
2396  }
2397  }
2398  }
2399 
2400 }
2401 
2415  Complex rot1, Complex rot2,
2416  ComplexArray stateVecUp,
2417  ComplexArray stateVecLo,
2418  ComplexArray stateVecOut)
2419 {
2420 
2421  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2422  long long int thisTask;
2423  long long int numTasks=qureg.numAmpsPerChunk;
2424  long long int chunkSize=qureg.numAmpsPerChunk;
2425  long long int chunkId=qureg.chunkId;
2426 
2427  int controlBit;
2428 
2429  qreal rot1Real=rot1.real, rot1Imag=rot1.imag;
2430  qreal rot2Real=rot2.real, rot2Imag=rot2.imag;
2431  qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2432  qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2433  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2434 
2435 # ifdef _OPENMP
2436 # pragma omp parallel \
2437  default (none) \
2438  shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2439  rot1Real,rot1Imag, rot2Real,rot2Imag,numTasks,chunkId,chunkSize,controlQubit) \
2440  private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2441 # endif
2442  {
2443 # ifdef _OPENMP
2444 # pragma omp for schedule (static)
2445 # endif
2446  for (thisTask=0; thisTask<numTasks; thisTask++) {
2447  controlBit = extractBit (controlQubit, thisTask+chunkId*chunkSize);
2448  if (controlBit){
2449  // store current state vector values in temp variables
2450  stateRealUp = stateVecRealUp[thisTask];
2451  stateImagUp = stateVecImagUp[thisTask];
2452 
2453  stateRealLo = stateVecRealLo[thisTask];
2454  stateImagLo = stateVecImagLo[thisTask];
2455 
2456  // state[indexUp] = alpha * state[indexUp] - conj(beta) * state[indexLo]
2457  stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp + rot2Real*stateRealLo + rot2Imag*stateImagLo;
2458  stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp + rot2Real*stateImagLo - rot2Imag*stateRealLo;
2459  }
2460  }
2461  }
2462 }
2463 
2476 void statevec_controlledUnitaryDistributed (Qureg qureg, int controlQubit,
2477  Complex rot1, Complex rot2,
2478  ComplexArray stateVecUp,
2479  ComplexArray stateVecLo,
2480  ComplexArray stateVecOut)
2481 {
2482 
2483  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2484  long long int thisTask;
2485  long long int numTasks=qureg.numAmpsPerChunk;
2486  long long int chunkSize=qureg.numAmpsPerChunk;
2487  long long int chunkId=qureg.chunkId;
2488 
2489  int controlBit;
2490 
2491  qreal rot1Real=rot1.real, rot1Imag=rot1.imag;
2492  qreal rot2Real=rot2.real, rot2Imag=rot2.imag;
2493  qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2494  qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2495  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2496 
2497 # ifdef _OPENMP
2498 # pragma omp parallel \
2499  default (none) \
2500  shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2501  rot1Real,rot1Imag, rot2Real,rot2Imag, numTasks,chunkId,chunkSize,controlQubit) \
2502  private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo,controlBit)
2503 # endif
2504  {
2505 # ifdef _OPENMP
2506 # pragma omp for schedule (static)
2507 # endif
2508  for (thisTask=0; thisTask<numTasks; thisTask++) {
2509  controlBit = extractBit (controlQubit, thisTask+chunkId*chunkSize);
2510  if (controlBit){
2511  // store current state vector values in temp variables
2512  stateRealUp = stateVecRealUp[thisTask];
2513  stateImagUp = stateVecImagUp[thisTask];
2514 
2515  stateRealLo = stateVecRealLo[thisTask];
2516  stateImagLo = stateVecImagLo[thisTask];
2517 
2518  stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp
2519  + rot2Real*stateRealLo - rot2Imag*stateImagLo;
2520  stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp
2521  + rot2Real*stateImagLo + rot2Imag*stateRealLo;
2522  }
2523  }
2524  }
2525 }
2526 
2543  Qureg qureg,
2544  int targetQubit,
2545  long long int ctrlQubitsMask, long long int ctrlFlipMask,
2546  Complex rot1, Complex rot2,
2547  ComplexArray stateVecUp,
2548  ComplexArray stateVecLo,
2549  ComplexArray stateVecOut)
2550 {
2551 
2552  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
2553  long long int thisTask;
2554  long long int numTasks=qureg.numAmpsPerChunk;
2555  long long int chunkSize=qureg.numAmpsPerChunk;
2556  long long int chunkId=qureg.chunkId;
2557 
2558  qreal rot1Real=rot1.real, rot1Imag=rot1.imag;
2559  qreal rot2Real=rot2.real, rot2Imag=rot2.imag;
2560  qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
2561  qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
2562  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2563 
2564 # ifdef _OPENMP
2565 # pragma omp parallel \
2566  default (none) \
2567  shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
2568  rot1Real,rot1Imag, rot2Real,rot2Imag, ctrlQubitsMask,ctrlFlipMask, numTasks,chunkId,chunkSize) \
2569  private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
2570 # endif
2571  {
2572 # ifdef _OPENMP
2573 # pragma omp for schedule (static)
2574 # endif
2575  for (thisTask=0; thisTask<numTasks; thisTask++) {
2576  if (ctrlQubitsMask == (ctrlQubitsMask & ((thisTask+chunkId*chunkSize) ^ ctrlFlipMask))) {
2577  // store current state vector values in temp variables
2578  stateRealUp = stateVecRealUp[thisTask];
2579  stateImagUp = stateVecImagUp[thisTask];
2580 
2581  stateRealLo = stateVecRealLo[thisTask];
2582  stateImagLo = stateVecImagLo[thisTask];
2583 
2584  stateVecRealOut[thisTask] = rot1Real*stateRealUp - rot1Imag*stateImagUp
2585  + rot2Real*stateRealLo - rot2Imag*stateImagLo;
2586  stateVecImagOut[thisTask] = rot1Real*stateImagUp + rot1Imag*stateRealUp
2587  + rot2Real*stateImagLo + rot2Imag*stateRealLo;
2588  }
2589  }
2590  }
2591 }
2592 
2593 void statevec_pauliXLocal(Qureg qureg, int targetQubit)
2594 {
2595  long long int sizeBlock, sizeHalfBlock;
2596  long long int thisBlock, // current block
2597  indexUp,indexLo; // current index and corresponding index in lower half block
2598 
2599  qreal stateRealUp,stateImagUp;
2600  long long int thisTask;
2601  long long int numTasks=qureg.numAmpsPerChunk>>1;
2602 
2603  // set dimensions
2604  sizeHalfBlock = 1LL << targetQubit;
2605  sizeBlock = 2LL * sizeHalfBlock;
2606 
2607  // Can't use qureg.stateVec as a private OMP var
2608  qreal *stateVecReal = qureg.stateVec.real;
2609  qreal *stateVecImag = qureg.stateVec.imag;
2610 
2611 # ifdef _OPENMP
2612 # pragma omp parallel \
2613  default (none) \
2614  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, numTasks) \
2615  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp)
2616 # endif
2617  {
2618 # ifdef _OPENMP
2619 # pragma omp for schedule (static)
2620 # endif
2621  for (thisTask=0; thisTask<numTasks; thisTask++) {
2622  thisBlock = thisTask / sizeHalfBlock;
2623  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2624  indexLo = indexUp + sizeHalfBlock;
2625 
2626  stateRealUp = stateVecReal[indexUp];
2627  stateImagUp = stateVecImag[indexUp];
2628 
2629  stateVecReal[indexUp] = stateVecReal[indexLo];
2630  stateVecImag[indexUp] = stateVecImag[indexLo];
2631 
2632  stateVecReal[indexLo] = stateRealUp;
2633  stateVecImag[indexLo] = stateImagUp;
2634  }
2635  }
2636 
2637 }
2638 
2652  ComplexArray stateVecIn,
2653  ComplexArray stateVecOut)
2654 {
2655 
2656  long long int thisTask;
2657  long long int numTasks=qureg.numAmpsPerChunk;
2658 
2659  qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2660  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2661 
2662 # ifdef _OPENMP
2663 # pragma omp parallel \
2664  default (none) \
2665  shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut,numTasks) \
2666  private (thisTask)
2667 # endif
2668  {
2669 # ifdef _OPENMP
2670 # pragma omp for schedule (static)
2671 # endif
2672  for (thisTask=0; thisTask<numTasks; thisTask++) {
2673  stateVecRealOut[thisTask] = stateVecRealIn[thisTask];
2674  stateVecImagOut[thisTask] = stateVecImagIn[thisTask];
2675  }
2676  }
2677 }
2678 
2679 void statevec_controlledNotLocal(Qureg qureg, int controlQubit, int targetQubit)
2680 {
2681  long long int sizeBlock, sizeHalfBlock;
2682  long long int thisBlock, // current block
2683  indexUp,indexLo; // current index and corresponding index in lower half block
2684 
2685  qreal stateRealUp,stateImagUp;
2686  long long int thisTask;
2687  long long int numTasks=qureg.numAmpsPerChunk>>1;
2688  long long int chunkSize=qureg.numAmpsPerChunk;
2689  long long int chunkId=qureg.chunkId;
2690 
2691  int controlBit;
2692 
2693  // set dimensions
2694  sizeHalfBlock = 1LL << targetQubit;
2695  sizeBlock = 2LL * sizeHalfBlock;
2696 
2697  // Can't use qureg.stateVec as a private OMP var
2698  qreal *stateVecReal = qureg.stateVec.real;
2699  qreal *stateVecImag = qureg.stateVec.imag;
2700 
2701 # ifdef _OPENMP
2702 # pragma omp parallel \
2703  default (none) \
2704  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag,numTasks,chunkId,chunkSize,controlQubit) \
2705  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,controlBit)
2706 # endif
2707  {
2708 # ifdef _OPENMP
2709 # pragma omp for schedule (static)
2710 # endif
2711  for (thisTask=0; thisTask<numTasks; thisTask++) {
2712  thisBlock = thisTask / sizeHalfBlock;
2713  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2714  indexLo = indexUp + sizeHalfBlock;
2715 
2716  controlBit = extractBit(controlQubit, indexUp+chunkId*chunkSize);
2717  if (controlBit){
2718  stateRealUp = stateVecReal[indexUp];
2719  stateImagUp = stateVecImag[indexUp];
2720 
2721  stateVecReal[indexUp] = stateVecReal[indexLo];
2722  stateVecImag[indexUp] = stateVecImag[indexLo];
2723 
2724  stateVecReal[indexLo] = stateRealUp;
2725  stateVecImag[indexLo] = stateImagUp;
2726  }
2727  }
2728  }
2729 }
2730 
2742 void statevec_controlledNotDistributed (Qureg qureg, int controlQubit,
2743  ComplexArray stateVecIn,
2744  ComplexArray stateVecOut)
2745 {
2746 
2747  long long int thisTask;
2748  long long int numTasks=qureg.numAmpsPerChunk;
2749  long long int chunkSize=qureg.numAmpsPerChunk;
2750  long long int chunkId=qureg.chunkId;
2751 
2752  int controlBit;
2753 
2754  qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2755  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2756 
2757 # ifdef _OPENMP
2758 # pragma omp parallel \
2759  default (none) \
2760  shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut, \
2761  numTasks,chunkId,chunkSize,controlQubit) \
2762  private (thisTask,controlBit)
2763 # endif
2764  {
2765 # ifdef _OPENMP
2766 # pragma omp for schedule (static)
2767 # endif
2768  for (thisTask=0; thisTask<numTasks; thisTask++) {
2769  controlBit = extractBit (controlQubit, thisTask+chunkId*chunkSize);
2770  if (controlBit){
2771  stateVecRealOut[thisTask] = stateVecRealIn[thisTask];
2772  stateVecImagOut[thisTask] = stateVecImagIn[thisTask];
2773  }
2774  }
2775  }
2776 }
2777 
2778 void statevec_multiControlledMultiQubitNotLocal(Qureg qureg, int ctrlMask, int targMask) {
2779  long long int numAmps = qureg.numAmpsPerChunk;
2780  qreal* stateRe = qureg.stateVec.real;
2781  qreal* stateIm = qureg.stateVec.imag;
2782 
2783  long long int globalOffset = qureg.chunkId * numAmps;
2784 
2785  // each amplitude is swapped with a 'mate' amplitude
2786  long long int ampInd, mateInd, globalInd;
2787  qreal mateRe, mateIm;
2788 
2789 # ifdef _OPENMP
2790 # pragma omp parallel \
2791  default (none) \
2792  shared (stateRe,stateIm, numAmps, ctrlMask,targMask, globalOffset) \
2793  private (ampInd, mateInd,mateRe,mateIm, globalInd)
2794 # endif
2795  {
2796 # ifdef _OPENMP
2797 # pragma omp for schedule (static)
2798 # endif
2799  for (ampInd = 0; ampInd < numAmps; ampInd++) {
2800 
2801  /* it may be a premature optimisation to remove the seemingly wasteful continues below,
2802  * because the maximum skipped amplitudes is 1/2 that stored in the node
2803  * (e.g. since this function is not called if all amps should be skipped via controls),
2804  * and since we're memory-bandwidth bottlenecked.
2805  */
2806 
2807  // although amps are local, we may still be running in distributed mode,
2808  // and hence need to consult the global index to determine the values of
2809  // the control qubits
2810  globalInd = ampInd + globalOffset;
2811 
2812  // modify amplitude only if control qubits are 1 for this state
2813  if (ctrlMask && ((ctrlMask & globalInd) != ctrlMask))
2814  continue;
2815 
2816  mateInd = ampInd ^ targMask;
2817 
2818  // if the mate is behind, it was already processed
2819  if (mateInd < ampInd)
2820  continue;
2821 
2822  mateRe = stateRe[mateInd];
2823  mateIm = stateIm[mateInd];
2824 
2825  // swap amp with mate
2826  stateRe[mateInd] = stateRe[ampInd];
2827  stateIm[mateInd] = stateIm[ampInd];
2828  stateRe[ampInd] = mateRe;
2829  stateIm[ampInd] = mateIm;
2830  }
2831  }
2832 }
2833 
2835  Qureg qureg, int ctrlMask, int targMask,
2836  ComplexArray stateVecIn,
2837  ComplexArray stateVecOut
2838 ) {
2839  long long int numAmps = qureg.numAmpsPerChunk;
2840  long long int globalOffset = qureg.chunkId * numAmps;
2841 
2842  /* stateVecOut is qureg's local state-vector partition, which we modify.
2843  * stateVecIn is the pair node's state-vector partition, in an order which
2844  * does not necessarily correlate to stateVecOut's
2845  */
2846  qreal* inReal = stateVecIn.real;
2847  qreal* inImag = stateVecIn.imag;
2848  qreal* outReal = stateVecOut.real;
2849  qreal* outImag = stateVecOut.imag;
2850 
2851  long long int outInd, outIndGlobal, inInd, inIndGlobal;
2852 
2853 # ifdef _OPENMP
2854 # pragma omp parallel \
2855  default (none) \
2856  shared (inReal,inImag,outReal,outImag, numAmps,globalOffset, ctrlMask,targMask) \
2857  private (outInd,outIndGlobal, inInd,inIndGlobal)
2858 # endif
2859  {
2860 # ifdef _OPENMP
2861 # pragma omp for schedule (static)
2862 # endif
2863  for (outInd = 0; outInd < numAmps; outInd++) {
2864 
2865  // modify amplitude only if control qubits are 1 for this state
2866  outIndGlobal = outInd + globalOffset;
2867  if (ctrlMask && ((ctrlMask & outIndGlobal) != ctrlMask))
2868  continue;
2869  /* it is a premature optimisation to remove this seemingly wasteful abort above,
2870  * because the maximum skipped amplitudes is 1/2 that stored in the node
2871  * (since this function is not called if all amps should be skipped)
2872  */
2873 
2874  /* unlike statevec_controlledNotDistributed(), we cannot assume stateVecOut
2875  * maps contiguously/parallel into stateVecIn; we must map each amplitude, bit-wise.
2876  * However, the arithmetic doesn't necessitate knowing the rank of stateVecIn
2877  */
2878  inIndGlobal = outIndGlobal ^ targMask;
2879  inInd = inIndGlobal % numAmps; // = inIndGlobal - pairRank * numAmps
2880 
2881  outReal[outInd] = inReal[inInd];
2882  outImag[outInd] = inImag[inInd];
2883  }
2884  }
2885 }
2886 
2887 void statevec_pauliYLocal(Qureg qureg, int targetQubit, int conjFac)
2888 {
2889  long long int sizeBlock, sizeHalfBlock;
2890  long long int thisBlock, // current block
2891  indexUp,indexLo; // current index and corresponding index in lower half block
2892 
2893  qreal stateRealUp,stateImagUp;
2894  long long int thisTask;
2895  long long int numTasks=qureg.numAmpsPerChunk>>1;
2896 
2897  // set dimensions
2898  sizeHalfBlock = 1LL << targetQubit;
2899  sizeBlock = 2LL * sizeHalfBlock;
2900 
2901  // Can't use qureg.stateVec as a private OMP var
2902  qreal *stateVecReal = qureg.stateVec.real;
2903  qreal *stateVecImag = qureg.stateVec.imag;
2904 
2905 # ifdef _OPENMP
2906 # pragma omp parallel \
2907  default (none) \
2908  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, numTasks,conjFac) \
2909  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp)
2910 # endif
2911  {
2912 # ifdef _OPENMP
2913 # pragma omp for schedule (static)
2914 # endif
2915  for (thisTask=0; thisTask<numTasks; thisTask++) {
2916  thisBlock = thisTask / sizeHalfBlock;
2917  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
2918  indexLo = indexUp + sizeHalfBlock;
2919 
2920  stateRealUp = stateVecReal[indexUp];
2921  stateImagUp = stateVecImag[indexUp];
2922 
2923  stateVecReal[indexUp] = conjFac * stateVecImag[indexLo];
2924  stateVecImag[indexUp] = conjFac * -stateVecReal[indexLo];
2925  stateVecReal[indexLo] = conjFac * -stateImagUp;
2926  stateVecImag[indexLo] = conjFac * stateRealUp;
2927  }
2928  }
2929 }
2930 
2946  ComplexArray stateVecIn,
2947  ComplexArray stateVecOut,
2948  int updateUpper, int conjFac)
2949 {
2950 
2951  long long int thisTask;
2952  long long int numTasks=qureg.numAmpsPerChunk;
2953 
2954  qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
2955  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
2956 
2957  int realSign=1, imagSign=1;
2958  if (updateUpper) imagSign=-1;
2959  else realSign = -1;
2960 
2961 # ifdef _OPENMP
2962 # pragma omp parallel \
2963  default (none) \
2964  shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut, \
2965  realSign,imagSign, numTasks,conjFac) \
2966  private (thisTask)
2967 # endif
2968  {
2969 # ifdef _OPENMP
2970 # pragma omp for schedule (static)
2971 # endif
2972  for (thisTask=0; thisTask<numTasks; thisTask++) {
2973  stateVecRealOut[thisTask] = conjFac * realSign * stateVecImagIn[thisTask];
2974  stateVecImagOut[thisTask] = conjFac * imagSign * stateVecRealIn[thisTask];
2975  }
2976  }
2977 }
2978 
2979 
2980 
2981 
2982 void statevec_controlledPauliYLocal(Qureg qureg, int controlQubit, int targetQubit, int conjFac)
2983 {
2984  long long int sizeBlock, sizeHalfBlock;
2985  long long int thisBlock, // current block
2986  indexUp,indexLo; // current index and corresponding index in lower half block
2987 
2988  qreal stateRealUp,stateImagUp;
2989  long long int thisTask;
2990  long long int numTasks=qureg.numAmpsPerChunk>>1;
2991  long long int chunkSize=qureg.numAmpsPerChunk;
2992  long long int chunkId=qureg.chunkId;
2993 
2994  int controlBit;
2995 
2996  // set dimensions
2997  sizeHalfBlock = 1LL << targetQubit;
2998  sizeBlock = 2LL * sizeHalfBlock;
2999 
3000  // Can't use qureg.stateVec as a private OMP var
3001  qreal *stateVecReal = qureg.stateVec.real;
3002  qreal *stateVecImag = qureg.stateVec.imag;
3003 
3004 # ifdef _OPENMP
3005 # pragma omp parallel \
3006  default (none) \
3007  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, numTasks,chunkId, \
3008  chunkSize,controlQubit,conjFac) \
3009  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,controlBit)
3010 # endif
3011  {
3012 # ifdef _OPENMP
3013 # pragma omp for schedule (static)
3014 # endif
3015  for (thisTask=0; thisTask<numTasks; thisTask++) {
3016  thisBlock = thisTask / sizeHalfBlock;
3017  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3018  indexLo = indexUp + sizeHalfBlock;
3019 
3020  controlBit = extractBit(controlQubit, indexUp+chunkId*chunkSize);
3021  if (controlBit){
3022  stateRealUp = stateVecReal[indexUp];
3023  stateImagUp = stateVecImag[indexUp];
3024 
3025  // update under +-{{0, -i}, {i, 0}}
3026  stateVecReal[indexUp] = conjFac * stateVecImag[indexLo];
3027  stateVecImag[indexUp] = conjFac * -stateVecReal[indexLo];
3028  stateVecReal[indexLo] = conjFac * -stateImagUp;
3029  stateVecImag[indexLo] = conjFac * stateRealUp;
3030  }
3031  }
3032  }
3033 }
3034 
3035 
3036 void statevec_controlledPauliYDistributed (Qureg qureg, int controlQubit,
3037  ComplexArray stateVecIn,
3038  ComplexArray stateVecOut, int conjFac)
3039 {
3040 
3041  long long int thisTask;
3042  long long int numTasks=qureg.numAmpsPerChunk;
3043  long long int chunkSize=qureg.numAmpsPerChunk;
3044  long long int chunkId=qureg.chunkId;
3045 
3046  int controlBit;
3047 
3048  qreal *stateVecRealIn=stateVecIn.real, *stateVecImagIn=stateVecIn.imag;
3049  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
3050 
3051 # ifdef _OPENMP
3052 # pragma omp parallel \
3053  default (none) \
3054  shared (stateVecRealIn,stateVecImagIn,stateVecRealOut,stateVecImagOut, \
3055  numTasks,chunkId,chunkSize,controlQubit,conjFac) \
3056  private (thisTask,controlBit)
3057 # endif
3058  {
3059 # ifdef _OPENMP
3060 # pragma omp for schedule (static)
3061 # endif
3062  for (thisTask=0; thisTask<numTasks; thisTask++) {
3063  controlBit = extractBit (controlQubit, thisTask+chunkId*chunkSize);
3064  if (controlBit){
3065  stateVecRealOut[thisTask] = conjFac * stateVecImagIn[thisTask];
3066  stateVecImagOut[thisTask] = conjFac * -stateVecRealIn[thisTask];
3067  }
3068  }
3069  }
3070 }
3071 
3072 
3073 
3074 
3075 
3076 
3077 
3078 void statevec_hadamardLocal(Qureg qureg, int targetQubit)
3079 {
3080  long long int sizeBlock, sizeHalfBlock;
3081  long long int thisBlock, // current block
3082  indexUp,indexLo; // current index and corresponding index in lower half block
3083 
3084  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
3085  long long int thisTask;
3086  long long int numTasks=qureg.numAmpsPerChunk>>1;
3087 
3088  // set dimensions
3089  sizeHalfBlock = 1LL << targetQubit;
3090  sizeBlock = 2LL * sizeHalfBlock;
3091 
3092  // Can't use qureg.stateVec as a private OMP var
3093  qreal *stateVecReal = qureg.stateVec.real;
3094  qreal *stateVecImag = qureg.stateVec.imag;
3095 
3096  qreal recRoot2 = 1.0/sqrt(2);
3097 
3098 # ifdef _OPENMP
3099 # pragma omp parallel \
3100  default (none) \
3101  shared (sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag, recRoot2, numTasks) \
3102  private (thisTask,thisBlock ,indexUp,indexLo, stateRealUp,stateImagUp,stateRealLo,stateImagLo)
3103 # endif
3104  {
3105 # ifdef _OPENMP
3106 # pragma omp for schedule (static)
3107 # endif
3108  for (thisTask=0; thisTask<numTasks; thisTask++) {
3109  thisBlock = thisTask / sizeHalfBlock;
3110  indexUp = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3111  indexLo = indexUp + sizeHalfBlock;
3112 
3113  stateRealUp = stateVecReal[indexUp];
3114  stateImagUp = stateVecImag[indexUp];
3115 
3116  stateRealLo = stateVecReal[indexLo];
3117  stateImagLo = stateVecImag[indexLo];
3118 
3119  stateVecReal[indexUp] = recRoot2*(stateRealUp + stateRealLo);
3120  stateVecImag[indexUp] = recRoot2*(stateImagUp + stateImagLo);
3121 
3122  stateVecReal[indexLo] = recRoot2*(stateRealUp - stateRealLo);
3123  stateVecImag[indexLo] = recRoot2*(stateImagUp - stateImagLo);
3124  }
3125  }
3126 }
3127 
3140  ComplexArray stateVecUp,
3141  ComplexArray stateVecLo,
3142  ComplexArray stateVecOut,
3143  int updateUpper)
3144 {
3145 
3146  qreal stateRealUp,stateRealLo,stateImagUp,stateImagLo;
3147  long long int thisTask;
3148  long long int numTasks=qureg.numAmpsPerChunk;
3149 
3150  int sign;
3151  if (updateUpper) sign=1;
3152  else sign=-1;
3153 
3154  qreal recRoot2 = 1.0/sqrt(2);
3155 
3156  qreal *stateVecRealUp=stateVecUp.real, *stateVecImagUp=stateVecUp.imag;
3157  qreal *stateVecRealLo=stateVecLo.real, *stateVecImagLo=stateVecLo.imag;
3158  qreal *stateVecRealOut=stateVecOut.real, *stateVecImagOut=stateVecOut.imag;
3159 
3160 # ifdef _OPENMP
3161 # pragma omp parallel \
3162  default (none) \
3163  shared (stateVecRealUp,stateVecImagUp,stateVecRealLo,stateVecImagLo,stateVecRealOut,stateVecImagOut, \
3164  recRoot2, sign, numTasks) \
3165  private (thisTask,stateRealUp,stateImagUp,stateRealLo,stateImagLo)
3166 # endif
3167  {
3168 # ifdef _OPENMP
3169 # pragma omp for schedule (static)
3170 # endif
3171  for (thisTask=0; thisTask<numTasks; thisTask++) {
3172  // store current state vector values in temp variables
3173  stateRealUp = stateVecRealUp[thisTask];
3174  stateImagUp = stateVecImagUp[thisTask];
3175 
3176  stateRealLo = stateVecRealLo[thisTask];
3177  stateImagLo = stateVecImagLo[thisTask];
3178 
3179  stateVecRealOut[thisTask] = recRoot2*(stateRealUp + sign*stateRealLo);
3180  stateVecImagOut[thisTask] = recRoot2*(stateImagUp + sign*stateImagLo);
3181  }
3182  }
3183 }
3184 
3185 void statevec_phaseShiftByTerm (Qureg qureg, int targetQubit, Complex term)
3186 {
3187  long long int index;
3188  long long int stateVecSize;
3189  int targetBit;
3190 
3191  long long int chunkSize=qureg.numAmpsPerChunk;
3192  long long int chunkId=qureg.chunkId;
3193 
3194  // dimension of the state vector
3195  stateVecSize = qureg.numAmpsPerChunk;
3196  qreal *stateVecReal = qureg.stateVec.real;
3197  qreal *stateVecImag = qureg.stateVec.imag;
3198 
3199  qreal stateRealLo, stateImagLo;
3200  qreal cosAngle = term.real;
3201  qreal sinAngle = term.imag;
3202 
3203 # ifdef _OPENMP
3204 # pragma omp parallel for \
3205  default (none) \
3206  shared (stateVecSize, stateVecReal,stateVecImag, cosAngle,sinAngle, \
3207  chunkId,chunkSize,targetQubit) \
3208  private (index,targetBit,stateRealLo,stateImagLo) \
3209  schedule (static)
3210 # endif
3211  for (index=0; index<stateVecSize; index++) {
3212 
3213  // update the coeff of the |1> state of the target qubit
3214  targetBit = extractBit (targetQubit, index+chunkId*chunkSize);
3215  if (targetBit) {
3216 
3217  stateRealLo = stateVecReal[index];
3218  stateImagLo = stateVecImag[index];
3219 
3220  stateVecReal[index] = cosAngle*stateRealLo - sinAngle*stateImagLo;
3221  stateVecImag[index] = sinAngle*stateRealLo + cosAngle*stateImagLo;
3222  }
3223  }
3224 }
3225 
3226 void statevec_controlledPhaseShift (Qureg qureg, int idQubit1, int idQubit2, qreal angle)
3227 {
3228  long long int index;
3229  long long int stateVecSize;
3230  int bit1, bit2;
3231 
3232  long long int chunkSize=qureg.numAmpsPerChunk;
3233  long long int chunkId=qureg.chunkId;
3234 
3235  // dimension of the state vector
3236  stateVecSize = qureg.numAmpsPerChunk;
3237  qreal *stateVecReal = qureg.stateVec.real;
3238  qreal *stateVecImag = qureg.stateVec.imag;
3239 
3240  qreal stateRealLo, stateImagLo;
3241  qreal cosAngle = cos(angle);
3242  qreal sinAngle = sin(angle);
3243 
3244 # ifdef _OPENMP
3245 # pragma omp parallel for \
3246  default (none) \
3247  shared (stateVecSize, stateVecReal,stateVecImag, chunkId,chunkSize, \
3248  idQubit1,idQubit2,cosAngle,sinAngle ) \
3249  private (index,bit1,bit2,stateRealLo,stateImagLo) \
3250  schedule (static)
3251 # endif
3252  for (index=0; index<stateVecSize; index++) {
3253  bit1 = extractBit (idQubit1, index+chunkId*chunkSize);
3254  bit2 = extractBit (idQubit2, index+chunkId*chunkSize);
3255  if (bit1 && bit2) {
3256 
3257  stateRealLo = stateVecReal[index];
3258  stateImagLo = stateVecImag[index];
3259 
3260  stateVecReal[index] = cosAngle*stateRealLo - sinAngle*stateImagLo;
3261  stateVecImag[index] = sinAngle*stateRealLo + cosAngle*stateImagLo;
3262  }
3263  }
3264 }
3265 
3266 void statevec_multiControlledPhaseShift(Qureg qureg, int *controlQubits, int numControlQubits, qreal angle)
3267 {
3268  long long int index;
3269  long long int stateVecSize;
3270 
3271  long long int chunkSize=qureg.numAmpsPerChunk;
3272  long long int chunkId=qureg.chunkId;
3273 
3274  long long int mask = getQubitBitMask(controlQubits, numControlQubits);
3275 
3276  stateVecSize = qureg.numAmpsPerChunk;
3277  qreal *stateVecReal = qureg.stateVec.real;
3278  qreal *stateVecImag = qureg.stateVec.imag;
3279 
3280  qreal stateRealLo, stateImagLo;
3281  qreal cosAngle = cos(angle);
3282  qreal sinAngle = sin(angle);
3283 
3284 # ifdef _OPENMP
3285 # pragma omp parallel \
3286  default (none) \
3287  shared (stateVecSize, stateVecReal, stateVecImag, mask, chunkId,chunkSize,cosAngle,sinAngle) \
3288  private (index, stateRealLo, stateImagLo)
3289 # endif
3290  {
3291 # ifdef _OPENMP
3292 # pragma omp for schedule (static)
3293 # endif
3294  for (index=0; index<stateVecSize; index++) {
3295  if (mask == (mask & (index+chunkId*chunkSize)) ){
3296 
3297  stateRealLo = stateVecReal[index];
3298  stateImagLo = stateVecImag[index];
3299 
3300  stateVecReal[index] = cosAngle*stateRealLo - sinAngle*stateImagLo;
3301  stateVecImag[index] = sinAngle*stateRealLo + cosAngle*stateImagLo;
3302  }
3303  }
3304  }
3305 }
3306 
3307 int getBitMaskParity(long long int mask) {
3308  int parity = 0;
3309  while (mask) {
3310  parity = !parity;
3311  mask = mask & (mask-1);
3312  }
3313  return parity;
3314 }
3315 
3316 void statevec_multiRotateZ(Qureg qureg, long long int mask, qreal angle)
3317 {
3318  long long int index;
3319  long long int stateVecSize;
3320 
3321  long long int chunkSize=qureg.numAmpsPerChunk;
3322  long long int chunkId=qureg.chunkId;
3323 
3324  stateVecSize = qureg.numAmpsPerChunk;
3325  qreal *stateVecReal = qureg.stateVec.real;
3326  qreal *stateVecImag = qureg.stateVec.imag;
3327 
3328  qreal stateReal, stateImag;
3329  qreal cosAngle = cos(angle/2.0);
3330  qreal sinAngle = sin(angle/2.0);
3331 
3332  // = +-1, to flip sinAngle based on target qubit parity, to effect
3333  // exp(-angle/2 i fac_j)|j>
3334  int fac;
3335 
3336 # ifdef _OPENMP
3337 # pragma omp parallel \
3338  default (none) \
3339  shared (stateVecSize, stateVecReal, stateVecImag, mask, chunkId,chunkSize,cosAngle,sinAngle) \
3340  private (index, fac, stateReal, stateImag)
3341 # endif
3342  {
3343 # ifdef _OPENMP
3344 # pragma omp for schedule (static)
3345 # endif
3346  for (index=0; index<stateVecSize; index++) {
3347  stateReal = stateVecReal[index];
3348  stateImag = stateVecImag[index];
3349 
3350  // odd-parity target qubits get fac_j = -1
3351  fac = getBitMaskParity(mask & (index+chunkId*chunkSize))? -1 : 1;
3352  stateVecReal[index] = cosAngle*stateReal + fac * sinAngle*stateImag;
3353  stateVecImag[index] = - fac * sinAngle*stateReal + cosAngle*stateImag;
3354  }
3355  }
3356 }
3357 
3358 void statevec_multiControlledMultiRotateZ(Qureg qureg, long long int ctrlMask, long long int targMask, qreal angle)
3359 {
3360  long long int offset = qureg.chunkId * qureg.numAmpsPerChunk;
3361 
3362  long long int stateVecSize = qureg.numAmpsPerChunk;
3363  qreal *stateVecReal = qureg.stateVec.real;
3364  qreal *stateVecImag = qureg.stateVec.imag;
3365 
3366  qreal stateReal, stateImag;
3367  qreal cosAngle = cos(angle/2.0);
3368  qreal sinAngle = sin(angle/2.0);
3369 
3370  // = +-1, to flip sinAngle based on target qubit parity, to effect
3371  // exp(-angle/2 i fac_j)|j>
3372  int fac;
3373  long long int index, globalIndex;
3374 
3375 # ifdef _OPENMP
3376 # pragma omp parallel \
3377  default (none) \
3378  shared (offset, stateVecSize, stateVecReal,stateVecImag, ctrlMask,targMask, cosAngle,sinAngle) \
3379  private (index,globalIndex, fac, stateReal,stateImag)
3380 # endif
3381  {
3382 # ifdef _OPENMP
3383 # pragma omp for schedule (static)
3384 # endif
3385  for (index=0; index<stateVecSize; index++) {
3386  stateReal = stateVecReal[index];
3387  stateImag = stateVecImag[index];
3388 
3389  // states with not-all-one control qubits are unmodified
3390  globalIndex = index + offset;
3391  if (ctrlMask && ((ctrlMask & globalIndex) != ctrlMask))
3392  continue;
3393 
3394  // odd-parity target qubits get fac_j = -1 (avoid thread divergence)
3395  fac = 1-2*getBitMaskParity(targMask & globalIndex);
3396  stateVecReal[index] = cosAngle*stateReal + fac * sinAngle*stateImag;
3397  stateVecImag[index] = - fac * sinAngle*stateReal + cosAngle*stateImag;
3398  }
3399  }
3400 }
3401 
3403 
3404  // computes first local index containing a diagonal element
3405  long long int localNumAmps = qureg.numAmpsPerChunk;
3406  long long int densityDim = (1LL << qureg.numQubitsRepresented);
3407  long long int diagSpacing = 1LL + densityDim;
3408  long long int maxNumDiagsPerChunk = 1 + localNumAmps / diagSpacing;
3409  long long int numPrevDiags = (qureg.chunkId>0)? 1+(qureg.chunkId*localNumAmps)/diagSpacing : 0;
3410  long long int globalIndNextDiag = diagSpacing * numPrevDiags;
3411  long long int localIndNextDiag = globalIndNextDiag % localNumAmps;
3412 
3413  // computes how many diagonals are contained in this chunk
3414  long long int numDiagsInThisChunk = maxNumDiagsPerChunk;
3415  if (localIndNextDiag + (numDiagsInThisChunk-1)*diagSpacing >= localNumAmps)
3416  numDiagsInThisChunk -= 1;
3417 
3418  long long int visitedDiags; // number of visited diagonals in this chunk so far
3419  long long int basisStateInd; // current diagonal index being considered
3420  long long int index; // index in the local chunk
3421 
3422  qreal zeroProb = 0;
3423  qreal *stateVecReal = qureg.stateVec.real;
3424 
3425 # ifdef _OPENMP
3426 # pragma omp parallel \
3427  shared (localIndNextDiag, numPrevDiags, diagSpacing, stateVecReal, numDiagsInThisChunk) \
3428  private (visitedDiags, basisStateInd, index) \
3429  reduction ( +:zeroProb )
3430 # endif
3431  {
3432 # ifdef _OPENMP
3433 # pragma omp for schedule (static)
3434 # endif
3435  // sums the diagonal elems of the density matrix where measureQubit=0
3436  for (visitedDiags = 0; visitedDiags < numDiagsInThisChunk; visitedDiags++) {
3437 
3438  basisStateInd = numPrevDiags + visitedDiags;
3439  index = localIndNextDiag + diagSpacing * visitedDiags;
3440 
3441  if (extractBit(measureQubit, basisStateInd) == 0)
3442  zeroProb += stateVecReal[index]; // assume imag[diagonls] ~ 0
3443 
3444  }
3445  }
3446 
3447  return zeroProb;
3448 }
3449 
3458  int measureQubit)
3459 {
3460  // ----- sizes
3461  long long int sizeBlock, // size of blocks
3462  sizeHalfBlock; // size of blocks halved
3463  // ----- indices
3464  long long int thisBlock, // current block
3465  index; // current index for first half block
3466  // ----- measured probability
3467  qreal totalProbability; // probability (returned) value
3468  // ----- temp variables
3469  long long int thisTask;
3470  long long int numTasks=qureg.numAmpsPerChunk>>1;
3471 
3472  // ---------------------------------------------------------------- //
3473  // dimensions //
3474  // ---------------------------------------------------------------- //
3475  sizeHalfBlock = 1LL << (measureQubit); // number of state vector elements to sum,
3476  // and then the number to skip
3477  sizeBlock = 2LL * sizeHalfBlock; // size of blocks (pairs of measure and skip entries)
3478 
3479  // initialise returned value
3480  totalProbability = 0.0;
3481 
3482  qreal *stateVecReal = qureg.stateVec.real;
3483  qreal *stateVecImag = qureg.stateVec.imag;
3484 
3485 # ifdef _OPENMP
3486 # pragma omp parallel \
3487  shared (numTasks,sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag) \
3488  private (thisTask,thisBlock,index) \
3489  reduction ( +:totalProbability )
3490 # endif
3491  {
3492 # ifdef _OPENMP
3493 # pragma omp for schedule (static)
3494 # endif
3495  for (thisTask=0; thisTask<numTasks; thisTask++) {
3496  thisBlock = thisTask / sizeHalfBlock;
3497  index = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3498 
3499  totalProbability += stateVecReal[index]*stateVecReal[index]
3500  + stateVecImag[index]*stateVecImag[index];
3501  }
3502  }
3503  return totalProbability;
3504 }
3505 
3514  // ----- measured probability
3515  qreal totalProbability; // probability (returned) value
3516  // ----- temp variables
3517  long long int thisTask; // task based approach for expose loop with small granularity
3518  long long int numTasks=qureg.numAmpsPerChunk;
3519 
3520  // ---------------------------------------------------------------- //
3521  // find probability //
3522  // ---------------------------------------------------------------- //
3523 
3524  // initialise returned value
3525  totalProbability = 0.0;
3526 
3527  qreal *stateVecReal = qureg.stateVec.real;
3528  qreal *stateVecImag = qureg.stateVec.imag;
3529 
3530 # ifdef _OPENMP
3531 # pragma omp parallel \
3532  shared (numTasks,stateVecReal,stateVecImag) \
3533  private (thisTask) \
3534  reduction ( +:totalProbability )
3535 # endif
3536  {
3537 # ifdef _OPENMP
3538 # pragma omp for schedule (static)
3539 # endif
3540  for (thisTask=0; thisTask<numTasks; thisTask++) {
3541  totalProbability += stateVecReal[thisTask]*stateVecReal[thisTask]
3542  + stateVecImag[thisTask]*stateVecImag[thisTask];
3543  }
3544  }
3545 
3546  return totalProbability;
3547 }
3548 
3549 void statevec_calcProbOfAllOutcomesLocal(qreal* outcomeProbs, Qureg qureg, int* qubits, int numQubits) {
3550 
3551  /* Below, we manually reduce amplitudes into outcomeProbs by using atomic update.
3552  * This maintains OpenMP 3.1 compatibility. An alternative is to use array reduction
3553  * (requires OpenMP 4.5, limits #qubits since outcomeProbs must be a local stack array)
3554  * or a dynamic list of omp locks (duplicates memory cost of outcomeProbs).
3555  * Using locks was always slower than the method below. Using reduction was only
3556  * faster for very few threads, or very few outcomeProbs.
3557  * Finally, we exclude the 'update' clause after 'atomic' to maintain MSVC compatibility
3558  */
3559 
3560  long long int numOutcomeProbs = (1 << numQubits);
3561  long long int j;
3562 
3563  // clear outcomeProbs (in parallel, in case it's large)
3564 # ifdef _OPENMP
3565 # pragma omp parallel \
3566  default (none) \
3567  shared (numOutcomeProbs,outcomeProbs) \
3568  private (j)
3569 # endif
3570  {
3571 # ifdef _OPENMP
3572 # pragma omp for schedule (static)
3573 # endif
3574  for (j=0; j<numOutcomeProbs; j++)
3575  outcomeProbs[j] = 0;
3576  }
3577 
3578  long long int numTasks = qureg.numAmpsPerChunk;
3579  long long int offset = qureg.chunkId*qureg.numAmpsPerChunk;
3580  qreal* stateRe = qureg.stateVec.real;
3581  qreal* stateIm = qureg.stateVec.imag;
3582 
3583  long long int i;
3584  long long int outcomeInd;
3585  int q;
3586  qreal prob;
3587 
3588 # ifdef _OPENMP
3589 # pragma omp parallel \
3590  shared (numTasks,offset, qubits,numQubits, stateRe,stateIm, outcomeProbs) \
3591  private (i, q, outcomeInd, prob)
3592 # endif
3593  {
3594 # ifdef _OPENMP
3595 # pragma omp for schedule (static)
3596 # endif
3597  // every amplitude contributes to a single element of retProbs
3598  for (i=0; i<numTasks; i++) {
3599 
3600  // determine index informed by qubits outcome
3601  outcomeInd = 0;
3602  for (q=0; q<numQubits; q++)
3603  outcomeInd += extractBit(qubits[q], i + offset) * (1LL << q);
3604 
3605  prob = stateRe[i]*stateRe[i] + stateIm[i]*stateIm[i];
3606 
3607  // atomicly update corresponding outcome array element
3608  # ifdef _OPENMP
3609  # pragma omp atomic
3610  # endif
3611  outcomeProbs[outcomeInd] += prob;
3612  }
3613  }
3614 }
3615 
3616 void densmatr_calcProbOfAllOutcomesLocal(qreal* outcomeProbs, Qureg qureg, int* qubits, int numQubits) {
3617 
3618  // clear outcomeProbs (in parallel, in case it's large)
3619  long long int numOutcomeProbs = (1 << numQubits);
3620  long long int j;
3621 
3622 # ifdef _OPENMP
3623 # pragma omp parallel \
3624  default (none) \
3625  shared (numOutcomeProbs,outcomeProbs) \
3626  private (j)
3627 # endif
3628  {
3629 # ifdef _OPENMP
3630 # pragma omp for schedule (static)
3631 # endif
3632  for (j=0; j<numOutcomeProbs; j++)
3633  outcomeProbs[j] = 0;
3634  }
3635 
3636  // compute first local index containing a diagonal element
3637  long long int localNumAmps = qureg.numAmpsPerChunk;
3638  long long int densityDim = (1LL << qureg.numQubitsRepresented);
3639  long long int diagSpacing = 1LL + densityDim;
3640  long long int maxNumDiagsPerChunk = 1 + localNumAmps / diagSpacing;
3641  long long int numPrevDiags = (qureg.chunkId>0)? 1+(qureg.chunkId*localNumAmps)/diagSpacing : 0;
3642  long long int globalIndNextDiag = diagSpacing * numPrevDiags;
3643  long long int localIndNextDiag = globalIndNextDiag % localNumAmps;
3644 
3645  // computes how many diagonals are contained in this chunk
3646  long long int numDiagsInThisChunk = maxNumDiagsPerChunk;
3647  if (localIndNextDiag + (numDiagsInThisChunk-1)*diagSpacing >= localNumAmps)
3648  numDiagsInThisChunk -= 1;
3649 
3650  long long int visitedDiags; // number of visited diagonals in this chunk so far
3651  long long int basisStateInd; // current diagonal index being considered
3652  long long int index; // index in the local chunk
3653 
3654  int q;
3655  long long int outcomeInd;
3656  qreal *stateRe = qureg.stateVec.real;
3657 
3658 # ifdef _OPENMP
3659 # pragma omp parallel \
3660  shared (localIndNextDiag, numPrevDiags, diagSpacing, stateRe, numDiagsInThisChunk, qubits,numQubits, outcomeProbs) \
3661  private (visitedDiags, basisStateInd, index, q,outcomeInd)
3662 # endif
3663  {
3664 # ifdef _OPENMP
3665 # pragma omp for schedule (static)
3666 # endif
3667  // sums the diagonal elems of the density matrix where measureQubit=0
3668  for (visitedDiags = 0; visitedDiags < numDiagsInThisChunk; visitedDiags++) {
3669 
3670  basisStateInd = numPrevDiags + visitedDiags;
3671  index = localIndNextDiag + diagSpacing * visitedDiags;
3672 
3673  // determine outcome implied by basisStateInd
3674  outcomeInd = 0;
3675  for (q=0; q<numQubits; q++)
3676  outcomeInd += extractBit(qubits[q], basisStateInd) * (1LL << q);
3677 
3678  // atomicly update corresponding outcome array element
3679  # ifdef _OPENMP
3680  # pragma omp atomic
3681  # endif
3682  outcomeProbs[outcomeInd] += stateRe[index];
3683  }
3684  }
3685 }
3686 
3687 void statevec_controlledPhaseFlip (Qureg qureg, int idQubit1, int idQubit2)
3688 {
3689  long long int index;
3690  long long int stateVecSize;
3691  int bit1, bit2;
3692 
3693  long long int chunkSize=qureg.numAmpsPerChunk;
3694  long long int chunkId=qureg.chunkId;
3695 
3696  // dimension of the state vector
3697  stateVecSize = qureg.numAmpsPerChunk;
3698  qreal *stateVecReal = qureg.stateVec.real;
3699  qreal *stateVecImag = qureg.stateVec.imag;
3700 
3701 # ifdef _OPENMP
3702 # pragma omp parallel for \
3703  default (none) \
3704  shared (stateVecSize, stateVecReal,stateVecImag, chunkId,chunkSize,idQubit1,idQubit2 ) \
3705  private (index,bit1,bit2) \
3706  schedule (static)
3707 # endif
3708  for (index=0; index<stateVecSize; index++) {
3709  bit1 = extractBit (idQubit1, index+chunkId*chunkSize);
3710  bit2 = extractBit (idQubit2, index+chunkId*chunkSize);
3711  if (bit1 && bit2) {
3712  stateVecReal [index] = - stateVecReal [index];
3713  stateVecImag [index] = - stateVecImag [index];
3714  }
3715  }
3716 }
3717 
3718 void statevec_multiControlledPhaseFlip(Qureg qureg, int *controlQubits, int numControlQubits)
3719 {
3720  long long int index;
3721  long long int stateVecSize;
3722 
3723  long long int chunkSize=qureg.numAmpsPerChunk;
3724  long long int chunkId=qureg.chunkId;
3725 
3726  long long int mask = getQubitBitMask(controlQubits, numControlQubits);
3727 
3728  stateVecSize = qureg.numAmpsPerChunk;
3729  qreal *stateVecReal = qureg.stateVec.real;
3730  qreal *stateVecImag = qureg.stateVec.imag;
3731 
3732 # ifdef _OPENMP
3733 # pragma omp parallel \
3734  default (none) \
3735  shared (stateVecSize, stateVecReal,stateVecImag, mask, chunkId,chunkSize ) \
3736  private (index)
3737 # endif
3738  {
3739 # ifdef _OPENMP
3740 # pragma omp for schedule (static)
3741 # endif
3742  for (index=0; index<stateVecSize; index++) {
3743  if (mask == (mask & (index+chunkId*chunkSize)) ){
3744  stateVecReal [index] = - stateVecReal [index];
3745  stateVecImag [index] = - stateVecImag [index];
3746  }
3747  }
3748  }
3749 }
3750 
3767 void statevec_collapseToKnownProbOutcomeLocal(Qureg qureg, int measureQubit, int outcome, qreal totalProbability)
3768 {
3769  // ----- sizes
3770  long long int sizeBlock, // size of blocks
3771  sizeHalfBlock; // size of blocks halved
3772  // ----- indices
3773  long long int thisBlock, // current block
3774  index; // current index for first half block
3775  // ----- measured probability
3776  qreal renorm; // probability (returned) value
3777  // ----- temp variables
3778  long long int thisTask; // task based approach for expose loop with small granularity
3779  // (good for shared memory parallelism)
3780  long long int numTasks=qureg.numAmpsPerChunk>>1;
3781 
3782  // ---------------------------------------------------------------- //
3783  // dimensions //
3784  // ---------------------------------------------------------------- //
3785  sizeHalfBlock = 1LL << (measureQubit); // number of state vector elements to sum,
3786  // and then the number to skip
3787  sizeBlock = 2LL * sizeHalfBlock; // size of blocks (pairs of measure and skip entries)
3788 
3789  renorm=1/sqrt(totalProbability);
3790  qreal *stateVecReal = qureg.stateVec.real;
3791  qreal *stateVecImag = qureg.stateVec.imag;
3792 
3793 
3794 # ifdef _OPENMP
3795 # pragma omp parallel \
3796  default (none) \
3797  shared (numTasks,sizeBlock,sizeHalfBlock, stateVecReal,stateVecImag,renorm,outcome) \
3798  private (thisTask,thisBlock,index)
3799 # endif
3800  {
3801  if (outcome==0){
3802  // measure qubit is 0
3803 # ifdef _OPENMP
3804 # pragma omp for schedule (static)
3805 # endif
3806  for (thisTask=0; thisTask<numTasks; thisTask++) {
3807  thisBlock = thisTask / sizeHalfBlock;
3808  index = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3809  stateVecReal[index]=stateVecReal[index]*renorm;
3810  stateVecImag[index]=stateVecImag[index]*renorm;
3811 
3812  stateVecReal[index+sizeHalfBlock]=0;
3813  stateVecImag[index+sizeHalfBlock]=0;
3814  }
3815  } else {
3816  // measure qubit is 1
3817 # ifdef _OPENMP
3818 # pragma omp for schedule (static)
3819 # endif
3820  for (thisTask=0; thisTask<numTasks; thisTask++) {
3821  thisBlock = thisTask / sizeHalfBlock;
3822  index = thisBlock*sizeBlock + thisTask%sizeHalfBlock;
3823  stateVecReal[index]=0;
3824  stateVecImag[index]=0;
3825 
3826  stateVecReal[index+sizeHalfBlock]=stateVecReal[index+sizeHalfBlock]*renorm;
3827  stateVecImag[index+sizeHalfBlock]=stateVecImag[index+sizeHalfBlock]*renorm;
3828  }
3829  }
3830  }
3831 
3832 }
3833 
3849 void statevec_collapseToKnownProbOutcomeDistributedRenorm (Qureg qureg, int measureQubit, qreal totalProbability)
3850 {
3851  // ----- temp variables
3852  long long int thisTask;
3853  long long int numTasks=qureg.numAmpsPerChunk;
3854 
3855  qreal renorm=1/sqrt(totalProbability);
3856 
3857  qreal *stateVecReal = qureg.stateVec.real;
3858  qreal *stateVecImag = qureg.stateVec.imag;
3859 
3860 # ifdef _OPENMP
3861 # pragma omp parallel \
3862  shared (numTasks,stateVecReal,stateVecImag) \
3863  private (thisTask)
3864 # endif
3865  {
3866 # ifdef _OPENMP
3867 # pragma omp for schedule (static)
3868 # endif
3869  for (thisTask=0; thisTask<numTasks; thisTask++) {
3870  stateVecReal[thisTask] = stateVecReal[thisTask]*renorm;
3871  stateVecImag[thisTask] = stateVecImag[thisTask]*renorm;
3872  }
3873  }
3874 }
3875 
3876 /* Set all amplitudes in one chunk to 0.
3877  * Measure in Zero performs an irreversible change to the state vector: it updates the vector according
3878  * to the event that a zero have been measured on the qubit indicated by measureQubit (where
3879  * this label starts from 0, of course). It achieves this by setting all inconsistent amplitudes to 0 and
3880  * then renormalising based on the total probability of measuring measureQubit=0 or 1.
3881  * In the distributed version, one block (with measureQubit=0 in the first half of the block and
3882  * measureQubit=1 in the second half of the block) is spread over multiple chunks, meaning that each chunks performs
3883  * only renormalisation or only setting amplitudes to 0. This function handles setting amplitudes to 0.
3884  *
3885  * @param[in,out] qureg object representing the set of qubits
3886  */
3888 {
3889  // ----- temp variables
3890  long long int thisTask;
3891  long long int numTasks=qureg.numAmpsPerChunk;
3892 
3893  // ---------------------------------------------------------------- //
3894  // find probability //
3895  // ---------------------------------------------------------------- //
3896 
3897  qreal *stateVecReal = qureg.stateVec.real;
3898  qreal *stateVecImag = qureg.stateVec.imag;
3899 
3900 # ifdef _OPENMP
3901 # pragma omp parallel \
3902  shared (numTasks,stateVecReal,stateVecImag) \
3903  private (thisTask)
3904 # endif
3905  {
3906 # ifdef _OPENMP
3907 # pragma omp for schedule (static)
3908 # endif
3909  for (thisTask=0; thisTask<numTasks; thisTask++) {
3910  stateVecReal[thisTask] = 0;
3911  stateVecImag[thisTask] = 0;
3912  }
3913  }
3914 }
3915 
3922 void statevec_swapQubitAmpsLocal(Qureg qureg, int qb1, int qb2) {
3923 
3924  // can't use qureg.stateVec as a private OMP var
3925  qreal *reVec = qureg.stateVec.real;
3926  qreal *imVec = qureg.stateVec.imag;
3927 
3928  long long int numTasks = qureg.numAmpsPerChunk >> 2; // each iteration updates 2 amps and skips 2 amps
3929  long long int thisTask;
3930  long long int ind00, ind01, ind10;
3931  qreal re01, re10;
3932  qreal im01, im10;
3933 
3934 # ifdef _OPENMP
3935 # pragma omp parallel \
3936  default (none) \
3937  shared (reVec,imVec,numTasks,qb1,qb2) \
3938  private (thisTask, ind00,ind01,ind10, re01,re10, im01,im10)
3939 # endif
3940  {
3941 # ifdef _OPENMP
3942 # pragma omp for schedule (static)
3943 # endif
3944  for (thisTask=0; thisTask<numTasks; thisTask++) {
3945  // determine ind00 of |..0..0..>, |..0..1..> and |..1..0..>
3946  ind00 = insertTwoZeroBits(thisTask, qb1, qb2);
3947  ind01 = flipBit(ind00, qb1);
3948  ind10 = flipBit(ind00, qb2);
3949 
3950  // extract statevec amplitudes
3951  re01 = reVec[ind01]; im01 = imVec[ind01];
3952  re10 = reVec[ind10]; im10 = imVec[ind10];
3953 
3954  // swap 01 and 10 amps
3955  reVec[ind01] = re10; reVec[ind10] = re01;
3956  imVec[ind01] = im10; imVec[ind10] = im01;
3957  }
3958  }
3959 }
3960 
3965 void statevec_swapQubitAmpsDistributed(Qureg qureg, int pairRank, int qb1, int qb2) {
3966 
3967  // can't use qureg.stateVec as a private OMP var
3968  qreal *reVec = qureg.stateVec.real;
3969  qreal *imVec = qureg.stateVec.imag;
3970  qreal *rePairVec = qureg.pairStateVec.real;
3971  qreal *imPairVec = qureg.pairStateVec.imag;
3972 
3973  long long int numLocalAmps = qureg.numAmpsPerChunk;
3974  long long int globalStartInd = qureg.chunkId * numLocalAmps;
3975  long long int pairGlobalStartInd = pairRank * numLocalAmps;
3976 
3977  long long int localInd, globalInd;
3978  long long int pairLocalInd, pairGlobalInd;
3979 
3980 # ifdef _OPENMP
3981 # pragma omp parallel \
3982  default (none) \
3983  shared (reVec,imVec,rePairVec,imPairVec,numLocalAmps,globalStartInd,pairGlobalStartInd,qb1,qb2) \
3984  private (localInd,globalInd, pairLocalInd,pairGlobalInd)
3985 # endif
3986  {
3987 # ifdef _OPENMP
3988 # pragma omp for schedule (static)
3989 # endif
3990  for (localInd=0; localInd < numLocalAmps; localInd++) {
3991 
3992  globalInd = globalStartInd + localInd;
3993  if (isOddParity(globalInd, qb1, qb2)) {
3994 
3995  pairGlobalInd = flipBit(flipBit(globalInd, qb1), qb2);
3996  pairLocalInd = pairGlobalInd - pairGlobalStartInd;
3997 
3998  reVec[localInd] = rePairVec[pairLocalInd];
3999  imVec[localInd] = imPairVec[pairLocalInd];
4000  }
4001  }
4002  }
4003 }
4004 
4005 void statevec_setWeightedQureg(Complex fac1, Qureg qureg1, Complex fac2, Qureg qureg2, Complex facOut, Qureg out) {
4006 
4007  long long int numAmps = qureg1.numAmpsPerChunk;
4008 
4009  qreal *vecRe1 = qureg1.stateVec.real;
4010  qreal *vecIm1 = qureg1.stateVec.imag;
4011  qreal *vecRe2 = qureg2.stateVec.real;
4012  qreal *vecIm2 = qureg2.stateVec.imag;
4013  qreal *vecReOut = out.stateVec.real;
4014  qreal *vecImOut = out.stateVec.imag;
4015 
4016  qreal facRe1 = fac1.real;
4017  qreal facIm1 = fac1.imag;
4018  qreal facRe2 = fac2.real;
4019  qreal facIm2 = fac2.imag;
4020  qreal facReOut = facOut.real;
4021  qreal facImOut = facOut.imag;
4022 
4023  qreal re1,im1, re2,im2, reOut,imOut;
4024  long long int index;
4025 
4026 # ifdef _OPENMP
4027 # pragma omp parallel \
4028  shared (vecRe1,vecIm1, vecRe2,vecIm2, vecReOut,vecImOut, facRe1,facIm1,facRe2,facIm2, numAmps) \
4029  private (index, re1,im1, re2,im2, reOut,imOut)
4030 # endif
4031  {
4032 # ifdef _OPENMP
4033 # pragma omp for schedule (static)
4034 # endif
4035  for (index=0LL; index<numAmps; index++) {
4036  re1 = vecRe1[index]; im1 = vecIm1[index];
4037  re2 = vecRe2[index]; im2 = vecIm2[index];
4038  reOut = vecReOut[index];
4039  imOut = vecImOut[index];
4040 
4041  vecReOut[index] = (facReOut*reOut - facImOut*imOut) + (facRe1*re1 - facIm1*im1) + (facRe2*re2 - facIm2*im2);
4042  vecImOut[index] = (facReOut*imOut + facImOut*reOut) + (facRe1*im1 + facIm1*re1) + (facRe2*im2 + facIm2*re2);
4043  }
4044  }
4045 }
4046 
4048 
4049  // each node/chunk modifies only its values in an embarrassingly parallelisable way
4050  long long int numAmps = qureg.numAmpsPerChunk;
4051 
4052  qreal* stateRe = qureg.stateVec.real;
4053  qreal* stateIm = qureg.stateVec.imag;
4054  qreal* opRe = op.real;
4055  qreal* opIm = op.imag;
4056 
4057  qreal a,b,c,d;
4058  long long int index;
4059 
4060 # ifdef _OPENMP
4061 # pragma omp parallel \
4062  shared (stateRe,stateIm, opRe,opIm, numAmps) \
4063  private (index, a,b,c,d)
4064 # endif
4065  {
4066 # ifdef _OPENMP
4067 # pragma omp for schedule (static)
4068 # endif
4069  for (index=0LL; index<numAmps; index++) {
4070  a = stateRe[index];
4071  b = stateIm[index];
4072  c = opRe[index];
4073  d = opIm[index];
4074 
4075  // (a + b i)(c + d i) = (a c - b d) + i (a d + b c)
4076  stateRe[index] = a*c - b*d;
4077  stateIm[index] = a*d + b*c;
4078  }
4079  }
4080 }
4081 
4083 
4084  /* ALL values of op are pre-loaded into qureg.pairStateVector (on every node).
4085  * Furthermore, since it's gauranteed each node contains an integer number of
4086  * columns of qureg (because op upperlimits the number of nodes; 1 per element),
4087  * then we know iteration below begins at the 'top' of a column, and there is
4088  * no offset for op (pairStateVector)
4089  */
4090 
4091  long long int numAmps = qureg.numAmpsPerChunk;
4092  int opDim = (1 << op.numQubits);
4093 
4094  qreal* stateRe = qureg.stateVec.real;
4095  qreal* stateIm = qureg.stateVec.imag;
4096  qreal* opRe = qureg.pairStateVec.real;
4097  qreal* opIm = qureg.pairStateVec.imag;
4098 
4099  qreal a,b,c,d;
4100  long long int index;
4101 
4102 # ifdef _OPENMP
4103 # pragma omp parallel \
4104  shared (stateRe,stateIm, opRe,opIm, numAmps,opDim) \
4105  private (index, a,b,c,d)
4106 # endif
4107  {
4108 # ifdef _OPENMP
4109 # pragma omp for schedule (static)
4110 # endif
4111  for (index=0LL; index<numAmps; index++) {
4112  a = stateRe[index];
4113  b = stateIm[index];
4114  c = opRe[index % opDim];
4115  d = opIm[index % opDim];
4116 
4117  // (a + b i)(c + d i) = (a c - b d) + i (a d + b c)
4118  stateRe[index] = a*c - b*d;
4119  stateIm[index] = a*d + b*c;
4120  }
4121  }
4122 }
4123 
4125 
4126  qreal expecRe = 0;
4127  qreal expecIm = 0;
4128 
4129  long long int index;
4130  long long int numAmps = qureg.numAmpsPerChunk;
4131  qreal *stateReal = qureg.stateVec.real;
4132  qreal *stateImag = qureg.stateVec.imag;
4133  qreal *opReal = op.real;
4134  qreal *opImag = op.imag;
4135 
4136  qreal vecRe,vecIm,vecAbs, opRe, opIm;
4137 
4138 # ifdef _OPENMP
4139 # pragma omp parallel \
4140  shared (stateReal, stateImag, opReal, opImag, numAmps) \
4141  private (index, vecRe,vecIm,vecAbs, opRe,opIm) \
4142  reduction ( +:expecRe, expecIm )
4143 # endif
4144  {
4145 # ifdef _OPENMP
4146 # pragma omp for schedule (static)
4147 # endif
4148  for (index=0; index < numAmps; index++) {
4149  vecRe = stateReal[index];
4150  vecIm = stateImag[index];
4151  opRe = opReal[index];
4152  opIm = opImag[index];
4153 
4154  // abs(vec)^2 op
4155  vecAbs = vecRe*vecRe + vecIm*vecIm;
4156  expecRe += vecAbs*opRe;
4157  expecIm += vecAbs*opIm;
4158  }
4159  }
4160 
4161  Complex innerProd;
4162  innerProd.real = expecRe;
4163  innerProd.imag = expecIm;
4164  return innerProd;
4165 }
4166 
4168 
4169  /* since for every 1 element in \p op, there exists a column in \p qureg,
4170  * we know that the elements in \p op live on the same node as the
4171  * corresponding diagonal elements of \p qureg. This means, the problem is
4172  * embarrassingly parallelisable, and the code below works for both
4173  * serial and distributed modes.
4174  */
4175 
4176  // computes first local index containing a diagonal element
4177  long long int diagSpacing = 1LL + (1LL << qureg.numQubitsRepresented);
4178  long long int numPrevDiags = (qureg.chunkId>0)? 1+(qureg.chunkId*qureg.numAmpsPerChunk)/diagSpacing : 0;
4179  long long int globalIndNextDiag = diagSpacing * numPrevDiags;
4180  long long int localIndNextDiag = globalIndNextDiag % qureg.numAmpsPerChunk;
4181  long long int numAmps = qureg.numAmpsPerChunk;
4182 
4183  qreal* stateReal = qureg.stateVec.real;
4184  qreal* stateImag = qureg.stateVec.imag;
4185  qreal* opReal = op.real;
4186  qreal* opImag = op.imag;
4187 
4188  qreal expecRe = 0;
4189  qreal expecIm = 0;
4190 
4191  long long int stateInd;
4192  long long int opInd;
4193  qreal matRe, matIm, opRe, opIm;
4194 
4195  // visits every diagonal element with global index (2^n + 1)i for i in [0, 2^n-1]
4196 
4197 # ifdef _OPENMP
4198 # pragma omp parallel \
4199  shared (stateReal,stateImag, opReal,opImag, localIndNextDiag,diagSpacing,numAmps) \
4200  private (stateInd,opInd, matRe,matIm, opRe,opIm) \
4201  reduction ( +:expecRe, expecIm )
4202 # endif
4203  {
4204 # ifdef _OPENMP
4205 # pragma omp for schedule (static)
4206 # endif
4207  for (stateInd=localIndNextDiag; stateInd < numAmps; stateInd += diagSpacing) {
4208 
4209  matRe = stateReal[stateInd];
4210  matIm = stateImag[stateInd];
4211  opInd = (stateInd - localIndNextDiag) / diagSpacing;
4212  opRe = opReal[opInd];
4213  opIm = opImag[opInd];
4214 
4215  // (matRe + matIm i)(opRe + opIm i) =
4216  // (matRe opRe - matIm opIm) + i (matRe opIm + matIm opRe)
4217  expecRe += matRe * opRe - matIm * opIm;
4218  expecIm += matRe * opIm + matIm * opRe;
4219  }
4220  }
4221 
4222  Complex expecVal;
4223  expecVal.real = expecRe;
4224  expecVal.imag = expecIm;
4225  return expecVal;
4226 }
4227 
4228 void agnostic_setDiagonalOpElems(DiagonalOp op, long long int startInd, qreal* real, qreal* imag, long long int numElems) {
4229 
4230  // local start/end indices of the given amplitudes, assuming they fit in this chunk
4231  // these may be negative or above qureg.numAmpsPerChunk
4232  long long int localStartInd = startInd - op.chunkId*op.numElemsPerChunk;
4233  long long int localEndInd = localStartInd + numElems; // exclusive
4234 
4235  // add this to a local index to get corresponding elem in reals & imags
4236  long long int offset = op.chunkId*op.numElemsPerChunk - startInd;
4237 
4238  // restrict these indices to fit into this chunk
4239  if (localStartInd < 0)
4240  localStartInd = 0;
4241  if (localEndInd > op.numElemsPerChunk)
4242  localEndInd = op.numElemsPerChunk;
4243  // they may now be out of order = no iterations
4244 
4245  // unpacking OpenMP vars
4246  long long int index;
4247  qreal* vecRe = op.real;
4248  qreal* vecIm = op.imag;
4249 
4250 # ifdef _OPENMP
4251 # pragma omp parallel \
4252  default (none) \
4253  shared (localStartInd,localEndInd, vecRe,vecIm, real,imag, offset) \
4254  private (index)
4255 # endif
4256  {
4257 # ifdef _OPENMP
4258 # pragma omp for schedule (static)
4259 # endif
4260  // iterate these local inds - this might involve no iterations
4261  for (index=localStartInd; index < localEndInd; index++) {
4262  vecRe[index] = real[index + offset];
4263  vecIm[index] = imag[index + offset];
4264  }
4265  }
4266 }
4267 
4269  Qureg qureg, int* qubits, int numQubits, enum bitEncoding encoding,
4270  qreal* coeffs, qreal* exponents, int numTerms,
4271  long long int* overrideInds, qreal* overridePhases, int numOverrides,
4272  int conj)
4273 {
4274  // each node/chunk modifies only local values in an embarrassingly parallel way
4275 
4276  // thread shared vars
4277  int chunkId = qureg.chunkId;
4278  long long int numAmps = qureg.numAmpsPerChunk;
4279  qreal* stateRe = qureg.stateVec.real;
4280  qreal* stateIm = qureg.stateVec.imag;
4281 
4282  // thread private vars
4283  long long int index, globalAmpInd, phaseInd;
4284  int i, t, q;
4285  qreal phase, c, s, re, im;
4286 
4287 # ifdef _OPENMP
4288 # pragma omp parallel \
4289  default (none) \
4290  shared (chunkId,numAmps, stateRe,stateIm, qubits,numQubits,encoding, coeffs,exponents,numTerms, overrideInds,overridePhases,numOverrides, conj) \
4291  private (index, globalAmpInd, phaseInd, i,t,q, phase, c,s,re,im)
4292 # endif
4293  {
4294 # ifdef _OPENMP
4295 # pragma omp for schedule (static)
4296 # endif
4297  for (index=0LL; index<numAmps; index++) {
4298 
4299  // determine global amplitude index
4300  globalAmpInd = chunkId * numAmps + index;
4301 
4302  // determine phase index of {qubits}
4303  phaseInd = 0LL;
4304  if (encoding == UNSIGNED) {
4305  for (q=0; q<numQubits; q++) // use significance order specified by {qubits}
4306  phaseInd += (1LL << q) * extractBit(qubits[q], globalAmpInd);
4307  }
4308  else if (encoding == TWOS_COMPLEMENT) {
4309  for (q=0; q<numQubits-1; q++) // use final qubit to indicate sign
4310  phaseInd += (1LL << q) * extractBit(qubits[q], globalAmpInd);
4311  if (extractBit(qubits[numQubits-1], globalAmpInd) == 1)
4312  phaseInd -= (1LL << (numQubits-1));
4313  }
4314 
4315  // determine if this phase index has an overriden value (i < numOverrides)
4316  for (i=0; i<numOverrides; i++)
4317  if (phaseInd == overrideInds[i])
4318  break;
4319 
4320  // determine phase from {coeffs}, {exponents} (unless overriden)
4321  phase = 0;
4322  if (i < numOverrides)
4323  phase = overridePhases[i];
4324  else
4325  for (t=0; t<numTerms; t++)
4326  phase += coeffs[t] * pow(phaseInd, exponents[t]);
4327 
4328  // negate phase to conjugate operator
4329  if (conj)
4330  phase *= -1;
4331 
4332  // modify amp to amp * exp(i phase)
4333  c = cos(phase);
4334  s = sin(phase);
4335  re = stateRe[index];
4336  im = stateIm[index];
4337 
4338  // = {re[amp] cos(phase) - im[amp] sin(phase)} + i {re[amp] sin(phase) + im[amp] cos(phase)}
4339  stateRe[index] = re*c - im*s;
4340  stateIm[index] = re*s + im*c;
4341  }
4342  }
4343 }
4344 
4346  Qureg qureg, int* qubits, int* numQubitsPerReg, int numRegs, enum bitEncoding encoding,
4347  qreal* coeffs, qreal* exponents, int* numTermsPerReg,
4348  long long int* overrideInds, qreal* overridePhases, int numOverrides,
4349  int conj)
4350 {
4351  // each node/chunk modifies only local values in an embarrassingly parallel way
4352 
4353  // note partitions of qubits, coeffs, exponents and overrideInds are stored flat
4354 
4355  // thread-shared vaes
4356  int chunkId = qureg.chunkId;
4357  long long int numAmps = qureg.numAmpsPerChunk;
4358  qreal* stateRe = qureg.stateVec.real;
4359  qreal* stateIm = qureg.stateVec.imag;
4360 
4361  // thread-private vars
4362  long long int index, globalAmpInd;
4363  int r, q, i, t, found, flatInd;
4364  qreal phase, c, s, re, im;
4365 
4366  // each thread has a private static array of length >= numRegs (private var-length is illegal)
4367  long long int phaseInds[MAX_NUM_REGS_APPLY_ARBITRARY_PHASE];
4368 
4369 # ifdef _OPENMP
4370 # pragma omp parallel \
4371  default (none) \
4372  shared (chunkId,numAmps, stateRe,stateIm, qubits,numQubitsPerReg,numRegs,encoding, coeffs,exponents,numTermsPerReg, overrideInds,overridePhases,numOverrides, conj) \
4373  private (index,globalAmpInd, r,q,i,t,flatInd, found, phaseInds,phase, c,s,re,im)
4374 # endif
4375  {
4376 # ifdef _OPENMP
4377 # pragma omp for schedule (static)
4378 # endif
4379  for (index=0LL; index<numAmps; index++) {
4380 
4381  // determine global amplitude index
4382  globalAmpInd = chunkId * numAmps + index;
4383 
4384  // determine phase indices
4385  flatInd = 0;
4386  for (r=0; r<numRegs; r++) {
4387  phaseInds[r] = 0LL;
4388 
4389  if (encoding == UNSIGNED) {
4390  for (q=0; q<numQubitsPerReg[r]; q++)
4391  phaseInds[r] += (1LL << q) * extractBit(qubits[flatInd++], globalAmpInd); // qubits[flatInd] ~ qubits[r][q]
4392  }
4393  else if (encoding == TWOS_COMPLEMENT) {
4394  for (q=0; q<numQubitsPerReg[r]-1; q++)
4395  phaseInds[r] += (1LL << q) * extractBit(qubits[flatInd++], globalAmpInd);
4396  // use final qubit to indicate sign
4397  if (extractBit(qubits[flatInd++], globalAmpInd) == 1)
4398  phaseInds[r] -= (1LL << (numQubitsPerReg[r]-1));
4399  }
4400  }
4401 
4402  // determine if this phase index has an overriden value (i < numOverrides)
4403  for (i=0; i<numOverrides; i++) {
4404  found = 1;
4405  for (r=0; r<numRegs; r++) {
4406  if (phaseInds[r] != overrideInds[i*numRegs+r]) {
4407  found = 0;
4408  break;
4409  }
4410  }
4411  if (found)
4412  break;
4413  }
4414 
4415  // compute the phase (unless overriden)
4416  phase = 0;
4417  if (i < numOverrides)
4418  phase = overridePhases[i];
4419  else {
4420  flatInd = 0;
4421  for (r=0; r<numRegs; r++) {
4422  for (t=0; t<numTermsPerReg[r]; t++) {
4423  phase += coeffs[flatInd] * pow(phaseInds[r], exponents[flatInd]);
4424  flatInd++;
4425  }
4426  }
4427  }
4428 
4429  // negate phase to conjugate operator
4430  if (conj)
4431  phase *= -1;
4432 
4433  // modify amp to amp * exp(i phase)
4434  c = cos(phase);
4435  s = sin(phase);
4436  re = stateRe[index];
4437  im = stateIm[index];
4438 
4439  // = {re[amp] cos(phase) - im[amp] sin(phase)} + i {re[amp] sin(phase) + im[amp] cos(phase)}
4440  stateRe[index] = re*c - im*s;
4441  stateIm[index] = re*s + im*c;
4442  }
4443  }
4444 }
4445 
4447  Qureg qureg, int* qubits, int* numQubitsPerReg, int numRegs, enum bitEncoding encoding,
4448  enum phaseFunc phaseFuncName,
4449  qreal* params, int numParams,
4450  long long int* overrideInds, qreal* overridePhases, int numOverrides,
4451  int conj
4452 ) {
4453  // each node/chunk modifies only local values in an embarrassingly parallel way
4454 
4455  // note partitions of qubits, overrideInds are stored flat
4456 
4457  // thread-shared vaes
4458  int chunkId = qureg.chunkId;
4459  long long int numAmps = qureg.numAmpsPerChunk;
4460  qreal* stateRe = qureg.stateVec.real;
4461  qreal* stateIm = qureg.stateVec.imag;
4462 
4463  // thread-private vars
4464  long long int index, globalAmpInd;
4465  int r, q, i, found, flatInd;
4466  qreal phase, norm, prod, dist, c, s, re, im;
4467 
4468  // each thread has a private static array of length >= numRegs (private var-length is illegal)
4469  long long int phaseInds[MAX_NUM_REGS_APPLY_ARBITRARY_PHASE];
4470 
4471 # ifdef _OPENMP
4472 # pragma omp parallel \
4473  default (none) \
4474  shared (chunkId,numAmps, stateRe,stateIm, qubits,numQubitsPerReg,numRegs,encoding, phaseFuncName,params,numParams, overrideInds,overridePhases,numOverrides, conj) \
4475  private (index,globalAmpInd, r,q,i,flatInd, found, phaseInds,phase,norm,prod,dist, c,s,re,im)
4476 # endif
4477  {
4478 # ifdef _OPENMP
4479 # pragma omp for schedule (static)
4480 # endif
4481  for (index=0LL; index<numAmps; index++) {
4482 
4483  // determine global amplitude index
4484  globalAmpInd = chunkId * numAmps + index;
4485 
4486  // determine phase indices
4487  flatInd = 0;
4488  for (r=0; r<numRegs; r++) {
4489  phaseInds[r] = 0LL;
4490 
4491  if (encoding == UNSIGNED) {
4492  for (q=0; q<numQubitsPerReg[r]; q++)
4493  phaseInds[r] += (1LL << q) * extractBit(qubits[flatInd++], globalAmpInd); // qubits[flatInd] ~ qubits[r][q]
4494  }
4495  else if (encoding == TWOS_COMPLEMENT) {
4496  for (q=0; q<numQubitsPerReg[r]-1; q++)
4497  phaseInds[r] += (1LL << q) * extractBit(qubits[flatInd++], globalAmpInd);
4498  // use final qubit to indicate sign
4499  if (extractBit(qubits[flatInd++], globalAmpInd) == 1)
4500  phaseInds[r] -= (1LL << (numQubitsPerReg[r]-1));
4501  }
4502  }
4503 
4504  // determine if this phase index has an overriden value (i < numOverrides)
4505  for (i=0; i<numOverrides; i++) {
4506  found = 1;
4507  for (r=0; r<numRegs; r++) {
4508  if (phaseInds[r] != overrideInds[i*numRegs+r]) {
4509  found = 0;
4510  break;
4511  }
4512  }
4513  if (found)
4514  break;
4515  }
4516 
4517  // compute the phase (unless overriden)
4518  phase = 0;
4519  if (i < numOverrides)
4520  phase = overridePhases[i];
4521  else {
4522  // compute norm related phases
4523  if (phaseFuncName == NORM || phaseFuncName == INVERSE_NORM ||
4524  phaseFuncName == SCALED_NORM || phaseFuncName == SCALED_INVERSE_NORM ||
4525  phaseFuncName == SCALED_INVERSE_SHIFTED_NORM) {
4526 
4527  norm = 0;
4528  if (phaseFuncName == SCALED_INVERSE_SHIFTED_NORM) {
4529  for (r=0; r<numRegs; r++)
4530  norm += (phaseInds[r] - params[2+r])*(phaseInds[r] - params[2+r]);
4531  }
4532  else
4533  for (r=0; r<numRegs; r++)
4534  norm += phaseInds[r]*phaseInds[r];
4535  norm = sqrt(norm);
4536 
4537  if (phaseFuncName == NORM)
4538  phase = norm;
4539  else if (phaseFuncName == INVERSE_NORM)
4540  phase = (norm == 0.)? params[0] : 1/norm; // smallest non-zero norm is 1
4541  else if (phaseFuncName == SCALED_NORM)
4542  phase = params[0] * norm;
4543  else if (phaseFuncName == SCALED_INVERSE_NORM || phaseFuncName == SCALED_INVERSE_SHIFTED_NORM)
4544  phase = (norm <= REAL_EPS)? params[1] : params[0] / norm; // unless shifted closer to zero
4545  }
4546  // compute product related phases
4547  else if (phaseFuncName == PRODUCT || phaseFuncName == INVERSE_PRODUCT ||
4548  phaseFuncName == SCALED_PRODUCT || phaseFuncName == SCALED_INVERSE_PRODUCT) {
4549 
4550  prod = 1;
4551  for (r=0; r<numRegs; r++)
4552  prod *= phaseInds[r];
4553 
4554  if (phaseFuncName == PRODUCT)
4555  phase = prod;
4556  else if (phaseFuncName == INVERSE_PRODUCT)
4557  phase = (prod == 0.)? params[0] : 1/prod; // smallest non-zero product norm is +- 1
4558  else if (phaseFuncName == SCALED_PRODUCT)
4559  phase = params[0] * prod;
4560  else if (phaseFuncName == SCALED_INVERSE_PRODUCT)
4561  phase = (prod == 0.)? params[1] : params[0] / prod;
4562  }
4563  // compute Euclidean distance related phases
4564  else if (phaseFuncName == DISTANCE || phaseFuncName == INVERSE_DISTANCE ||
4565  phaseFuncName == SCALED_DISTANCE || phaseFuncName == SCALED_INVERSE_DISTANCE ||
4566  phaseFuncName == SCALED_INVERSE_SHIFTED_DISTANCE) {
4567 
4568  dist = 0;
4569  if (phaseFuncName == SCALED_INVERSE_SHIFTED_DISTANCE) {
4570  for (r=0; r<numRegs; r+=2)
4571  dist += (phaseInds[r] - phaseInds[r+1] - params[2+r/2])*(phaseInds[r] - phaseInds[r+1] - params[2+r/2]);
4572  }
4573  else
4574  for (r=0; r<numRegs; r+=2)
4575  dist += (phaseInds[r+1] - phaseInds[r])*(phaseInds[r+1] - phaseInds[r]);
4576  dist = sqrt(dist);
4577 
4578  if (phaseFuncName == DISTANCE)
4579  phase = dist;
4580  else if (phaseFuncName == INVERSE_DISTANCE)
4581  phase = (dist == 0.)? params[0] : 1/dist; // smallest non-zero dist is 1
4582  else if (phaseFuncName == SCALED_DISTANCE)
4583  phase = params[0] * dist;
4584  else if (phaseFuncName == SCALED_INVERSE_DISTANCE || phaseFuncName == SCALED_INVERSE_SHIFTED_DISTANCE)
4585  phase = (dist <= REAL_EPS)? params[1] : params[0] / dist; // unless shifted closer to 0
4586  }
4587  }
4588 
4589  // negate phase to conjugate operator
4590  if (conj)
4591  phase *= -1;
4592 
4593  // modify amp to amp * exp(i phase)
4594  c = cos(phase);
4595  s = sin(phase);
4596  re = stateRe[index];
4597  im = stateIm[index];
4598 
4599  // = {re[amp] cos(phase) - im[amp] sin(phase)} + i {re[amp] sin(phase) + im[amp] cos(phase)}
4600  stateRe[index] = re*c - im*s;
4601  stateIm[index] = re*s + im*c;
4602  }
4603  }
4604 }
@ INVERSE_PRODUCT
Definition: QuEST.h:233
pauliOpType
Codes for specifying Pauli operators.
Definition: QuEST.h:96
int qsortComp(const void *a, const void *b)
Definition: QuEST_cpu.c:1908
void statevec_multiControlledMultiRotateZ(Qureg qureg, long long int ctrlMask, long long int targMask, qreal angle)
Definition: QuEST_cpu.c:3358
void copyStateFromGPU(Qureg qureg)
In GPU mode, this copies the state-vector (or density matrix) from GPU memory (qureg....
Definition: QuEST_cpu.c:45
void agnostic_initDiagonalOpFromPauliHamil(DiagonalOp op, PauliHamil hamil)
Definition: QuEST_cpu.c:1377
qreal real[4][4]
Definition: QuEST.h:177
void syncQuESTEnv(QuESTEnv env)
Guarantees that all code up to the given point has been executed on all nodes (if running in distribu...
void statevec_controlledNotLocal(Qureg qureg, int controlQubit, int targetQubit)
Definition: QuEST_cpu.c:2679
@ PAULI_Z
Definition: QuEST.h:96
void statevec_controlledPhaseShift(Qureg qureg, int idQubit1, int idQubit2, qreal angle)
Definition: QuEST_cpu.c:3226
void statevec_pauliYLocal(Qureg qureg, int targetQubit, int conjFac)
Definition: QuEST_cpu.c:2887
qreal densmatr_calcHilbertSchmidtDistanceSquaredLocal(Qureg a, Qureg b)
computes Tr((a-b) conjTrans(a-b)) = sum of abs values of (a-b)
Definition: QuEST_cpu.c:934
@ DISTANCE
Definition: QuEST.h:234
int rank
Definition: QuEST.h:364
void statevec_applyPhaseFuncOverrides(Qureg qureg, int *qubits, int numQubits, enum bitEncoding encoding, qreal *coeffs, qreal *exponents, int numTerms, long long int *overrideInds, qreal *overridePhases, int numOverrides, int conj)
Definition: QuEST_cpu.c:4268
int numChunks
The number of nodes between which the elements of this operator are split.
Definition: QuEST.h:304
void agnostic_setDiagonalOpElems(DiagonalOp op, long long int startInd, qreal *real, qreal *imag, long long int numElems)
Definition: QuEST_cpu.c:4228
void densmatr_calcProbOfAllOutcomesLocal(qreal *outcomeProbs, Qureg qureg, int *qubits, int numQubits)
Definition: QuEST_cpu.c:3616
void densmatr_initClassicalState(Qureg qureg, long long int stateInd)
Definition: QuEST_cpu.c:1126
ComplexArray pairStateVec
Temporary storage for a chunk of the state vector received from another process in the MPI version.
Definition: QuEST.h:343
void statevec_initDebugState(Qureg qureg)
Initialise the state vector of probability amplitudes to an (unphysical) state with each component of...
Definition: QuEST_cpu.c:1657
qreal statevec_findProbabilityOfZeroDistributed(Qureg qureg)
Measure the probability of a specified qubit being in the zero state across all amplitudes held in th...
Definition: QuEST_cpu.c:3513
void densmatr_mixDepolarisingDistributed(Qureg qureg, int targetQubit, qreal depolLevel)
Definition: QuEST_cpu.c:230
void densmatr_mixTwoQubitDepolarisingQ1LocalQ2DistributedPart3(Qureg qureg, int targetQubit, int qubit2, qreal delta, qreal gamma)
Definition: QuEST_cpu.c:638
int numChunks
Number of chunks the state vector is broken up into – the number of MPI processes used.
Definition: QuEST.h:338
void statevec_swapQubitAmpsLocal(Qureg qureg, int qb1, int qb2)
It is ensured that all amplitudes needing to be swapped are on this node.
Definition: QuEST_cpu.c:3922
@ TWOS_COMPLEMENT
Definition: QuEST.h:269
int getBitMaskParity(long long int mask)
Definition: QuEST_cpu.c:3307
void statevec_multiControlledUnitaryDistributed(Qureg qureg, int targetQubit, long long int ctrlQubitsMask, long long int ctrlFlipMask, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Apply a unitary operation to a single qubit in the state vector of probability amplitudes,...
Definition: QuEST_cpu.c:2542
void statevec_multiControlledMultiQubitNotLocal(Qureg qureg, int ctrlMask, int targMask)
Definition: QuEST_cpu.c:2778
void statevec_controlledUnitaryDistributed(Qureg qureg, int controlQubit, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Rotate a single qubit in the state vector of probability amplitudes, given two complex numbers alpha ...
Definition: QuEST_cpu.c:2476
void statevec_collapseToKnownProbOutcomeLocal(Qureg qureg, int measureQubit, int outcome, qreal totalProbability)
Update the state vector to be consistent with measuring measureQubit=0 if outcome=0 and measureQubit=...
Definition: QuEST_cpu.c:3767
void statevec_cloneQureg(Qureg targetQureg, Qureg copyQureg)
Definition: QuEST_cpu.c:1572
int chunkId
The position of the chunk of the operator held by this process in the full operator.
Definition: QuEST.h:306
@ NORM
Definition: QuEST.h:232
void statevec_setAmps(Qureg qureg, long long int startInd, qreal *reals, qreal *imags, long long int numAmps)
Definition: QuEST_cpu.c:1248
void statevec_compactUnitaryLocal(Qureg qureg, int targetQubit, Complex alpha, Complex beta)
Definition: QuEST_cpu.c:1754
qreal densmatr_calcPurityLocal(Qureg qureg)
Definition: QuEST_cpu.c:872
Complex statevec_calcExpecDiagonalOpLocal(Qureg qureg, DiagonalOp op)
Definition: QuEST_cpu.c:4124
Represents a 4x4 matrix of complex numbers.
Definition: QuEST.h:175
@ SCALED_INVERSE_DISTANCE
Definition: QuEST.h:234
Information about the environment the program is running in.
Definition: QuEST.h:362
@ UNSIGNED
Definition: QuEST.h:269
void statevec_multiControlledMultiQubitUnitaryLocal(Qureg qureg, long long int ctrlMask, int *targs, int numTargs, ComplexMatrixN u)
Definition: QuEST_cpu.c:1912
void statevec_initBlankState(Qureg qureg)
Definition: QuEST_cpu.c:1464
void statevec_multiControlledTwoQubitUnitaryLocal(Qureg qureg, long long int ctrlMask, int q1, int q2, ComplexMatrix4 u)
Definition: QuEST_cpu.c:1813
Represents a general 2^N by 2^N matrix of complex numbers.
Definition: QuEST.h:186
void statevec_initClassicalState(Qureg qureg, long long int stateInd)
Definition: QuEST_cpu.c:1536
@ INVERSE_DISTANCE
Definition: QuEST.h:234
void statevec_pauliXDistributed(Qureg qureg, ComplexArray stateVecIn, ComplexArray stateVecOut)
Rotate a single qubit by {{0,1},{1,0}.
Definition: QuEST_cpu.c:2651
void densmatr_mixDephasing(Qureg qureg, int targetQubit, qreal dephase)
Definition: QuEST_cpu.c:85
#define qreal
void agnostic_destroyDiagonalOp(DiagonalOp op)
Definition: QuEST_cpu.c:1368
void statevec_calcProbOfAllOutcomesLocal(qreal *outcomeProbs, Qureg qureg, int *qubits, int numQubits)
Definition: QuEST_cpu.c:3549
__forceinline__ __device__ long long int flipBit(const long long int number, const int bitInd)
Definition: QuEST_gpu.cu:95
void statevec_multiControlledPhaseShift(Qureg qureg, int *controlQubits, int numControlQubits, qreal angle)
Definition: QuEST_cpu.c:3266
int numQubitsInStateVec
Number of qubits in the state-vector - this is double the number represented for mixed states.
Definition: QuEST.h:329
void statevec_applyParamNamedPhaseFuncOverrides(Qureg qureg, int *qubits, int *numQubitsPerReg, int numRegs, enum bitEncoding encoding, enum phaseFunc phaseFuncName, qreal *params, int numParams, long long int *overrideInds, qreal *overridePhases, int numOverrides, int conj)
Definition: QuEST_cpu.c:4446
void statevec_collapseToOutcomeDistributedSetZero(Qureg qureg)
Definition: QuEST_cpu.c:3887
int chunkId
The position of the chunk of the state vector held by this process in the full state vector.
Definition: QuEST.h:336
qreal imag[2][2]
Definition: QuEST.h:140
__forceinline__ __device__ long long int insertZeroBit(const long long int number, const int index)
Definition: QuEST_gpu.cu:99
qreal * imag
The imaginary values of the 2^numQubits complex elements.
Definition: QuEST.h:310
void statevec_controlledPhaseFlip(Qureg qureg, int idQubit1, int idQubit2)
Definition: QuEST_cpu.c:3687
phaseFunc
Flags for specifying named phase functions.
Definition: QuEST.h:231
long long int numAmpsPerChunk
Number of probability amplitudes held in stateVec by this process In the non-MPI version,...
Definition: QuEST.h:332
void densmatr_mixTwoQubitDepolarisingLocal(Qureg qureg, int qubit1, int qubit2, qreal delta, qreal gamma)
Definition: QuEST_cpu.c:393
void statevec_initZeroState(Qureg qureg)
Definition: QuEST_cpu.c:1494
void statevec_initPlusState(Qureg qureg)
Definition: QuEST_cpu.c:1504
qreal * termCoeffs
The real coefficient of each Pauli product. This is an array of length PauliHamil....
Definition: QuEST.h:283
void statevec_createQureg(Qureg *qureg, int numQubits, QuESTEnv env)
Definition: QuEST_cpu.c:1290
void statevec_controlledPauliYLocal(Qureg qureg, int controlQubit, int targetQubit, int conjFac)
Definition: QuEST_cpu.c:2982
void densmatr_oneQubitDegradeOffDiagonal(Qureg qureg, int targetQubit, qreal retain)
Definition: QuEST_cpu.c:54
void densmatr_mixDensityMatrix(Qureg combineQureg, qreal otherProb, Qureg otherQureg)
Definition: QuEST_cpu.c:901
enum pauliOpType * pauliCodes
The Pauli operators acting on each qubit, flattened over every operator.
Definition: QuEST.h:281
void copyStateToGPU(Qureg qureg)
In GPU mode, this copies the state-vector (or density matrix) from RAM (qureg.stateVec) to VRAM / GPU...
Definition: QuEST_cpu.c:42
@ SCALED_PRODUCT
Definition: QuEST.h:233
#define MAX_NUM_REGS_APPLY_ARBITRARY_PHASE
void alternateNormZeroingSomeAmpBlocks(Qureg qureg, qreal norm, int normFirst, long long int startAmpInd, long long int numAmps, long long int blockSize)
Definition: QuEST_cpu.c:760
int numRanks
Definition: QuEST.h:365
void statevec_compactUnitaryDistributed(Qureg qureg, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Rotate a single qubit in the state vector of probability amplitudes, given two complex numbers alpha ...
Definition: QuEST_cpu.c:2095
qreal imag[4][4]
Definition: QuEST.h:178
int numQubits
The number of qubits this operator can act on (informing its size)
Definition: QuEST.h:300
int numSumTerms
The number of terms in the weighted sum, or the number of Pauli products.
Definition: QuEST.h:285
void statevec_multiControlledUnitaryLocal(Qureg qureg, int targetQubit, long long int ctrlQubitsMask, long long int ctrlFlipMask, ComplexMatrix2 u)
Definition: QuEST_cpu.c:2268
long long int getQubitBitMask(int *qubits, int numQubits)
Definition: QuEST_common.c:50
void normaliseSomeAmps(Qureg qureg, qreal norm, long long int startInd, long long int numAmps)
Definition: QuEST_cpu.c:750
Represents a diagonal complex operator on the full Hilbert state of a Qureg.
Definition: QuEST.h:297
A Pauli Hamiltonian, expressed as a real-weighted sum of pauli products, and which can hence represen...
Definition: QuEST.h:277
void statevec_pauliYDistributed(Qureg qureg, ComplexArray stateVecIn, ComplexArray stateVecOut, int updateUpper, int conjFac)
Rotate a single qubit by +-{{0,-i},{i,0}.
Definition: QuEST_cpu.c:2945
@ SCALED_INVERSE_SHIFTED_NORM
Definition: QuEST.h:232
void densmatr_mixDampingDistributed(Qureg qureg, int targetQubit, qreal damping)
Definition: QuEST_cpu.c:306
Complex densmatr_calcExpecDiagonalOpLocal(Qureg qureg, DiagonalOp op)
Definition: QuEST_cpu.c:4167
qreal ** real
Definition: QuEST.h:189
void statevec_pauliXLocal(Qureg qureg, int targetQubit)
Definition: QuEST_cpu.c:2593
void agnostic_syncDiagonalOp(DiagonalOp op)
Definition: QuEST_cpu.c:1373
void densmatr_initPureStateLocal(Qureg targetQureg, Qureg copyQureg)
Definition: QuEST_cpu.c:1195
__forceinline__ __device__ int extractBit(const int locationOfBitFromRight, const long long int theEncodedNumber)
Definition: QuEST_gpu.cu:82
void densmatr_mixDampingLocal(Qureg qureg, int targetQubit, qreal damping)
Definition: QuEST_cpu.c:180
@ INVERSE_NORM
Definition: QuEST.h:232
Represents a system of qubits.
Definition: QuEST.h:322
qreal densmatr_calcInnerProductLocal(Qureg a, Qureg b)
computes Tr(conjTrans(a) b) = sum of (a_ij^* b_ij)
Definition: QuEST_cpu.c:969
__forceinline__ __device__ long long int insertTwoZeroBits(const long long int number, const int bit1, const int bit2)
Definition: QuEST_gpu.cu:106
qreal ** imag
Definition: QuEST.h:190
void statevec_multiControlledMultiQubitNotDistributed(Qureg qureg, int ctrlMask, int targMask, ComplexArray stateVecIn, ComplexArray stateVecOut)
Definition: QuEST_cpu.c:2834
void statevec_unitaryDistributed(Qureg qureg, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Apply a unitary operation to a single qubit given a subset of the state vector with upper and lower b...
Definition: QuEST_cpu.c:2151
void statevec_controlledCompactUnitaryDistributed(Qureg qureg, int controlQubit, Complex rot1, Complex rot2, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut)
Rotate a single qubit in the state vector of probability amplitudes, given two complex numbers alpha ...
Definition: QuEST_cpu.c:2414
@ PRODUCT
Definition: QuEST.h:233
int statevec_compareStates(Qureg mq1, Qureg mq2, qreal precision)
Definition: QuEST_cpu.c:1741
void statevec_multiControlledPhaseFlip(Qureg qureg, int *controlQubits, int numControlQubits)
Definition: QuEST_cpu.c:3718
ComplexArray stateVec
Computational state amplitudes - a subset thereof in the MPI version.
Definition: QuEST.h:341
void statevec_applyMultiVarPhaseFuncOverrides(Qureg qureg, int *qubits, int *numQubitsPerReg, int numRegs, enum bitEncoding encoding, qreal *coeffs, qreal *exponents, int *numTermsPerReg, long long int *overrideInds, qreal *overridePhases, int numOverrides, int conj)
Definition: QuEST_cpu.c:4345
qreal real[2][2]
Definition: QuEST.h:139
void densmatr_mixDepolarisingLocal(Qureg qureg, int targetQubit, qreal depolLevel)
Definition: QuEST_cpu.c:131
void statevec_collapseToKnownProbOutcomeDistributedRenorm(Qureg qureg, int measureQubit, qreal totalProbability)
Renormalise parts of the state vector where measureQubit=0 or 1, based on the total probability of th...
Definition: QuEST_cpu.c:3849
long long int numElemsPerChunk
The number of the 2^numQubits amplitudes stored on each distributed node.
Definition: QuEST.h:302
int isDensityMatrix
Whether this instance is a density-state representation.
Definition: QuEST.h:325
void statevec_hadamardLocal(Qureg qureg, int targetQubit)
Definition: QuEST_cpu.c:3078
int numQubits
Definition: QuEST.h:188
void densmatr_applyDiagonalOpLocal(Qureg qureg, DiagonalOp op)
Definition: QuEST_cpu.c:4082
void statevec_controlledUnitaryLocal(Qureg qureg, int controlQubit, int targetQubit, ComplexMatrix2 u)
Definition: QuEST_cpu.c:2336
static int isOddParity(const long long int number, const int qb1, const int qb2)
void densmatr_collapseToKnownProbOutcome(Qureg qureg, int measureQubit, int outcome, qreal totalStateProb)
Renorms (/prob) every | * outcome * >< * outcome * | state, setting all others to zero.
Definition: QuEST_cpu.c:791
void statevec_destroyQureg(Qureg qureg, QuESTEnv env)
Definition: QuEST_cpu.c:1328
void densmatr_mixTwoQubitDepolarisingLocalPart1(Qureg qureg, int qubit1, int qubit2, qreal delta)
Definition: QuEST_cpu.c:494
void statevec_multiRotateZ(Qureg qureg, long long int mask, qreal angle)
Definition: QuEST_cpu.c:3316
int numQubits
The number of qubits informing the Hilbert dimension of the Hamiltonian.
Definition: QuEST.h:287
void statevec_controlledNotDistributed(Qureg qureg, int controlQubit, ComplexArray stateVecIn, ComplexArray stateVecOut)
Rotate a single qubit by {{0,1},{1,0}.
Definition: QuEST_cpu.c:2742
int numQubitsRepresented
The number of qubits represented in either the state-vector or density matrix.
Definition: QuEST.h:327
long long int numAmpsTotal
Total number of amplitudes, which are possibly distributed among machines.
Definition: QuEST.h:334
@ SCALED_DISTANCE
Definition: QuEST.h:234
qreal * real
The real values of the 2^numQubits complex elements.
Definition: QuEST.h:308
qreal real
Definition: QuEST.h:105
void statevec_swapQubitAmpsDistributed(Qureg qureg, int pairRank, int qb1, int qb2)
qureg.pairStateVec contains the entire set of amplitudes of the paired node which includes the set of...
Definition: QuEST_cpu.c:3965
int statevec_initStateFromSingleFile(Qureg *qureg, char filename[200], QuESTEnv env)
Definition: QuEST_cpu.c:1691
qreal imag
Definition: QuEST.h:106
void densmatr_mixTwoQubitDepolarisingDistributed(Qureg qureg, int targetQubit, int qubit2, qreal delta, qreal gamma)
Definition: QuEST_cpu.c:547
void statevec_unitaryLocal(Qureg qureg, int targetQubit, ComplexMatrix2 u)
Definition: QuEST_cpu.c:2026
void densmatr_mixTwoQubitDephasing(Qureg qureg, int qubit1, int qubit2, qreal dephase)
Definition: QuEST_cpu.c:90
qreal densmatr_findProbabilityOfZeroLocal(Qureg qureg, int measureQubit)
Definition: QuEST_cpu.c:3402
@ SCALED_INVERSE_SHIFTED_DISTANCE
Definition: QuEST.h:234
Represents one complex number.
Definition: QuEST.h:103
void statevec_hadamardDistributed(Qureg qureg, ComplexArray stateVecUp, ComplexArray stateVecLo, ComplexArray stateVecOut, int updateUpper)
Rotate a single qubit by {{1,1},{1,-1}}/sqrt2.
Definition: QuEST_cpu.c:3139
@ SCALED_NORM
Definition: QuEST.h:232
@ SCALED_INVERSE_PRODUCT
Definition: QuEST.h:233
void statevec_reportStateToScreen(Qureg qureg, QuESTEnv env, int reportRank)
Definition: QuEST_cpu.c:1439
void statevec_setWeightedQureg(Complex fac1, Qureg qureg1, Complex fac2, Qureg qureg2, Complex facOut, Qureg out)
Definition: QuEST_cpu.c:4005
void densmatr_initPlusState(Qureg qureg)
Definition: QuEST_cpu.c:1165
void statevec_initStateOfSingleQubit(Qureg *qureg, int qubitId, int outcome)
Initialise the state vector of probability amplitudes such that one qubit is set to 'outcome' and all...
Definition: QuEST_cpu.c:1611
void statevec_controlledPauliYDistributed(Qureg qureg, int controlQubit, ComplexArray stateVecIn, ComplexArray stateVecOut, int conjFac)
Definition: QuEST_cpu.c:3036
void statevec_applyDiagonalOp(Qureg qureg, DiagonalOp op)
Definition: QuEST_cpu.c:4047
void statevec_controlledCompactUnitaryLocal(Qureg qureg, int controlQubit, int targetQubit, Complex alpha, Complex beta)
Definition: QuEST_cpu.c:2196
Complex statevec_calcInnerProductLocal(Qureg bra, Qureg ket)
Definition: QuEST_cpu.c:1082
bitEncoding
Flags for specifying how the bits in sub-register computational basis states are mapped to indices in...
Definition: QuEST.h:269
void statevec_phaseShiftByTerm(Qureg qureg, int targetQubit, Complex term)
Definition: QuEST_cpu.c:3185
Represents a 2x2 matrix of complex numbers.
Definition: QuEST.h:137
void zeroSomeAmps(Qureg qureg, long long int startInd, long long int numAmps)
Definition: QuEST_cpu.c:740
DiagonalOp agnostic_createDiagonalOp(int numQubits, QuESTEnv env)
Definition: QuEST_cpu.c:1346
@ SCALED_INVERSE_NORM
Definition: QuEST.h:232
qreal densmatr_calcFidelityLocal(Qureg qureg, Qureg pureState)
computes a few dens-columns-worth of (vec^*T) dens * vec
Definition: QuEST_cpu.c:1001
qreal statevec_findProbabilityOfZeroLocal(Qureg qureg, int measureQubit)
Measure the total probability of a specified qubit being in the zero state across all amplitudes in t...
Definition: QuEST_cpu.c:3457