13. Example Programs

13.1. Hello World - Simple Bell State

#include <cudaq.h>

struct bell {
  int operator()(int num_iters) __qpu__ {
    cudaq::qarray<2> q;
    int nCorrect = 0;
    for (int i = 0; i < num_iters; i++) {
      h(q[0]);
      x<cudaq::ctrl>(q[0], q[1]);
      auto results = mz(q);
      if (results[0] == results[1])
        nCorrect++;

      reset(q[0]);
      reset(q[1]);
    }
    return nCorrect;
  }
};

int main() { printf("N Correct = %d\n", bell{}(100)); }
import cudaq

@cudaq.kernel()
def bell(num_iters : int) -> int:
    q = cudaq.qvector(2)
    nCorrect = 0
    for i in range(num_iters):
        h(q[0])
        x.ctrl(q[0], q[1])
        results = mz(q)
        if results[0] == results[1]:
           nCorrect = nCorrect + 1

        reset(q)
    return nCorrect

counts = bell(100)
print(f'N Correct = {counts}')
assert counts == 100

13.2. GHZ State Preparation and Sampling

#include <cudaq.h>

__qpu__ ghz(const int n_qubits) {
  cudaq::qvector q(n_qubits);
  h(q[0]);
  for (int i = 0; i < n_qubits - 1; ++i)
    // note use of ctrl modifier
    x<cudaq::ctrl>(q[i], q[i+1]);

  mz(q);
}

int main() {
  // Sample the state produced by the ghz kernel
  auto counts = cudaq::sample(ghz, 10);
  for (auto [bits, count] : counts) {
    printf("Observed %s %lu times.\n", bits.c_str(), count);
  }
  return 0;
}
import cudaq

@cudaq.kernel
def ghz(numQubits:int):
    qubits = cudaq.qvector(numQubits)
    h(qubits.front())
    for i, qubit in enumerate(qubits.front(numQubits - 1)):
        x.ctrl(qubit, qubits[i + 1])

counts = cudaq.sample(ghz, 10)
for bits, count : counts:
    print('Observed {} {} times.'.format(bits, count))

13.3. Quantum Phase Estimation

// Compile and run with:
// ```
// nvq++ phase_estimation.cpp -o qpe.x && ./qpe.x
// ```

#include <cudaq.h>
#include <cudaq/algorithm.h>
#include <stdio.h>

#include <cmath>

// A pure device quantum kernel defined as a free function
// (cannot be called from host code).
__qpu__ void iqft(cudaq::qview<> q) {
  int N = q.size();
  // Swap qubits
  for (int i = 0; i < N / 2; ++i) {
    swap(q[i], q[N - i - 1]);
  }

  for (int i = 0; i < N - 1; ++i) {
    h(q[i]);
    int j = i + 1;
    for (int y = i; y >= 0; --y) {
      double denom = (1UL << (j - y));
      const double theta = -M_PI / denom;
      r1<cudaq::ctrl>(theta, q[j], q[y]);
    }
  }

  h(q[N - 1]);
}

// CUDA-Q kernel call operators can be templated on
// input CUDA-Q kernel expressions. Here we define a general
// Phase Estimation algorithm that is generic on the eigenstate
// preparation and unitary evolution steps.
struct qpe {

  // Define the CUDA-Q call expression to take user-specified eigenstate
  // and unitary evolution kernels, as well as the number of qubits in the
  // counting register and in the eigenstate register.
  template <typename StatePrep, typename Unitary>
  void operator()(const int nCountingQubits, StatePrep &&state_prep,
                  Unitary &&oracle) __qpu__ {
    // Allocate a register of qubits
    cudaq::qvector q(nCountingQubits + 1);

    // Extract sub-registers, one for the counting qubits
    // another for the eigenstate register
    auto counting_qubits = q.front(nCountingQubits);
    auto &state_register = q.back();

    // Prepare the eigenstate
    state_prep(state_register);

    // Put the counting register into uniform superposition
    h(counting_qubits);

    // Perform `ctrl-U^j`
    for (int i = 0; i < nCountingQubits; ++i) {
      for (int j = 0; j < (1 << i); ++j) {
        cudaq::control(oracle, counting_qubits[i], state_register);
      }
    }

    // Apply inverse quantum Fourier transform
    iqft(counting_qubits);

    // Measure to gather sampling statistics
    mz(counting_qubits);

    return;
  }
};

struct r1PiGate {
  void operator()(cudaq::qubit &q) __qpu__ { r1(1., q); }
};

int main() {

  for (auto nQubits : std::vector<int>{2, 4, 6, 8}) {
    auto counts = cudaq::sample(
        qpe{}, nQubits, [](cudaq::qubit &q) __qpu__ { x(q); }, r1PiGate{});
    auto mostProbable = counts.most_probable();
    double theta = cudaq::to_integer(mostProbable) / (double)(1UL << nQubits);
    auto piEstimate = 1. / (2 * theta);
    printf("Pi Estimate(nQubits == %d) = %lf \n", nQubits, piEstimate);
  }
}
import cudaq, numpy as np

# Compute phase for U |psi> = exp(-2 pi phase) |psi>
# This example will consider U = T, and |psi> = |1>
# Define a Inverse Quantum Fourier Transform kernel
@cudaq.kernel
def iqft(qubits: cudaq.qview):
    N = qubits.size()
    for i in range(N // 2):
        swap(qubits[i], qubits[N - i - 1])

    for i in range(N - 1):
        h(qubits[i])
        j = i + 1
        for y in range(i, -1, -1):
            r1.ctrl(-np.pi / 2**(j - y), qubits[j], qubits[y])

    h(qubits[N - 1])


# Define the U kernel
@cudaq.kernel
def tGate(qubit: cudaq.qubit):
    t(qubit)


# Define the state preparation |psi> kernel
@cudaq.kernel
def xGate(qubit: cudaq.qubit):
    x(qubit)

# General Phase Estimation kernel for single qubit
# eigen states.
@cudaq.kernel
def qpe(nC: int, nQ: int, statePrep: Callable[[cudaq.qubit], None],
        oracle: Callable[[cudaq.qubit], None]):
    q = cudaq.qvector(nC + nQ)
    countingQubits = q.front(nC)
    stateRegister = q.back()
    statePrep(stateRegister)
    h(countingQubits)
    for i in range(nC):
        for j in range(2**i):
            cudaq.control(oracle, [countingQubits[i]], stateRegister)
    iqft(countingQubits)
    mz(countingQubits)

# Sample the state to get the phase.
counts = cudaq.sample(qpe, 3, 1, xGate, tGate)
assert len(counts) == 1
assert '100' in counts

13.4. Deuteron Binding Energy Parameter Sweep

#include <cudaq.h>

struct deuteron_n2_ansatz {
  void operator()(double theta) __qpu__ {
    cudaq::qarray<2> q;
    x(q[0]);
    ry(theta, q[1]);
    x<cudaq::ctrl>(q[1], q[0]);
  }
};

int main() {
  using namespace cudaq::spin;
  cudaq::spin_op h = 5.907 - 2.1433 * x(0) * x(1) - 2.1433 * y(0) * y(1) +
              .21829 * z(0) - 6.125 * z(1);

  // Perform parameter sweep for deuteron N=2 Hamiltonian
  const auto param_space = cudaq::linspace(-M_PI, M_PI, 25);
  for (const auto& param : param_space) {
    // KERNEL::observe(...) <==>
    // E(params...) = <psi(params...) | H | psi(params...)>
    double energy_at_param = cudaq::observe(deuteron_n2_ansatz{}, h, param);
    printf("<H>(%lf) = %lf\n", param, energy_at_param);
  }
}
import cudaq
from cudaq import spin

@cudaq.kernel
def ansatz(angle:float):
    q = cudaq.qvector(2)
    x(q[0])
    ry(angle, q[1])
    x.ctrl(q[1], q[0])

hamiltonian = 5.907 - 2.1433 * spin.x(0) * spin.x(1) - 2.1433 * spin.y(
    0) * spin.y(1) + .21829 * spin.z(0) - 6.125 * spin.z(1)

# Perform parameter sweep for deuteron N=2 Hamiltonian
for angle in np.linspace(-np.pi, np.pi, 25):
     # KERNEL::observe(...) <==>
     # E(params...) = <psi(params...) | H | psi(params...)>
     energyAtParam = cudaq.observe(ansatz, hamiltonian, .59)
     print('<H>({}) = {}'.format(angle, energyAtParam))

13.5. Grover’s Algorithm

// Compile and run with:
// ```
// nvq++ grover.cpp -o grover.x && ./grover.x
// ```

#include <cmath>
#include <cudaq.h>
#include <numbers>

__qpu__ void reflect_about_uniform(cudaq::qvector<> &qs) {
  auto ctrlQubits = qs.front(qs.size() - 1);
  auto &lastQubit = qs.back();

  // Compute (U) Action (V) produces
  // U V U::Adjoint
  cudaq::compute_action(
      [&]() {
        h(qs);
        x(qs);
      },
      [&]() { z<cudaq::ctrl>(ctrlQubits, lastQubit); });
}

struct run_grover {
  template <typename CallableKernel>
  __qpu__ auto operator()(const int n_qubits, CallableKernel &&oracle) {
    int n_iterations = round(0.25 * std::numbers::pi * sqrt(2 ^ n_qubits));

    cudaq::qvector qs(n_qubits);
    h(qs);
    for (int i = 0; i < n_iterations; i++) {
      oracle(qs);
      reflect_about_uniform(qs);
    }
    mz(qs);
  }
};

struct oracle {
  const long target_state;

  void operator()(cudaq::qvector<> &qs) __qpu__ {
    cudaq::compute_action(
        [&]() {
          for (int i = 1; i <= qs.size(); ++i) {
            auto target_bit_set = (1 << (qs.size() - i)) & target_state;
            if (!target_bit_set)
              x(qs[i - 1]);
          }
        },
        [&]() {
          auto ctrlQubits = qs.front(qs.size() - 1);
          z<cudaq::ctrl>(ctrlQubits, qs.back());
        });
  }
};

int main(int argc, char *argv[]) {
  auto secret = 1 < argc ? strtol(argv[1], NULL, 2) : 0b1011;
  oracle compute_oracle{.target_state = secret};
  auto counts = cudaq::sample(run_grover{}, 4, compute_oracle);
  printf("Found string %s\n", counts.most_probable().c_str());
}
@cudaq.kernel
def reflect(qubits: cudaq.qview):
    ctrls = qubits.front(qubits.size() - 1)
    last = qubits.back()
    cudaq.compute_action(lambda: (h(qubits), x(qubits)),
                          lambda: z.ctrl(ctrls, last))

@cudaq.kernel
def oracle(q: cudaq.qview):
    z.ctrl(q[0], q[2])
    z.ctrl(q[1], q[2])

@cudaq.kernel
def grover(N: int, M: int, oracle: Callable[[cudaq.qview], None]):
    q = cudaq.qvector(N)
    h(q)
    for i in range(M):
        oracle(q)
        reflect(q)
    mz(q)

counts = cudaq.sample(grover, 3, 1, oracle)
assert len(counts) == 2
assert '101' in counts
assert '011' in counts

13.6. Iterative Phase Estimation

// Compile and run with:
// ```
// nvq++ iterative_qpe.cpp -o qpe.x && ./qpe.x
// ```

#include <cudaq.h>

struct iqpe {
  void operator()() __qpu__ {
    cudaq::qarray<2> q;
    h(q[0]);
    x(q[1]);
    for (int i = 0; i < 8; i++)
      r1<cudaq::ctrl>(-5 * M_PI / 8., q[0], q[1]);

    h(q[0]);
    auto cr0 = mz(q[0]);
    reset(q[0]);

    h(q[0]);
    for (int i = 0; i < 4; i++)
      r1<cudaq::ctrl>(-5 * M_PI / 8., q[0], q[1]);

    if (cr0)
      rz(-M_PI / 2., q[0]);

    h(q[0]);
    auto cr1 = mz(q[0]);
    reset(q[0]);

    h(q[0]);
    for (int i = 0; i < 2; i++)
      r1<cudaq::ctrl>(-5 * M_PI / 8., q[0], q[1]);

    if (cr0)
      rz(-M_PI / 4., q[0]);

    if (cr1)
      rz(-M_PI / 2., q[0]);

    h(q[0]);
    auto cr2 = mz(q[0]);
    reset(q[0]);
    h(q[0]);
    r1<cudaq::ctrl>(-5 * M_PI / 8., q[0], q[1]);

    if (cr0)
      rz(-M_PI / 8., q[0]);

    if (cr1)
      rz(-M_PI_4, q[0]);

    if (cr2)
      rz(-M_PI_2, q[0]);

    h(q[0]);
    mz(q[0]);
  }
};

int main() {
  auto counts = cudaq::sample(/*shots*/ 10, iqpe{});
  counts.dump();

  return 0;
}