Executing Kernels¶
In CUDA-Q, there are 3 ways in which one can execute quantum kernels:
sample: yields measurement counts
observe: yields expectation values
get_state: yields the quantum statevector of the computation
Sample¶
Quantum states collapse upon measurement and hence need to be sampled many times to gather statistics. The CUDA-Q sample
call enables this:
import cudaq
import numpy as np
# Select the "qpp-cpu" simulation target for the examples below.
cudaq.set_target("qpp-cpu")
# Number of qubits handed to the kernel in the calls below.
qubit_count = 2
# Quantum kernel: prepares a GHZ state on `qubit_count` qubits and measures it.
@cudaq.kernel
def kernel(qubit_count: int):
    qubits = cudaq.qvector(qubit_count)
    # Hadamard on the first qubit, then a controlled-X from the first qubit
    # onto every other qubit (a 2-qubit GHZ state when qubit_count == 2).
    h(qubits[0])
    for target in range(1, qubit_count):
        x.ctrl(qubits[0], qubits[target])
    # Explicit Z-basis measurement of all qubits. If a kernel specifies no
    # measurement, all qubits are measured in the Z-basis by default.
    mz(qubits)
# Render the circuit for the given argument as text and print it.
circuit_diagram = cudaq.draw(kernel, qubit_count)
print(circuit_diagram)
# Sample the kernel with 1000 shots and print the measurement counts.
result = cudaq.sample(kernel, qubit_count, shots_count=1000)
print(result)
#include <cstdio>
#include <cudaq.h>
#include <cudaq/algorithms/draw.h>
// Quantum kernel: prepares a GHZ state on `qubit_count` qubits, then measures.
__qpu__ void kernel(int qubit_count) {
  cudaq::qvector qubits(qubit_count);
  // Hadamard on the first qubit, then a chain of controlled-X gates from each
  // qubit onto its successor (a 2-qubit GHZ state when qubit_count == 2).
  h(qubits[0]);
  for (int i = 0; i < qubit_count - 1; i++) {
    x<cudaq::ctrl>(qubits[i], qubits[i + 1]);
  }
  // Explicit Z-basis measurement of all qubits. If a kernel specifies no
  // measurement, all qubits are measured in the Z-basis by default.
  mz(qubits);
}
// Draws the circuit of `kernel`, samples it, and prints both results.
int main() {
  int qubit_count = 2;
  // Render the circuit as a string and print it; without this print the
  // diagram is computed but never shown.
  auto produced_str = cudaq::draw(kernel, qubit_count);
  printf("%s", produced_str.c_str());
  // Sample the kernel and dump the measurement counts.
  auto result = cudaq::sample(kernel, qubit_count);
  result.dump();
  return 0;
}
╭───╮
q0 : ┤ h ├──●──
╰───╯╭─┴─╮
q1 : ─────┤ x ├
╰───╯
{ 11:506 00:494 }
Note that there is a subtle difference between how sample
is executed with the target device set to a simulator or with the target device set to a QPU. In simulation mode, the quantum state is built once and then sampled \(s\) times where \(s\) equals the shots_count
. In hardware execution mode, the quantum state collapses upon measurement and hence needs to be rebuilt over and over again.
There are a number of helpful tools that can be found in the API docs to process the SampleResult
object produced by sample
.
Observe¶
The observe
function allows us to calculate expectation values. We must supply a spin operator in the form of a Hamiltonian, \(H\), from which we would like to calculate \(\langle\psi|H|\psi\rangle\).
from cudaq import spin
# Hamiltonian expressed as a sum of Pauli spin-operator terms.
# NOTE(review): the last term multiplies X and Z on the same qubit (index 0);
# confirm this is intended rather than a two-qubit term.
hamiltonian = (
    spin.z(0)
    + spin.y(1)
    + spin.x(0) * spin.z(0)
)
# Quantum kernel: GHZ-state preparation on `n_qubits` qubits, with no explicit
# measurement.
@cudaq.kernel
def kernel1(n_qubits: int):
    register = cudaq.qvector(n_qubits)
    # Hadamard on the first qubit, then a controlled-X from it onto each other
    # qubit.
    h(register[0])
    for idx in range(1, n_qubits):
        x.ctrl(register[0], register[idx])
# Evaluate the Hamiltonian against the state prepared by kernel1, then
# extract and print the expectation value.
observation = cudaq.observe(kernel1, hamiltonian, qubit_count)
result = observation.expectation()
print('<H> =', result)
<H> = 0.0
#include <cstdio>
#include <cudaq.h>
#include <cudaq/algorithms/draw.h>
// Quantum kernel: prepares a GHZ state on `qubit_count` qubits (no explicit
// measurement).
__qpu__ void kernel(int qubit_count) {
  cudaq::qvector qubits(qubit_count);
  // Hadamard on the first qubit, then controlled-X from each qubit onto the
  // next one (a 2-qubit GHZ state when qubit_count == 2).
  h(qubits[0]);
  for (int i = 0; i < qubit_count - 1; i++) {
    x<cudaq::ctrl>(qubits[i], qubits[i + 1]);
  }
}
// Computes and prints the expectation value of a spin Hamiltonian with
// respect to the state prepared by `kernel`.
int main() {
  int qubit_count = 2;
  // Define a Hamiltonian in terms of Pauli spin operators.
  auto hamiltonian = cudaq::spin::z(0) + cudaq::spin::y(1) +
                     cudaq::spin::x(0) * cudaq::spin::z(0);
  // Compute the expectation value given the state prepared by the kernel.
  auto result = cudaq::observe(kernel, hamiltonian, qubit_count);
  // Label the value so the printed line matches the documented output
  // ("<H> = ...") and the Python example.
  printf("<H> = %.6lf\n", result.expectation());
  return 0;
}
<H> = 0.000000
Get State¶
The get_state
function gives us access to the quantum statevector of the computation. Remember that this is only feasible in simulation mode.
# Retrieve the statevector of `kernel` and print it as a NumPy array.
# NOTE(review): `kernel` ends with mz(...), so the printed vector is presumably
# a post-measurement state rather than the GHZ superposition - confirm.
result = cudaq.get_state(kernel, qubit_count)
amplitudes = np.array(result)
print(amplitudes)
[0.+0.j 0.+0.j 0.+0.j 1.+0.j]
#include <cstdio>
#include <cudaq.h>
#include <cudaq/algorithms/draw.h>
// Quantum kernel: prepares a GHZ state on `qubit_count` qubits (no explicit
// measurement).
__qpu__ void kernel(int qubit_count) {
  cudaq::qvector qubits(qubit_count);
  // Hadamard on the first qubit, then controlled-X from each qubit onto the
  // next one (a 2-qubit GHZ state when qubit_count == 2).
  h(qubits[0]);
  for (int i = 0; i < qubit_count - 1; i++) {
    x<cudaq::ctrl>(qubits[i], qubits[i + 1]);
  }
}
int main() {
int qubit_count = 2;
// Compute the statevector of the kernel
cudaq::state t = cudaq::get_state(kernel, qubit_count);
t.dump();
return 0;
}
(0.707107,0)
(0,0)
(0,0)
(0.707107,0)
The statevector generated by the get_state
command follows Big-endian convention for associating numbers with their binary representations, which places the least significant bit on the left. That is, for the example of a 2-bit system, we have the following translation between integers and bits:
0 → 00
1 → 10
2 → 01
3 → 11
Parallelization Techniques¶
The most intensive task in the computation is the execution of the quantum kernel; hence, each of the execution functions sample, observe and get_state can be parallelized given access to multiple quantum processing units (multi-QPU).
Since multi-QPU platforms are not yet feasible, we emulate each QPU with a GPU.
Observe Asynchronous¶
Asynchronous programming is a technique that enables your program to start a potentially long-running task and still be able to be responsive to other events while that task runs, rather than having to wait until that task has finished. Once that task has finished, your program is presented with the result.
observe
can be a time intensive task. We can parallelize the execution of observe
via the arguments it accepts.
# Quantum kernel: GHZ-state preparation on `qubit_count` qubits, with no
# explicit measurement.
@cudaq.kernel
def kernel1(qubit_count: int):
    qubits = cudaq.qvector(qubit_count)
    # Hadamard on the first qubit, then a controlled-X from it onto each other
    # qubit (a 2-qubit GHZ state when qubit_count == 2).
    h(qubits[0])
    for target in range(1, qubit_count):
        x.ctrl(qubits[0], qubits[target])
# Hamiltonian whose expectation value is evaluated asynchronously.
hamiltonian_1 = (
    spin.x(0)
    + spin.y(1)
    + spin.z(0) * spin.y(1)
)
# Submit the observe call asynchronously to the QPU with id 0.
result_1 = cudaq.observe_async(kernel1, hamiltonian_1, qubit_count, qpu_id=0)
# Retrieve the result and print its expectation value.
observation_1 = result_1.get()
print(observation_1.expectation())
1.1102230246251565e-16
#include <cstdio>
#include <cudaq.h>
#include <cudaq/algorithms/draw.h>
// Quantum kernel: prepares a GHZ state on `qubit_count` qubits (no explicit
// measurement).
__qpu__ void kernel(int qubit_count) {
  cudaq::qvector qubits(qubit_count);
  // Hadamard on the first qubit, then controlled-X from each qubit onto the
  // next one (a 2-qubit GHZ state when qubit_count == 2).
  h(qubits[0]);
  for (int i = 0; i < qubit_count - 1; i++) {
    x<cudaq::ctrl>(qubits[i], qubits[i + 1]);
  }
}
// Evaluates the expectation value of one Hamiltonian asynchronously on QPU 0
// and prints it.
int main() {
  int qubit_count = 2;
  // Hamiltonian expressed in terms of Pauli spin operators.
  auto hamiltonian = cudaq::spin::x(0) + cudaq::spin::y(1) +
                     cudaq::spin::z(0) * cudaq::spin::y(1);
  // Submit the observe call asynchronously to the QPU with id 0.
  auto pending = cudaq::observe_async(0, kernel, hamiltonian, qubit_count);
  // Retrieve the result and print its expectation value.
  auto observation = pending.get();
  printf("%.6lf\n", observation.expectation());
  return 0;
}
0.000000
Above we parallelized the observe
call over the hamiltonian
parameter; however, we can parallelize over any of the arguments it accepts by just iterating over the qpu_id
.
Sample Asynchronous¶
Similar to observe_async
above, sample also supports asynchronous execution for the arguments it accepts. One can parallelize over various kernels, variational parameters or even distribute shots counts over multiple QPUs.
# Submit the sampling call asynchronously with 1000 shots.
result_async = cudaq.sample_async(kernel, qubit_count, shots_count=1000)
# Retrieve the measurement counts and print them.
counts = result_async.get()
print(counts)
{ 00:498 11:502 }
#include <cstdio>
#include <cudaq.h>
#include <cudaq/algorithms/draw.h>
// Quantum kernel: prepares a GHZ state on `qubit_count` qubits, then measures.
__qpu__ void kernel(int qubit_count) {
  cudaq::qvector qubits(qubit_count);
  // Hadamard on the first qubit, then controlled-X from each qubit onto the
  // next one (a 2-qubit GHZ state when qubit_count == 2).
  h(qubits[0]);
  for (int i = 0; i < qubit_count - 1; i++) {
    x<cudaq::ctrl>(qubits[i], qubits[i + 1]);
  }
  // Explicit Z-basis measurement of all qubits. If a kernel specifies no
  // measurement, all qubits are measured in the Z-basis by default.
  mz(qubits);
}
// Samples `kernel` asynchronously and dumps the measurement counts.
int main() {
  int qubit_count = 2;
  // Submit the sampling call asynchronously.
  auto pending = cudaq::sample_async(kernel, qubit_count);
  // Retrieve the counts and print them.
  auto counts = pending.get();
  counts.dump();
  return 0;
}
{ 00:510 11:490 }
Get State Asynchronous¶
Similar to sample_async
above, get_state_async
also supports asynchronous execution for the input arguments it accepts.