When to Use sample vs. run¶
Introduction¶
Starting with CUDA-Q 0.14.0, sample no longer supports kernels that branch
on measurement results (measurement-dependent control flow). Kernels containing
patterns such as if mz(q): or if (result) { ... } where result
comes from a measurement must now use run instead.
This breaking change creates a clearer API separation:
Use sample for aggregate measurement statistics (counts dictionaries).
Use run for shot-by-shot execution with measurement-dependent control flow and individual return values.
Usage Guidelines¶
Use sample when:
You want aggregate measurement statistics (histograms).
Your kernel has no measurement-dependent control flow.
You only need final measurement distributions.
You are using the explicit_measurements option, which concatenates all measurement results in execution order rather than re-measuring qubits at the end of the kernel. See the sample specification for details.
Use run when:
You need shot-by-shot measurement values.
Your kernel has conditionals based on measurement results.
You want to return computed values from the kernel.
You need to store or analyze individual shot data.
For the full API specification, see the sample and run sections in the Algorithmic Primitives documentation. For a usage guide, see Running your first CUDA-Q Program.
What Is Supported with sample¶
Kernels without measurement-dependent control flow continue to work exactly as
before. This includes implicit measurements, explicit measurements without
conditionals, partial qubit measurement, mid-circuit measurement for
reset patterns, and the explicit_measurements option (which concatenates all
measurement results in execution order rather than re-measuring at the end of
the kernel – see the sample specification for
details).
# Two-qubit kernel with no explicit measurement: Hadamard on q[0], then an
# X on q[1] controlled by q[0]. Used below to show implicit measurement
# under sample.
@cudaq.kernel
def bell():
    q = cudaq.qvector(2)
    h(q[0])
    x.ctrl(q[0], q[1])
# Single-qubit kernel with a mid-circuit measurement whose result is never
# branched on: measure, reset the qubit, then apply X. This pattern remains
# usable with sample.
@cudaq.kernel
def reset_pattern():
    q = cudaq.qubit()
    h(q)
    mz(q)  # result is discarded; no control flow depends on it
    reset(q)
    x(q)
# Kernel without explicit measurements: sample measures implicitly.
print("Implicit measurements:")
bell_counts = cudaq.sample(bell)
bell_counts.dump()

# Mid-circuit measurement followed by reset (no branching on the result).
print("\nMid-circuit measurement with reset:")
reset_counts = cudaq.sample(reset_pattern)
reset_counts.dump()

# Same kernel, with all measurement results concatenated in execution order.
print("\nWith explicit_measurements option:")
explicit_counts = cudaq.sample(reset_pattern, explicit_measurements=True)
explicit_counts.dump()
// Two-qubit kernel with no explicit measurement: Hadamard on q[0], then an
// X on q[1] controlled by q[0]. Used below to show implicit measurement
// under sample.
__qpu__ void bell() {
  cudaq::qvector q(2);
  h(q[0]);
  x<cudaq::ctrl>(q[0], q[1]);
}
// Single-qubit kernel with a mid-circuit measurement whose result is never
// branched on: measure, reset the qubit, then apply X. This pattern remains
// usable with sample.
__qpu__ void reset_pattern() {
  cudaq::qubit q;
  h(q);
  mz(q); // result is discarded; no control flow depends on it
  reset(q);
  x(q);
}
printf("Implicit measurements:\n");
cudaq::sample(bell).dump();
printf("\nMid-circuit measurement with reset:\n");
cudaq::sample(reset_pattern).dump();
cudaq::sample_options options{.explicit_measurements = true};
printf("\nWith explicit_measurements option:\n");
cudaq::sample(options, reset_pattern).dump();
What Is Not Supported with sample¶
Kernels that branch on measurement results can no longer be used with
sample or sample_async. Attempting to do so will raise a runtime error.
This includes both inline conditionals on measurements and conditionals on variables holding measurement results:
# Kernel that branches on a measurement result: the pattern sample rejects.
@cudaq.kernel
def kernel():
    q = cudaq.qvector(2)
    h(q[0])
    r = mz(q[0])
    if r:  # ERROR
        x(q[1])


cudaq.sample(kernel)  # raises RuntimeError
// Kernel that branches on a measurement result: the pattern sample rejects.
auto kernel = []() __qpu__ {
  cudaq::qvector q(2);
  h(q[0]);
  auto r = mz(q[0]);
  if (r) { // ERROR
    x(q[1]);
  }
};
cudaq::sample(kernel); // throws std::runtime_error
The error message will read:
`cudaq::sample` and `cudaq::sample_async` no longer support kernels that
branch on measurement results. Kernel '<name>' uses conditional feedback.
Use `cudaq::run` or `cudaq::run_async` instead. See CUDA-Q documentation
for migration guide.
How to Migrate¶
Migrating a kernel from sample to run requires three changes.
Step 1: Add a return type to the kernel¶
run requires kernels to return a non-void value. Instead of relying on
implicit measurement at the end of the circuit, explicitly return the
measurement results you need.
# Before (no return type, used with sample)
@cudaq.kernel
def kernel():
    q = cudaq.qvector(2)
    h(q[0])
    r = mz(q[0])
    if r:
        x(q[1])


# After (returns a value, used with run)
@cudaq.kernel
def kernel() -> bool:
    q = cudaq.qvector(2)
    h(q[0])
    r = mz(q[0])
    if r:
        x(q[1])
    # Explicitly return the measurement that was previously taken implicitly.
    return mz(q[1])
// Before (void kernel, used with sample)
// The lambda returns nothing, so the only way to observe q[1] is the
// measurement taken implicitly at the end of the circuit.
auto kernel = []() __qpu__ {
  cudaq::qvector q(2);
  h(q[0]);
  auto r = mz(q[0]);
  if (r) { x(q[1]); }
};
// After (returns a value, used with run)
// Same circuit as the "before" version, expressed as a callable struct whose
// operator() returns the measurement of q[1] explicitly.
struct kernel {
  auto operator()() __qpu__ {
    cudaq::qvector q(2);
    h(q[0]);
    auto r = mz(q[0]);
    if (r) { x(q[1]); }
    return mz(q[1]); // run requires a non-void return value
  }
};
Step 2: Replace sample with run¶
# Before
# sample yields aggregate counts over all shots.
counts = cudaq.sample(kernel, shots_count=1000)
# After
# run yields one returned value per shot.
results = cudaq.run(kernel, shots_count=1000)
// Before
// sample yields aggregate counts over all shots; the shot count comes first.
auto counts = cudaq::sample(1000, kernel);
// After
// run yields one returned value per shot; the kernel is passed as an
// instance of the callable struct.
auto results = cudaq::run(1000, kernel{});
Note
The default shots_count for run is 100, compared to 1000 for
sample. Specify shots_count explicitly if you need a particular
number of shots.
Step 3: Update result processing¶
sample returns a sample_result (a counts dictionary mapping bit strings
to frequencies). run returns a list (Python) or std::vector (C++) of
individual return values – one per shot. If you need a counts-dictionary view,
you can reconstruct it from the individual results:
from collections import Counter

# One returned list of bools per shot.
results = cudaq.run(multi_measure, shots_count=1000)

# Rebuild a counts-dictionary view: render each shot as a bit string and
# tally how often each string occurs.
counts = Counter()
for shot in results:
    bitstring = ''.join('1' if bit else '0' for bit in shot)
    counts[bitstring] += 1
print(dict(counts))
// One returned vector of bits per shot.
auto results = cudaq::run(1000, multi_measure{});

// Rebuild a counts-dictionary view: render each shot as a bit string and
// tally how often each string occurs.
std::map<std::string, std::size_t> counts;
for (const auto &shot : results) {
  std::string bits;
  for (auto b : shot)
    bits.push_back(b ? '1' : '0');
  ++counts[bits];
}
for (const auto &entry : counts)
  printf("%s : %zu\n", entry.first.c_str(), entry.second);
Migration Examples¶
Example 1: Simple conditional logic¶
A kernel that measures one qubit and conditionally applies a gate on another.
# Measure q[0]; flip q[1] only when that measurement is 1; return the
# measurement of q[1].
@cudaq.kernel
def simple_conditional() -> bool:
    q = cudaq.qvector(2)
    h(q[0])
    r = mz(q[0])
    if r:
        x(q[1])
    return mz(q[1])


# One bool per shot; summing the bools counts the shots that returned True.
results = cudaq.run(simple_conditional, shots_count=100)
n_ones = sum(results)
print(f"Measured |1> {n_ones} out of {len(results)} shots")
// Measure q[0]; flip q[1] only when that measurement is 1; return the
// measurement of q[1].
struct simple_conditional {
  auto operator()() __qpu__ {
    cudaq::qvector q(2);
    h(q[0]);
    auto r = mz(q[0]);
    if (r) {
      x(q[1]);
    }
    return mz(q[1]);
  }
};
// Run 100 shots; results1 holds one returned measurement per shot.
auto results1 = cudaq::run(100, simple_conditional{});
// Count the shots whose returned measurement converts to true. static_cast
// keeps the conversion to bool explicit and greppable.
std::size_t nOnes =
    std::count_if(results1.begin(), results1.end(),
                  [](const auto &r) { return static_cast<bool>(r); });
printf("Measured |1> %zu out of %zu shots\n", nOnes, results1.size());
Example 2: Returning multiple measurement results¶
A kernel that performs multiple mid-circuit measurements with conditional logic
and returns all results as a list. When returning a std::vector<bool> in
C++, pre-allocate the result vector and assign elements individually for
broadest target compatibility.
# Measure q[0] and q[1] mid-circuit, flip q[2] only when both are 1, then
# measure q[2]; all three results are returned as a list.
@cudaq.kernel
def multi_measure() -> list[bool]:
    q = cudaq.qvector(3)
    h(q)
    r0 = mz(q[0])
    r1 = mz(q[1])
    if r0 and r1:
        x(q[2])
    r2 = mz(q[2])
    return [r0, r1, r2]


results = cudaq.run(multi_measure, shots_count=100)
# Print the first five shots as bit strings.
for shot in results[:5]:
    print(''.join('1' if b else '0' for b in shot))
// Measure q[0] and q[1] mid-circuit, flip q[2] only when both are 1, then
// measure q[2]; all three results are returned as a vector.
struct multi_measure {
  std::vector<bool> operator()() __qpu__ {
    // Pre-allocated and filled element by element, per the guidance above on
    // returning std::vector<bool> for broadest target compatibility.
    std::vector<bool> results(3);
    cudaq::qvector q(3);
    h(q);
    results[0] = mz(q[0]);
    results[1] = mz(q[1]);
    if (results[0] && results[1]) {
      x(q[2]);
    }
    results[2] = mz(q[2]);
    return results;
  }
};
// Run 100 shots; results2 holds one vector of bits per shot.
auto results2 = cudaq::run(100, multi_measure{});
// Print at most the first five shots, one bit string per line.
for (std::size_t i = 0; i < 5 && i < results2.size(); ++i) {
  for (auto b : results2[i])
    printf("%d", static_cast<int>(b)); // named cast: bit -> 0/1 for %d
  printf("\n");
}
Example 3: Quantum teleportation¶
Teleportation of a qubit state requires conditional corrections based on Bell-basis measurements.
# Teleports the state of q[0] onto q[2] and returns the two mid-circuit
# measurement results followed by the final measurement of q[2].
@cudaq.kernel
def teleport() -> list[bool]:
    results = [False, False, False]
    q = cudaq.qvector(3)
    # State to teleport: X takes q[0] from |0> to |1>.
    x(q[0])
    # Entangle q[1] and q[2].
    h(q[1])
    x.ctrl(q[1], q[2])
    # Bell-basis measurement of q[0] and q[1].
    x.ctrl(q[0], q[1])
    h(q[0])
    results[0] = mz(q[0])
    results[1] = mz(q[1])
    # Conditional corrections on q[2] driven by the two measurement results.
    if results[1]:
        x(q[2])
    if results[0]:
        z(q[2])
    results[2] = mz(q[2])
    return results


runs = cudaq.run(teleport, shots_count=100)
# q[0] was prepared in |1>, so every shot should measure q[2] as 1.
assert all(r[2] for r in runs), "Teleportation failed"
print(f"Teleportation succeeded on all {len(runs)} shots")
// Teleports the state of q[0] onto q[2] and returns the two mid-circuit
// measurement results followed by the final measurement of q[2].
struct teleport {
  std::vector<bool> operator()() __qpu__ {
    std::vector<bool> results(3);
    cudaq::qvector q(3);
    // State to teleport: X takes q[0] from |0> to |1>.
    x(q[0]);
    // Entangle q[1] and q[2].
    h(q[1]);
    x<cudaq::ctrl>(q[1], q[2]);
    // Bell-basis measurement of q[0] and q[1].
    x<cudaq::ctrl>(q[0], q[1]);
    h(q[0]);
    results[0] = mz(q[0]);
    results[1] = mz(q[1]);
    // Conditional corrections on q[2] driven by the two measurement results.
    if (results[1])
      x(q[2]);
    if (results[0])
      z(q[2]);
    results[2] = mz(q[2]);
    return results;
  }
};
// Run 100 shots; results3 holds one vector of three bits per shot.
auto results3 = cudaq::run(100, teleport{});
// q[0] was prepared in |1>, so every shot should report bit 2 as set.
// NOTE: assert is compiled out when NDEBUG is defined, in which case this
// check does not run.
assert(std::ranges::all_of(results3, [](const auto &r) { return r[2]; }));
printf("Teleportation succeeded on all %zu shots\n", results3.size());
Additional Notes¶
Users of sample_async with conditional-feedback kernels should migrate to run_async. See the run specification for the asynchronous API.
run supports a variety of return types including scalars, vectors/lists, and user-defined data structures. See the run specification for the complete list of supported types and their requirements.
Assigning measurement results to named variables in kernels passed to sample is deprecated and will be removed in a future release. Use run to retrieve individual measurement results.