NIXL Bench#

This workload (test_template_name is NIXLBench) runs the NIXL benchmarking suite for network and interconnect performance testing.

Usage Examples#

Test TOML example:

name = "my_nixl_bench_test"
description = "Example NIXL Bench test"
test_template_name = "NIXLBench"

[cmd_args]
docker_image_url = "<docker container url here>"
path_to_benchmark = "/workspace/nixlbench/build/nixlbench"
backend = "UCX"
initiator_seg_type = "VRAM"
target_seg_type = "VRAM"
op_type = "READ"
filepath = "/data"
device_list = "11:F:/store0.bin"
# one could also use <num>kb, <num>mb, <num>gb shortcuts
total_buffer_size = 8000000000

Test Scenario example:

name = "nixl-bench-test"

[[Tests]]
id = "bench.1"
num_nodes = 1
time_limit = "00:10:00"

test_name = "my_nixl_bench_test"

Test-in-Scenario example:

name = "nixl-bench-test"

[[Tests]]
id = "bench.1"
num_nodes = 1
time_limit = "00:10:00"

name = "my_nixl_bench_test"
description = "Example NIXL Bench test"
test_template_name = "NIXLBench"

  [Tests.cmd_args]
  docker_image_url = "<docker container url here>"
  path_to_benchmark = "/workspace/nixlbench/build/nixlbench"
  backend = "UCX"
  initiator_seg_type = "DRAM"
  target_seg_type = "DRAM"
  op_type = "WRITE"

API Documentation#

Command Arguments#

pydantic model cloudai.workloads.nixl_bench.nixl_bench.NIXLBenchCmdArgs[source]#

Command line arguments for a NIXL Bench test.

field path_to_benchmark: str [Required]#

field etcd_endpoints: str = 'http://$NIXL_ETCD_ENDPOINTS'#

field docker_image_url: str [Required]#: URL of the Docker image to use for the benchmark.

field etcd_path: str = 'etcd'#: Path to the etcd executable.

field wait_etcd_for: int = 60#: Number of seconds to wait for etcd to become healthy.

field etcd_image_url: str | None = None#: Optional URL of the Docker image to use for etcd. By default, etcd is run from the same image as the benchmark.

field filepath: str | None = None#: Directory path (in container) for storage operations. Example: /data

field total_buffer_size: str | list[str] | None = None#: Total buffer size, given either as a plain number of bytes or with a kb/mb/gb suffix. Examples: 1024, 1kb, 1mb, 1gb. Use with device_list. The size is passed to NIXL as an integer number of bytes.

field device_list: str | list[str] | None = None#: Device specs in the format ‘id:type:path’; multiple specs in one string are separated by commas (e.g., ‘11:F:/store0.bin,27:K:/dev/nvme0n1’)

Test Definition#

class cloudai.workloads.nixl_bench.nixl_bench.NIXLBenchTestDefinition( *, name: str, description: str, test_template_name: str, cmd_args: NIXLBenchCmdArgs, extra_env_vars: dict[str, str | List[str]] = {}, extra_cmd_args: dict[str, str] = {}, extra_container_mounts: list[str] = [], git_repos: list[GitRepo] = [], nsys: NsysConfiguration | None = None, predictor: PredictorConfig | None = None, agent: str = 'grid_search', agent_steps: int = 1, agent_metrics: list[str] = ['default'], agent_reward_function: str = 'inverse', agent_config: dict[str, Any] | None = None, )[source]#

Bases: NIXLBaseTestDefinition[NIXLBenchCmdArgs]

Test definition for a NIXL Bench test.