Infer amplify

get_converted_hf_checkpoint(hf_model_name, results_path)

Convert a HuggingFace model to a NeMo checkpoint and return the path.

Source code in bionemo/amplify/infer_amplify.py
def get_converted_hf_checkpoint(hf_model_name: str, results_path: Path) -> str:
    """Convert a HuggingFace model to a NeMo checkpoint and return the path."""
    maybe_mock_xformers()

    # Import the model from HuggingFace
    initial_ckpt_path = str(results_path / "converted_nemo_checkpoint")
    io.import_ckpt(biobert_lightning_module(config=AMPLIFYConfig()), f"hf://{hf_model_name}", Path(initial_ckpt_path))
    return initial_ckpt_path
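
A minimal usage sketch follows. The HuggingFace repository id is an illustrative assumption, not a value mandated by the API, and the import path assumes the function is used from `bionemo.amplify.infer_amplify`:

```python
# Illustrative sketch, not part of the module: convert a HuggingFace AMPLIFY
# checkpoint to NeMo format before running inference. The repo id below is an
# assumed example; substitute the checkpoint you actually want to convert.
from pathlib import Path

from bionemo.amplify.infer_amplify import get_converted_hf_checkpoint

ckpt_path = get_converted_hf_checkpoint(
    hf_model_name="chandar-lab/AMPLIFY_120M",  # assumed HF repo id, for illustration only
    results_path=Path("results"),
)
print(ckpt_path)  # -> results/converted_nemo_checkpoint
```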

main(data_path, hf_model_name=None, initial_ckpt_path=None, results_path=Path('results'), seq_length=1024, include_hiddens=False, include_embeddings=False, include_logits=False, include_input_ids=False, micro_batch_size=64, precision='bf16-mixed', tensor_model_parallel_size=1, pipeline_model_parallel_size=1, devices=1, num_nodes=1, prediction_interval='epoch')

Runs inference on a BioNeMo AMPLIFY model using PyTorch Lightning.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `data_path` | `Path` | Path to the input data CSV file | *required* |
| `hf_model_name` | `str \| None` | HuggingFace model name/path to load | `None` |
| `initial_ckpt_path` | `str \| None` | Path to the initial checkpoint to load. Only one of `hf_model_name` or `initial_ckpt_path` should be provided. | `None` |
| `results_path` | `Path` | Path to save inference results | `Path('results')` |
| `seq_length` | `int` | Min/max sequence length for padding | `1024` |
| `include_hiddens` | `bool` | Whether to include hidden states in output | `False` |
| `include_embeddings` | `bool` | Whether to include embeddings in output | `False` |
| `include_logits` | `bool` | Whether to include token logits in output | `False` |
| `include_input_ids` | `bool` | Whether to include input IDs in output | `False` |
| `micro_batch_size` | `int` | Micro batch size for inference | `64` |
| `precision` | `str` | Precision type for inference | `'bf16-mixed'` |
| `tensor_model_parallel_size` | `int` | Tensor model parallel size | `1` |
| `pipeline_model_parallel_size` | `int` | Pipeline model parallel size | `1` |
| `devices` | `int` | Number of devices to use | `1` |
| `num_nodes` | `int` | Number of nodes for distributed inference | `1` |
| `prediction_interval` | `str` | Interval at which predictions are written to disk | `'epoch'` |
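
Because `main` is a plain function registered on a CLI app via `@app.command()`, it can also be called directly from Python. A hedged sketch follows; the CSV path and HuggingFace model name are placeholder assumptions, and only one of `hf_model_name` / `initial_ckpt_path` should be set:

```python
from pathlib import Path

from bionemo.amplify.infer_amplify import main

# Sketch only: paths and model name are placeholder assumptions.
main(
    data_path=Path("data/sequences.csv"),      # CSV of input protein sequences (assumed path)
    hf_model_name="chandar-lab/AMPLIFY_120M",  # assumed HF repo id; omit if initial_ckpt_path is given
    results_path=Path("results"),
    include_embeddings=True,                   # also available: include_hiddens, include_logits, include_input_ids
    micro_batch_size=32,
)
```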
Source code in bionemo/amplify/infer_amplify.py
@app.command()
def main(
    data_path: Path,
    hf_model_name: str | None = None,
    initial_ckpt_path: str | None = None,
    results_path: Path = Path("results"),
    seq_length: int = 1024,
    include_hiddens: bool = False,
    include_embeddings: bool = False,
    include_logits: bool = False,
    include_input_ids: bool = False,
    micro_batch_size: int = 64,
    precision: str = "bf16-mixed",
    tensor_model_parallel_size: int = 1,
    pipeline_model_parallel_size: int = 1,
    devices: int = 1,
    num_nodes: int = 1,
    prediction_interval: str = "epoch",
) -> None:
    """Runs inference on a BioNeMo AMPLIFY model using PyTorch Lightning.

    Args:
        data_path: Path to the input data CSV file
        hf_model_name: HuggingFace model name/path to load
        initial_ckpt_path: Path to the initial checkpoint to load. Only one of hf_model_name or initial_ckpt_path should
            be provided.
        results_path: Path to save inference results
        seq_length: Min/max sequence length for padding
        include_hiddens: Whether to include hidden states in output
        include_embeddings: Whether to include embeddings in output
        include_logits: Whether to include token logits in output
        include_input_ids: Whether to include input IDs in output
        micro_batch_size: Micro batch size for inference
        precision: Precision type for inference
        tensor_model_parallel_size: Tensor model parallel size
        pipeline_model_parallel_size: Pipeline model parallel size
        devices: Number of devices to use
        num_nodes: Number of nodes for distributed inference
        prediction_interval: Intervals to write predictions to disk
    """
    # Create results directory
    os.makedirs(results_path, exist_ok=True)

    # Setup strategy and trainer
    global_batch_size = infer_global_batch_size(
        micro_batch_size=micro_batch_size,
        num_nodes=num_nodes,
        devices=devices,
        tensor_model_parallel_size=tensor_model_parallel_size,
        pipeline_model_parallel_size=pipeline_model_parallel_size,
    )

    strategy = nl.MegatronStrategy(
        tensor_model_parallel_size=tensor_model_parallel_size,
        pipeline_model_parallel_size=pipeline_model_parallel_size,
        ddp="megatron",
        find_unused_parameters=True,
    )

    prediction_writer = PredictionWriter(output_dir=results_path, write_interval=prediction_interval)

    # Cast precision to literal type expected by MegatronMixedPrecision
    assert precision in ["16-mixed", "bf16-mixed", "32"], (
        f"Precision must be one of: 16-mixed, bf16-mixed, 32, got {precision}"
    )
    precision_literal: Literal["16-mixed", "bf16-mixed", "32"] = cast(
        Literal["16-mixed", "bf16-mixed", "32"], precision
    )

    trainer = nl.Trainer(
        accelerator="gpu",
        devices=devices,
        strategy=strategy,
        num_nodes=num_nodes,
        callbacks=[prediction_writer],
        plugins=nl.MegatronMixedPrecision(precision=precision_literal),
    )

    # Setup data
    tokenizer = BioNeMoAMPLIFYTokenizer()
    dataset = InMemoryProteinDataset.from_csv(data_path, tokenizer=tokenizer, ignore_labels=True)
    datamodule = MockDataModule(
        predict_dataset=dataset,
        pad_token_id=int(tokenizer.pad_token_id),  # type: ignore
        micro_batch_size=micro_batch_size,
        global_batch_size=global_batch_size,
        min_seq_length=seq_length,
        max_seq_length=seq_length,
    )

    if initial_ckpt_path is None:
        assert hf_model_name is not None, "hf_model_name must be provided if initial_ckpt_path is not provided"
        initial_ckpt_path = get_converted_hf_checkpoint(hf_model_name, results_path)

    # Initialize model config and model
    config = AMPLIFYConfig(
        params_dtype=get_autocast_dtype(cast(PrecisionTypes, precision)),
        pipeline_dtype=get_autocast_dtype(cast(PrecisionTypes, precision)),
        autocast_dtype=get_autocast_dtype(cast(PrecisionTypes, precision)),
        include_hiddens=include_hiddens,
        include_embeddings=include_embeddings,
        include_input_ids=include_input_ids,
        skip_logits=not include_logits,
        tensor_model_parallel_size=tensor_model_parallel_size,
        pipeline_model_parallel_size=pipeline_model_parallel_size,
        initial_ckpt_path=initial_ckpt_path,
    )

    module = biobert_lightning_module(config=config, tokenizer=tokenizer)  # type: ignore

    # Run inference
    trainer.predict(module, datamodule=datamodule)
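
The `PredictionWriter` callback persists predictions under `results_path` at the chosen `prediction_interval`. The exact file layout is determined by `PredictionWriter`; the sketch below assumes torch-serialized `.pt` files and only illustrates how one might inspect whatever files it produces:

```python
from pathlib import Path

import torch

# Hedged sketch: the file naming below is an assumption about PredictionWriter's
# output; adjust the glob to whatever files actually appear under results_path.
for pt_file in sorted(Path("results").glob("*.pt")):
    batch = torch.load(pt_file, map_location="cpu")
    # Depending on the include_* flags, the saved dict may carry keys such as
    # "embeddings", "hiddens", "token_logits", and "input_ids" (assumed names).
    keys = list(batch.keys()) if isinstance(batch, dict) else type(batch)
    print(pt_file.name, keys)
```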