Recipes

ESM2Recipes

Bases: BaseModel

Pre-baked recipes for ESM2.

THIS PYDANTIC MODEL IS NOT MEANT FOR SERIALIZATION; it exists only to facilitate argparse. Each recipe should take `args` as its only argument. We use partials so this information can be provided at runtime. Add new recipes to this model.

Source code in bionemo/esm2/run/recipes.py
class ESM2Recipes(BaseModel):
    """Pre-baked recipes for ESM2.

    THIS PYDANTIC MODEL IS NOT MEANT FOR SERIALIZATION. Only used to facilitate argparse. Each recipe should take `args`
    as the only argument. We use partials so we can provide this information at runtime. Add new recipes to this model.
    """

    # Use partials so we can still parameterize the recipes from the CLI (e.g. data paths.)
    esm2_tiny_test_recipe: Callable[[argparse.Namespace], MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]] = (
        partial(esm2_tiny_test_recipe)
    )
    esm2_8m_recipe: Callable[[argparse.Namespace], MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]] = partial(
        esm2_8m_recipe
    )
    esm2_650m_recipe: Callable[[argparse.Namespace], MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]] = partial(
        esm2_650m_recipe
    )
    esm2_3b_recipe: Callable[[argparse.Namespace], MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]] = partial(
        esm2_3b_recipe
    )
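
A minimal usage sketch (not part of the module): select one of the pre-baked recipes by attribute name, e.g. from a CLI flag, and call it with a parsed argparse namespace. The import path follows the source file noted above, the file paths are placeholders, and the namespace fields mirror what the recipe functions read (data paths, max_steps, scheduler_max_steps, initial_ckpt_path, result_dir).

import argparse

from bionemo.esm2.run.recipes import ESM2Recipes  # assumed import path

# Placeholder CLI namespace with the fields the recipes read off `args`.
args = argparse.Namespace(
    train_cluster_path="train_clusters.parquet",
    train_database_path="train.db",
    valid_cluster_path="valid_clusters.parquet",
    valid_database_path="valid.db",
    max_steps=100,
    scheduler_max_steps=100,
    initial_ckpt_path=None,
    result_dir="./results",
)

# Look up a recipe by name and materialize the full config.
recipe = getattr(ESM2Recipes(), "esm2_tiny_test_recipe")
main_config = recipe(args)  # MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]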

default_adam_optimizer_with_cosine_annealing_recipe(max_steps=None)

Default optimizer scheduler config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def default_adam_optimizer_with_cosine_annealing_recipe(max_steps: Optional[int] = None) -> OptimizerSchedulerConfig:
    """Default optimizer scheduler config for ESM2."""
    return OptimizerSchedulerConfig(max_steps=max_steps)

esm2_3b_experiment_config(result_dir)

Experiment config for ESM2 3b.

Source code in bionemo/esm2/run/recipes.py
def esm2_3b_experiment_config(result_dir) -> ExperimentConfig:
    """Experiment config for ESM2 650m."""
    return ExperimentConfig(
        save_every_n_steps=50,
        result_dir=result_dir,
        experiment_name="esm2-3b-pretraining",
        # TODO should this be exposed?
        restore_from_checkpoint_path=None,
    )

esm2_3b_model_config(initial_ckpt_path=None)

Model config for ESM2 3b.

Source code in bionemo/esm2/run/recipes.py
def esm2_3b_model_config(initial_ckpt_path=None) -> ExposedESM2PretrainConfig:
    """Model config for ESM2 3b."""
    return ExposedESM2PretrainConfig(
        num_layers=36,
        hidden_size=2560,
        ffn_hidden_size=2560 * 4,
        num_attention_heads=40,
        seq_length=1024,
        biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec,
        initial_ckpt_path=initial_ckpt_path,
        get_attention_mask_from_fusion=True,
        params_dtype="bf16-mixed",
        pipeline_dtype="bf16-mixed",
        autocast_dtype="bf16-mixed",
    )

esm2_3b_parallel_config()

Parallel config for ESM2 3b.

Source code in bionemo/esm2/run/recipes.py
def esm2_3b_parallel_config() -> ParallelConfig:
    """Parallel config for ESM2 3b."""
    return ParallelConfig(
        tensor_model_parallel_size=2,
        pipeline_model_parallel_size=1,
        # TODO: is this correct?
        accumulate_grad_batches=1,
        ddp="megatron",
        # NOTE assumes 8xGPU node. Can always edit the config.
        num_devices=8,
    )

esm2_3b_recipe(args)

Recipe for ESM2 3b.

Source code in bionemo/esm2/run/recipes.py
def esm2_3b_recipe(args) -> MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]:
    """Recipe for ESM2 3b."""
    return MainConfig(
        data_config=esm2_base_data_config(args),
        parallel_config=esm2_3b_parallel_config(),
        training_config=esm2_base_training_config(max_steps=args.max_steps),  # no changes for 8m
        bionemo_model_config=esm2_3b_model_config(args.initial_ckpt_path),
        optim_config=esm2_base_optimizer_scheduler_config(max_steps=args.scheduler_max_steps),  # no changes for 8m
        experiment_config=esm2_3b_experiment_config(args.result_dir),
        wandb_config=esm2_3b_wandb_config(),
    )

esm2_3b_wandb_config()

Wandb config for ESM2 3b.

Source code in bionemo/esm2/run/recipes.py
def esm2_3b_wandb_config() -> WandbConfig:
    """Wandb config for ESM2 3b."""
    return WandbConfig(
        entity="esm2-3b_pretraining",
        project="esm2-3b_pretraining",
        group="esm2-3b",
        tags=["esm2-650m"],
        offline=True,
        anonymous=True,
        id="1",
        log_model=False,
    )

esm2_650m_experiment_config(result_dir)

Experiment config for ESM2 650m.

Source code in bionemo/esm2/run/recipes.py
def esm2_650m_experiment_config(result_dir) -> ExperimentConfig:
    """Experiment config for ESM2 650m."""
    return ExperimentConfig(
        save_every_n_steps=50,
        result_dir=result_dir,
        experiment_name="esm2-650m-pretraining",
        # TODO should this be exposed?
        restore_from_checkpoint_path=None,
    )

esm2_650m_model_config(initial_ckpt_path=None)

Model config for ESM2 650m.

Source code in bionemo/esm2/run/recipes.py
def esm2_650m_model_config(initial_ckpt_path=None) -> ExposedESM2PretrainConfig:
    """Model config for ESM2 650m."""
    return ExposedESM2PretrainConfig(
        num_layers=33,
        hidden_size=1280,
        ffn_hidden_size=1280 * 4,
        seq_length=1024,
        num_attention_heads=20,
        biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec,
        initial_ckpt_path=initial_ckpt_path,
        get_attention_mask_from_fusion=True,
        params_dtype="bf16-mixed",
        pipeline_dtype="bf16-mixed",
        autocast_dtype="bf16-mixed",
    )

esm2_650m_recipe(args)

Recipe for ESM2 650m.

Source code in bionemo/esm2/run/recipes.py
def esm2_650m_recipe(args) -> MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]:
    """Recipe for ESM2 650m."""
    return MainConfig(
        data_config=esm2_base_data_config(args),
        parallel_config=esm2_base_parallel_config(),
        training_config=esm2_base_training_config(max_steps=args.max_steps),  # no changes for 8m
        bionemo_model_config=esm2_650m_model_config(args.initial_ckpt_path),
        optim_config=esm2_base_optimizer_scheduler_config(max_steps=args.scheduler_max_steps),  # no changes for 8m
        experiment_config=esm2_650m_experiment_config(args.result_dir),
        wandb_config=esm2_650m_wandb_config(),
    )

esm2_650m_wandb_config()

Wandb config for ESM2 650m.

Source code in bionemo/esm2/run/recipes.py
def esm2_650m_wandb_config() -> WandbConfig:
    """Wandb config for ESM2 650m."""
    return WandbConfig(
        entity="esm2-650m_pretraining",
        project="esm2-650m_pretraining",
        group="esm2-650m",
        tags=["esm2", "pretraining"],
        offline=True,
        anonymous=True,
        id="1",
        log_model=False,
    )

esm2_8m_experiment_config(result_dir)

Experiment config for ESM2 8m.

Source code in bionemo/esm2/run/recipes.py
def esm2_8m_experiment_config(result_dir) -> ExperimentConfig:
    """Experiment config for ESM2 8m."""
    return ExperimentConfig(
        save_every_n_steps=50,  # default set in previous script.
        result_dir=result_dir,
        experiment_name="esm2-8m-pretraining",
        restore_from_checkpoint_path=None,
    )

esm2_8m_model_config(initial_ckpt_path=None)

Model config for ESM2 8m.

Source code in bionemo/esm2/run/recipes.py
def esm2_8m_model_config(initial_ckpt_path=None) -> ExposedESM2PretrainConfig:
    """Model config for ESM2 8m."""
    return ExposedESM2PretrainConfig(
        num_layers=6,
        hidden_size=320,
        ffn_hidden_size=320 * 4,
        num_attention_heads=20,
        seq_length=1024,
        biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec,
        initial_ckpt_path=initial_ckpt_path,
        get_attention_mask_from_fusion=True,
        params_dtype="bf16-mixed",
        pipeline_dtype="bf16-mixed",
        autocast_dtype="bf16-mixed",
    )

esm2_8m_recipe(args)

Recipe for ESM2 8m.

Source code in bionemo/esm2/run/recipes.py
def esm2_8m_recipe(args) -> MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]:
    """Recipe for ESM2 8m."""
    return MainConfig(
        data_config=esm2_base_data_config(args),
        parallel_config=esm2_base_parallel_config(),
        training_config=esm2_base_training_config(max_steps=args.max_steps),  # no changes for 8m
        bionemo_model_config=esm2_8m_model_config(args.initial_ckpt_path),
        optim_config=esm2_base_optimizer_scheduler_config(max_steps=args.scheduler_max_steps),  # no changes for 8m
        experiment_config=esm2_8m_experiment_config(args.result_dir),
        wandb_config=esm2_8m_wandb_config(),
    )

esm2_8m_wandb_config()

Wandb config for ESM2 8m.

Source code in bionemo/esm2/run/recipes.py
def esm2_8m_wandb_config() -> WandbConfig:
    """Wandb config for ESM2 8m."""
    wandb_config = WandbConfig(
        entity="esm2-8m_pretraining",
        project="esm2-8m_pretraining",
        group="esm2-8m",
        tags=["esm2", "pretraining"],
        offline=True,
        anonymous=True,
        id="1",
        log_model=False,
    )
    return wandb_config

esm2_base_data_config(args)

Base data config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def esm2_base_data_config(args) -> ESM2DataConfig:
    """Base data config for ESM2."""
    data_config = ESM2DataConfig(
        min_seq_length=1024,
        max_seq_length=1024,
        micro_batch_size=1,
        num_dataset_workers=8,
        train_cluster_path=args.train_cluster_path,
        train_database_path=args.train_database_path,
        valid_cluster_path=args.valid_cluster_path,
        valid_database_path=args.valid_database_path,
    )
    return data_config
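
A minimal sketch of what esm2_base_data_config expects from `args`: only the four cluster/database paths are read. The import path is assumed from the source file above and the paths are placeholders.

import argparse

from bionemo.esm2.run.recipes import esm2_base_data_config  # assumed import path

args = argparse.Namespace(
    train_cluster_path="train_clusters.parquet",  # placeholder paths
    train_database_path="train.db",
    valid_cluster_path="valid_clusters.parquet",
    valid_database_path="valid.db",
)
data_config = esm2_base_data_config(args)  # seq length 1024, micro batch size 1, 8 workers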

esm2_base_optimizer_scheduler_config(max_steps=None)

Base optimizer scheduler config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def esm2_base_optimizer_scheduler_config(max_steps: Optional[int] = None) -> OptimizerSchedulerConfig:
    """Base optimizer scheduler config for ESM2."""
    return OptimizerSchedulerConfig(
        optimizer="adam",
        lr=4e-4,
        interval="step",
        monitor="val_loss",
        lr_scheduler="warmup_anneal",
        warmup_steps=2000,
        max_steps=max_steps,
    )

esm2_base_parallel_config()

Base parallel config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def esm2_base_parallel_config() -> ParallelConfig:
    """Base parallel config for ESM2."""
    return ParallelConfig(
        tensor_model_parallel_size=1,
        pipeline_model_parallel_size=1,
        accumulate_grad_batches=1,
        ddp="megatron",
        num_devices=1,
        num_nodes=1,
    )

esm2_base_training_config(max_steps=500000)

Base training config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def esm2_base_training_config(max_steps: int = 500000) -> TrainingConfig:
    """Base training config for ESM2."""
    return TrainingConfig(
        max_steps=max_steps,
        limit_val_batches=1.0,
        val_check_interval=10_000,
        precision="bf16-mixed",
        include_perplexity=True,
    )

esm2_tiny_model_config(seq_length=2048, precision='bf16-mixed', nemo1_init_path=None, initial_ckpt_path=None, biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec, variable_seq_lengths=False)

Model config for ESM2 tiny, used for testing.

Source code in bionemo/esm2/run/recipes.py
def esm2_tiny_model_config(
    seq_length: int = 2048,
    precision: PrecisionTypes = "bf16-mixed",
    nemo1_init_path: Optional[str] = None,
    initial_ckpt_path: Optional[str] = None,
    biobert_spec_option: BiobertSpecOption = BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec,
    variable_seq_lengths: bool = False,
) -> ExposedESM2PretrainConfig:
    """Model config for ESM2 tiny, used for testing."""
    return ExposedESM2PretrainConfig(
        seq_length=seq_length,
        num_layers=2,
        hidden_size=32,
        num_attention_heads=2,
        ffn_hidden_size=4 * 32,
        params_dtype=precision,
        pipeline_dtype=precision,
        autocast_dtype=precision,
        biobert_spec_option=biobert_spec_option,
        get_attention_mask_from_fusion=True,
        nemo1_ckpt_path=str(nemo1_init_path) if nemo1_init_path is not None else None,
        # handle checkpoint resumption here rather than auto-resume so this supports fine-tuning capabilities
        initial_ckpt_path=str(initial_ckpt_path) if initial_ckpt_path is not None else None,
        variable_seq_lengths=variable_seq_lengths,
    )
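
A minimal sketch (not from the source) of starting the tiny test model from an existing checkpoint, as the inline comment about fine-tuning suggests; the checkpoint path is a placeholder and the import path is assumed.

from bionemo.esm2.run.recipes import esm2_tiny_model_config  # assumed import path

model_config = esm2_tiny_model_config(
    seq_length=128,
    precision="bf16-mixed",
    initial_ckpt_path="/path/to/checkpoint",  # placeholder; loaded here rather than via auto-resume
)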

esm2_tiny_test_recipe(args)

Test recipe for ESM2 tiny, used for testing.

Source code in bionemo/esm2/run/recipes.py
def esm2_tiny_test_recipe(args):
    """Test recipe for ESM2 tiny, used for testing."""
    parallel_config = simple_parallel_recipe()
    training_config = tiny_train_config_recipe()

    data_config = ESM2DataConfig(
        min_seq_length=128,
        max_seq_length=128,
        micro_batch_size=2,
        num_dataset_workers=1,
        train_cluster_path=args.train_cluster_path,
        train_database_path=args.train_database_path,
        valid_cluster_path=args.valid_cluster_path,
        valid_database_path=args.valid_database_path,
    )
    bionemo_model_config = esm2_tiny_model_config(
        seq_length=data_config.max_seq_length, initial_ckpt_path=args.initial_ckpt_path
    )

    optim_config = default_adam_optimizer_with_cosine_annealing_recipe(max_steps=args.scheduler_max_steps)
    experiment_config = experiment_config_recipe(args.result_dir)
    wandb_config = WandbConfig(
        project="bionemo2-demo",
        entity="nvidia",
        offline=True,
        tags=[],
        group="dev",
        id="dev",
        log_model=False,
        anonymous=True,
    )
    main_config = MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig](
        data_config=data_config,
        parallel_config=parallel_config,
        training_config=training_config,
        bionemo_model_config=bionemo_model_config,
        optim_config=optim_config,
        experiment_config=experiment_config,
        wandb_config=wandb_config,
    )
    return main_config

experiment_config_recipe(result_dir='./results')

Experiment config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def experiment_config_recipe(result_dir="./results") -> ExperimentConfig:
    """Experiment config for ESM2."""
    return ExperimentConfig(
        save_every_n_steps=100,
        result_dir=result_dir,
        experiment_name="default_experiment",
        restore_from_checkpoint_path=None,
        save_last_checkpoint=True,
        metric_to_monitor_for_checkpoints="val_loss",
        save_top_k=2,
        create_tensorboard_logger=False,
    )

simple_parallel_recipe(tensor_model_parallel_size=1, pipeline_model_parallel_size=1, num_devices=1, accumulate_grad_batches=1)

Simple parallel recipe for ESM2.

Source code in bionemo/esm2/run/recipes.py
def simple_parallel_recipe(
    tensor_model_parallel_size: int = 1,
    pipeline_model_parallel_size: int = 1,
    num_devices: int = 1,
    accumulate_grad_batches: int = 1,
) -> ParallelConfig:
    """Simple parallel recipe for ESM2."""
    assert (
        num_devices >= tensor_model_parallel_size * pipeline_model_parallel_size
    ), "devices must be divisible by tensor_model_parallel_size * pipeline_model_parallel_size"
    return ParallelConfig(
        tensor_model_parallel_size=tensor_model_parallel_size,
        pipeline_model_parallel_size=pipeline_model_parallel_size,
        num_devices=num_devices,
        accumulate_grad_batches=accumulate_grad_batches,
    )
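
A minimal sketch of the constraint simple_parallel_recipe enforces: num_devices must be at least tensor_model_parallel_size * pipeline_model_parallel_size. The import path is assumed as above.

from bionemo.esm2.run.recipes import simple_parallel_recipe  # assumed import path

parallel_config = simple_parallel_recipe(tensor_model_parallel_size=2, num_devices=2)  # OK: 2 >= 2 * 1
# simple_parallel_recipe(tensor_model_parallel_size=2, num_devices=1)  # raises AssertionError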

tiny_train_config_recipe()

Tiny training config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def tiny_train_config_recipe() -> TrainingConfig:
    """Tiny training config for ESM2."""
    return TrainingConfig(max_steps=10, limit_val_batches=2, val_check_interval=2)