Recipes

default_adam_optimizer_with_cosine_annealing_recipe()

Default optimizer scheduler config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def default_adam_optimizer_with_cosine_annealing_recipe() -> OptimizerSchedulerConfig:
    """Default optimizer scheduler config for ESM2."""
    return OptimizerSchedulerConfig()

esm2_3b_experiment_config(result_dir)

Experiment config for ESM2 3b.

Source code in bionemo/esm2/run/recipes.py
def esm2_3b_experiment_config(result_dir) -> ExperimentConfig:
    """Experiment config for ESM2 650m."""
    return ExperimentConfig(
        save_every_n_steps=50,
        result_dir=result_dir,
        experiment_name="esm2-3b-pretraining",
        # TODO should this be exposed?
        restore_from_checkpoint_path=None,
    )

esm2_3b_model_config(initial_ckpt_path=None)

Model config for ESM2 3b.

Source code in bionemo/esm2/run/recipes.py
def esm2_3b_model_config(initial_ckpt_path=None) -> ExposedESM2PretrainConfig:
    """Model config for ESM2 3b."""
    return ExposedESM2PretrainConfig(
        num_layers=36,
        hidden_size=2560,
        ffn_hidden_size=2560 * 4,
        num_attention_heads=40,
        seq_length=1024,
        biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec,
        initial_ckpt_path=initial_ckpt_path,
        get_attention_mask_from_fusion=True,
        params_dtype="bf16-mixed",
        pipeline_dtype="bf16-mixed",
        autocast_dtype="bf16-mixed",
    )

esm2_3b_parallel_config()

Parallel config for ESM2 3b.

Source code in bionemo/esm2/run/recipes.py
def esm2_3b_parallel_config() -> ParallelConfig:
    """Parallel config for ESM2 3b."""
    return ParallelConfig(
        tensor_model_parallel_size=2,
        pipeline_model_parallel_size=1,
        # TODO: is this correct?
        accumulate_grad_batches=1,
        ddp="megatron",
        # NOTE assumes 8xGPU node. Can always edit the config.
        num_devices=8,
    )
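
For multi-node runs the same field names can be reused. The sketch below is a hypothetical two-node variant of this layout (num_nodes is taken from esm2_base_parallel_config further down the page); it is an illustration, not a tuned recipe.

multi_node_3b = ParallelConfig(
    tensor_model_parallel_size=2,
    pipeline_model_parallel_size=1,
    accumulate_grad_batches=1,
    ddp="megatron",
    num_devices=8,  # assumes 8 GPUs per node, as in the recipe above
    num_nodes=2,    # hypothetical: scale out to a second node
)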

esm2_3b_recipe(args)

Recipe for ESM2 3b.

Source code in bionemo/esm2/run/recipes.py
def esm2_3b_recipe(args) -> MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]:
    """Recipe for ESM2 3b."""
    return MainConfig(
        data_config=esm2_base_data_config(args),
        parallel_config=esm2_3b_parallel_config(),
        training_config=esm2_base_training_config(),  # no changes for 3b
        bionemo_model_config=esm2_3b_model_config(args.initial_ckpt_path),
        optim_config=esm2_base_optimizer_scheduler_config(),  # no changes for 3b
        experiment_config=esm2_3b_experiment_config(args.result_dir),
        wandb_config=esm2_3b_wandb_config(),
    )

esm2_3b_wandb_config()

Wandb config for ESM2 3b.

Source code in bionemo/esm2/run/recipes.py
def esm2_3b_wandb_config() -> WandbConfig:
    """Wandb config for ESM2 3b."""
    return WandbConfig(
        entity="esm2-3b_pretraining",
        project="esm2-3b_pretraining",
        group="esm2-3b",
        tags=["esm2", "pretraining"],
        offline=True,
        anonymous=True,
        id="1",
        log_model=False,
    )

esm2_650m_experiment_config(result_dir)

Experiment config for ESM2 650m.

Source code in bionemo/esm2/run/recipes.py
def esm2_650m_experiment_config(result_dir) -> ExperimentConfig:
    """Experiment config for ESM2 650m."""
    return ExperimentConfig(
        save_every_n_steps=50,
        result_dir=result_dir,
        experiment_name="esm2-650m-pretraining",
        # TODO should this be exposed?
        restore_from_checkpoint_path=None,
    )

esm2_650m_model_config(initial_ckpt_path=None)

Model config for ESM2 650m.

Source code in bionemo/esm2/run/recipes.py
def esm2_650m_model_config(initial_ckpt_path=None) -> ExposedESM2PretrainConfig:
    """Model config for ESM2 650m."""
    return ExposedESM2PretrainConfig(
        num_layers=33,
        hidden_size=1280,
        ffn_hidden_size=1280 * 4,
        seq_length=1024,
        num_attention_heads=20,
        biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec,
        initial_ckpt_path=initial_ckpt_path,
        get_attention_mask_from_fusion=True,
        params_dtype="bf16-mixed",
        pipeline_dtype="bf16-mixed",
        autocast_dtype="bf16-mixed",
    )
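
The initial_ckpt_path argument is the hook for fine-tuning or continued pretraining from an existing checkpoint; the path below is a hypothetical placeholder.

finetune_cfg = esm2_650m_model_config(initial_ckpt_path="/checkpoints/esm2_650m")  # hypothetical path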

esm2_650m_recipe(args)

Recipe for ESM2 650m.

Source code in bionemo/esm2/run/recipes.py
def esm2_650m_recipe(args) -> MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]:
    """Recipe for ESM2 650m."""
    return MainConfig(
        data_config=esm2_base_data_config(args),
        parallel_config=esm2_base_parallel_config(),
        training_config=esm2_base_training_config(),  # no changes for 650m
        bionemo_model_config=esm2_650m_model_config(args.initial_ckpt_path),
        optim_config=esm2_base_optimizer_scheduler_config(),  # no changes for 650m
        experiment_config=esm2_650m_experiment_config(args.result_dir),
        wandb_config=esm2_650m_wandb_config(),
    )

esm2_650m_wandb_config()

Wandb config for ESM2 650m.

Source code in bionemo/esm2/run/recipes.py
def esm2_650m_wandb_config() -> WandbConfig:
    """Wandb config for ESM2 650m."""
    return WandbConfig(
        entity="esm2-650m_pretraining",
        project="esm2-650m_pretraining",
        group="esm2-650m",
        tags=["esm2", "pretraining"],
        offline=True,
        anonymous=True,
        id="1",
        log_model=False,
    )

esm2_8m_experiment_config(result_dir)

Experiment config for ESM2 8m.

Source code in bionemo/esm2/run/recipes.py
def esm2_8m_experiment_config(result_dir) -> ExperimentConfig:
    """Experiment config for ESM2 8m."""
    return ExperimentConfig(
        save_every_n_steps=50,  # default set in previous script.
        result_dir=result_dir,
        experiment_name="esm2-8m-pretraining",
        restore_from_checkpoint_path=None,
    )

esm2_8m_model_config(initial_ckpt_path=None)

Model config for ESM2 8m.

Source code in bionemo/esm2/run/recipes.py
def esm2_8m_model_config(initial_ckpt_path=None) -> ExposedESM2PretrainConfig:
    """Model config for ESM2 8m."""
    return ExposedESM2PretrainConfig(
        num_layers=6,
        hidden_size=320,
        ffn_hidden_size=320 * 4,
        num_attention_heads=20,
        seq_length=1024,
        biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec,
        initial_ckpt_path=initial_ckpt_path,
        get_attention_mask_from_fusion=True,
        params_dtype="bf16-mixed",
        pipeline_dtype="bf16-mixed",
        autocast_dtype="bf16-mixed",
    )

esm2_8m_recipe(args)

Recipe for ESM2 8m.

Source code in bionemo/esm2/run/recipes.py
def esm2_8m_recipe(args) -> MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig]:
    """Recipe for ESM2 8m."""
    return MainConfig(
        data_config=esm2_base_data_config(args),
        parallel_config=esm2_base_parallel_config(),
        training_config=esm2_base_training_config(),  # no changes for 8m
        bionemo_model_config=esm2_8m_model_config(args.initial_ckpt_path),
        optim_config=esm2_base_optimizer_scheduler_config(),  # no changes for 8m
        experiment_config=esm2_8m_experiment_config(args.result_dir),
        wandb_config=esm2_8m_wandb_config(),
    )
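
The recipe only reads a handful of attributes from args (the four data paths, initial_ckpt_path, and result_dir), so a plain argparse.Namespace is enough to build a config outside the CLI. The paths below are hypothetical placeholders, not real data locations.

from argparse import Namespace

args = Namespace(
    train_cluster_path="/data/train_clusters.parquet",  # hypothetical path
    train_database_path="/data/train.db",               # hypothetical path
    valid_cluster_path="/data/valid_clusters.parquet",  # hypothetical path
    valid_database_path="/data/valid.db",               # hypothetical path
    initial_ckpt_path=None,                             # train from scratch
    result_dir="./results",
)
config = esm2_8m_recipe(args)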

esm2_8m_wandb_config()

Wandb config for ESM2 8m.

Source code in bionemo/esm2/run/recipes.py
def esm2_8m_wandb_config() -> WandbConfig:
    """Wandb config for ESM2 8m."""
    wandb_config = WandbConfig(
        entity="esm2-8m_pretraining",
        project="esm2-8m_pretraining",
        group="esm2-8m",
        tags=["esm2", "pretraining"],
        offline=True,
        anonymous=True,
        id="1",
        log_model=False,
    )
    return wandb_config

esm2_base_data_config(args)

Base data config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def esm2_base_data_config(args) -> ESM2DataConfig:
    """Base data config for ESM2."""
    data_config = ESM2DataConfig(
        min_seq_length=1024,
        max_seq_length=1024,
        micro_batch_size=1,
        num_dataset_workers=8,
        train_cluster_path=args.train_cluster_path,
        train_database_path=args.train_database_path,
        valid_cluster_path=args.valid_cluster_path,
        valid_database_path=args.valid_database_path,
    )
    return data_config

esm2_base_optimizer_scheduler_config()

Base optimizer scheduler config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def esm2_base_optimizer_scheduler_config() -> OptimizerSchedulerConfig:
    """Base optimizer scheduler config for ESM2."""
    return OptimizerSchedulerConfig(
        optimizer="adam", lr=4e-4, interval="step", monitor="val_loss", lr_scheduler="warmup_anneal", warmup_steps=2000
    )
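
A variant can be built with the same field names; the values below are illustrative overrides, not a tuned schedule.

custom_optim = OptimizerSchedulerConfig(
    optimizer="adam",
    lr=1e-4,              # illustrative: lower than the 4e-4 base value above
    interval="step",
    monitor="val_loss",
    lr_scheduler="warmup_anneal",
    warmup_steps=5000,    # illustrative: longer warmup
)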

esm2_base_parallel_config()

Base parallel config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def esm2_base_parallel_config() -> ParallelConfig:
    """Base parallel config for ESM2."""
    return ParallelConfig(
        tensor_model_parallel_size=1,
        pipeline_model_parallel_size=1,
        accumulate_grad_batches=1,
        ddp="megatron",
        num_devices=1,
        num_nodes=1,
    )

esm2_base_training_config()

Base training config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def esm2_base_training_config() -> TrainingConfig:
    """Base training config for ESM2."""
    return TrainingConfig(
        max_steps=500000,
        limit_val_batches=1.0,
        val_check_interval=10_000,
        precision="bf16-mixed",
        include_perplexity=True,
    )
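
For shorter experiments the same fields can be overridden directly; the values below are illustrative, not a recommended configuration.

short_run = TrainingConfig(
    max_steps=50_000,          # illustrative: shorter run than the 500k base value
    limit_val_batches=1.0,
    val_check_interval=1_000,
    precision="bf16-mixed",
    include_perplexity=True,
)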

esm2_tiny_model_config(seq_length=2048, precision='bf16-mixed', nemo1_init_path=None, initial_ckpt_path=None, biobert_spec_option=BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec, variable_seq_lengths=False)

Model config for ESM2 tiny, used for testing.

Source code in bionemo/esm2/run/recipes.py
def esm2_tiny_model_config(
    seq_length: int = 2048,
    precision: PrecisionTypes = "bf16-mixed",
    nemo1_init_path: Optional[str] = None,
    initial_ckpt_path: Optional[str] = None,
    biobert_spec_option: BiobertSpecOption = BiobertSpecOption.esm2_bert_layer_with_transformer_engine_spec,
    variable_seq_lengths: bool = False,
) -> ExposedESM2PretrainConfig:
    """Model config for ESM2 tiny, used for testing."""
    return ExposedESM2PretrainConfig(
        seq_length=seq_length,
        num_layers=2,
        hidden_size=32,
        num_attention_heads=2,
        ffn_hidden_size=4 * 32,
        params_dtype=precision,
        pipeline_dtype=precision,
        autocast_dtype=precision,
        biobert_spec_option=biobert_spec_option,
        get_attention_mask_from_fusion=True,
        nemo1_ckpt_path=str(nemo1_init_path) if nemo1_init_path is not None else None,
        # handle checkpoint resumption here rather than auto-resume so this supports fine-tuning capabilities
        initial_ckpt_path=str(initial_ckpt_path) if initial_ckpt_path is not None else None,
        variable_seq_lengths=variable_seq_lengths,
    )
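
Because every argument has a default, the tiny config is easy to tweak for local tests; the call below is a hypothetical override of just the sequence-length settings.

tiny_cfg = esm2_tiny_model_config(
    seq_length=512,             # shorter sequences for a quick smoke test
    variable_seq_lengths=True,  # allow batches with mixed sequence lengths
)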

esm2_tiny_test_recipe(args)

Test recipe for ESM2 tiny, used for testing.

Source code in bionemo/esm2/run/recipes.py
def esm2_tiny_test_recipe(args):
    """Test recipe for ESM2 tiny, used for testing."""
    parallel_config = simple_parallel_recipe()
    training_config = tiny_train_config_recipe()

    data_config = ESM2DataConfig(
        min_seq_length=128,
        max_seq_length=128,
        micro_batch_size=2,
        num_dataset_workers=1,
        train_cluster_path=args.train_cluster_path,
        train_database_path=args.train_database_path,
        valid_cluster_path=args.valid_cluster_path,
        valid_database_path=args.valid_database_path,
    )
    bionemo_model_config = esm2_tiny_model_config(
        seq_length=data_config.max_seq_length, initial_ckpt_path=args.initial_ckpt_path
    )

    optim_config = default_adam_optimizer_with_cosine_annealing_recipe()
    experiment_config = experiment_config_recipe(args.result_dir)
    wandb_config = WandbConfig(
        project="bionemo2-demo",
        entity="nvidia",
        offline=True,
        tags=[],
        group="dev",
        id="dev",
        log_model=False,
        anonymous=True,
    )
    main_config = MainConfig[ExposedESM2PretrainConfig, ESM2DataConfig](
        data_config=data_config,
        parallel_config=parallel_config,
        training_config=training_config,
        bionemo_model_config=bionemo_model_config,
        optim_config=optim_config,
        experiment_config=experiment_config,
        wandb_config=wandb_config,
    )
    return main_config

experiment_config_recipe(result_dir='./results')

Experiment config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def experiment_config_recipe(result_dir="./results") -> ExperimentConfig:
    """Experiment config for ESM2."""
    return ExperimentConfig(
        save_every_n_steps=100,
        result_dir=result_dir,
        experiment_name="default_experiment",
        restore_from_checkpoint_path=None,
        save_last_checkpoint=True,
        metric_to_monitor_for_checkpoints="val_loss",
        save_top_k=2,
        create_tensorboard_logger=False,
    )

simple_parallel_recipe(tensor_model_parallel_size=1, pipeline_model_parallel_size=1, num_devices=1, accumulate_grad_batches=1)

Simple parallel recipe for ESM2.

Source code in bionemo/esm2/run/recipes.py
def simple_parallel_recipe(
    tensor_model_parallel_size: int = 1,
    pipeline_model_parallel_size: int = 1,
    num_devices: int = 1,
    accumulate_grad_batches: int = 1,
) -> ParallelConfig:
    """Simple parallel recipe for ESM2."""
    assert (
        num_devices >= tensor_model_parallel_size * pipeline_model_parallel_size
    ), "num_devices must be at least tensor_model_parallel_size * pipeline_model_parallel_size"
    return ParallelConfig(
        tensor_model_parallel_size=tensor_model_parallel_size,
        pipeline_model_parallel_size=pipeline_model_parallel_size,
        num_devices=num_devices,
        accumulate_grad_batches=accumulate_grad_batches,
    )
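
The assertion guards against requesting more model parallelism than there are devices; a sketch of a passing and a failing call:

ok = simple_parallel_recipe(
    tensor_model_parallel_size=2, pipeline_model_parallel_size=2, num_devices=4
)
# Fewer devices than tensor_model_parallel_size * pipeline_model_parallel_size
# trips the assertion:
# simple_parallel_recipe(tensor_model_parallel_size=2, pipeline_model_parallel_size=2, num_devices=2)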

tiny_train_config_recipe()

Tiny training config for ESM2.

Source code in bionemo/esm2/run/recipes.py
def tiny_train_config_recipe() -> TrainingConfig:
    """Tiny training config for ESM2."""
    return TrainingConfig(max_steps=10, limit_val_batches=2, val_check_interval=2)