Sequence model

ESM2FineTuneSeqConfig dataclass

Bases: ESM2GenericConfig[ESM2FineTuneSeqModel, BERTMLMLossWithReduction], IOMixinWithGettersSetters

ESM2FineTuneSeqConfig is a dataclass used to configure the ESM2 model for sequence-level fine-tuning.

Timers from ModelParallelConfig are required for megatron forward compatibility.

Source code in bionemo/esm2/model/finetune/sequence_model.py
@dataclass
class ESM2FineTuneSeqConfig(
    ESM2GenericConfig[ESM2FineTuneSeqModel, BERTMLMLossWithReduction], iom.IOMixinWithGettersSetters
):
    """ExampleConfig is a dataclass that is used to configure the model.

    Timers from ModelParallelConfig are required for megatron forward compatibility.
    """

    model_cls: Type[ESM2FineTuneSeqModel] = ESM2FineTuneSeqModel
    # The typical case is fine-tuning the base BioBERT checkpoint, which does not have this head. If you are instead
    # loading a checkpoint that already has this head and want to keep its weights, drop this line or set it to [].
    initial_ckpt_skip_keys_with_these_prefixes: List[str] = field(default_factory=lambda: ["regression_head"])

    task_type: Literal["classification", "regression"] = "regression"
    encoder_frozen: bool = True  # freeze encoder parameters
    mlp_ft_dropout: float = 0.25  # MLP layer dropout
    mlp_hidden_size: int = 256
    mlp_target_size: int = 1

    def get_loss_reduction_class(self) -> Type[BERTMLMLossWithReduction]:
        """Returns RegressorLossReduction class."""
        if self.task_type == "regression":
            return RegressorLossReduction
        elif self.task_type == "classification":
            return ClassifierLossReduction
        else:
            raise ValueError(f"Unsupported task_type: {self.task_type}")

get_loss_reduction_class()

Returns the loss reduction class matching the configured task_type: RegressorLossReduction for regression or ClassifierLossReduction for classification.

Source code in bionemo/esm2/model/finetune/sequence_model.py
def get_loss_reduction_class(self) -> Type[BERTMLMLossWithReduction]:
    """Returns RegressorLossReduction class."""
    if self.task_type == "regression":
        return RegressorLossReduction
    elif self.task_type == "classification":
        return ClassifierLossReduction
    else:
        raise ValueError(f"Unsupported task_type: {self.task_type}")
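
An illustrative dispatch check (assuming the inherited defaults are acceptable, as in the config sketch above):

# The returned loss reduction class follows task_type.
loss_cls = ESM2FineTuneSeqConfig(task_type="regression").get_loss_reduction_class()
print(loss_cls.__name__)   # "RegressorLossReduction"

loss_cls = ESM2FineTuneSeqConfig(task_type="classification").get_loss_reduction_class()
print(loss_cls.__name__)   # "ClassifierLossReduction"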

ESM2FineTuneSeqModel

Bases: ESM2Model

ESM2 model that is suitable for fine-tuning on downstream tasks.

Source code in bionemo/esm2/model/finetune/sequence_model.py
class ESM2FineTuneSeqModel(ESM2Model):
    """ESM2 model that is suitable for fine-tuning on downstream tasks."""

    def __init__(self, config, *args, post_process: bool = True, include_embeddings: bool = False, **kwargs):
        """Constructs an instance of the ESM2 model suitable for fine-tuning."""
        super().__init__(config, *args, post_process=post_process, include_embeddings=True, **kwargs)

        # freeze encoder parameters
        if config.encoder_frozen:
            for _, param in self.named_parameters():
                param.requires_grad = False

        self.include_embeddings_finetuning = (
            include_embeddings  # this include_embeddings is for the final output of fine-tuning
        )
        # If post_process is True that means that we are at the last megatron parallelism stage and we can
        #   apply the head.
        if post_process:
            self.task_type = config.task_type
            # if we are doing post process (eg pipeline last stage) then we need to add the output layers
            self.head_name = f"{self.task_type}_head"  # Example: 'regression_head' or 'classification_head'
            # Set the attribute dynamically
            setattr(self, self.head_name, MegatronMLPHead(config))

    def forward(self, *args, **kwargs) -> BioBertOutput | Tensor:
        """Inference."""
        output = super().forward(*args, **kwargs)
        # Stop early if we are not in post_process mode (for example if we are in the middle of model parallelism)
        if not self.post_process:
            return output  # we are not at the last pipeline stage so just return what the parent has
        # Double check that the output from the parent has everything we need to do prediction in this head.
        if not isinstance(output, dict) or "embeddings" not in output:
            raise ValueError(
                f"Expected to find 'embeddings' in the output, and output to be dictionary-like, found {output},\n"
                "Make sure include_embeddings=True in the call to super().__init__"
            )
        # Get the embeddings from the parent output; the task head consumes them directly
        embeddings: Tensor = output["embeddings"]
        # Apply the task head (regression or classification) to produce the task output
        task_head = getattr(self, self.head_name)
        output[f"{self.task_type}_output"] = task_head(embeddings)
        if not self.include_embeddings_finetuning:
            del output["embeddings"]
        return output
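
An illustrative sketch of consuming the forward output on the last pipeline stage; the argument names follow the parent ESM2Model forward and, like the batch keys, are assumptions here.

# Hypothetical call; assumes the model was configured with task_type="regression"
# and constructed with include_embeddings=True.
output = model(input_ids=batch["text"], attention_mask=batch["attention_mask"])

predictions = output["regression_output"]   # task head output
embeddings = output.get("embeddings")       # only present when include_embeddings=True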

__init__(config, *args, post_process=True, include_embeddings=False, **kwargs)

Constructs an instance of the ESM2 model suitable for fine-tuning.

Source code in bionemo/esm2/model/finetune/sequence_model.py
def __init__(self, config, *args, post_process: bool = True, include_embeddings: bool = False, **kwargs):
    """Constructs an instance of the ESM2 model suitable for fine-tuning."""
    super().__init__(config, *args, post_process=post_process, include_embeddings=True, **kwargs)

    # freeze encoder parameters
    if config.encoder_frozen:
        for _, param in self.named_parameters():
            param.requires_grad = False

    self.include_embeddings_finetuning = (
        include_embeddings  # this include_embeddings is for the final output of fine-tuning
    )
    # If post_process is True that means that we are at the last megatron parallelism stage and we can
    #   apply the head.
    if post_process:
        self.task_type = config.task_type
        # if we are doing post process (eg pipeline last stage) then we need to add the output layers
        self.head_name = f"{self.task_type}_head"  # Example: 'regression_head' or 'classification_head'
        # Set the attribute dynamically
        setattr(self, self.head_name, MegatronMLPHead(config))
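
An illustrative check (not from the source): because the freeze loop runs before the head is attached, with encoder_frozen=True only the task head remains trainable.

# Hypothetical sanity check on a constructed model instance.
trainable = [name for name, p in model.named_parameters() if p.requires_grad]
assert all(name.startswith(f"{model.task_type}_head") for name in trainable)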

forward(*args, **kwargs)

Inference.

Source code in bionemo/esm2/model/finetune/sequence_model.py
def forward(self, *args, **kwargs) -> BioBertOutput | Tensor:
    """Inference."""
    output = super().forward(*args, **kwargs)
    # Stop early if we are not in post_process mode (for example if we are in the middle of model parallelism)
    if not self.post_process:
        return output  # we are not at the last pipeline stage so just return what the parent has
    # Double check that the output from the parent has everything we need to do prediction in this head.
    if not isinstance(output, dict) or "embeddings" not in output:
        raise ValueError(
            f"Expected to find 'embeddings' in the output, and output to be dictionary-like, found {output},\n"
            "Make sure include_embeddings=True in the call to super().__init__"
        )
    # Get the embeddings from the parent output; the task head consumes them directly
    embeddings: Tensor = output["embeddings"]
    # Apply the task head (regression or classification) to produce the task output
    task_head = getattr(self, self.head_name)
    output[f"{self.task_type}_output"] = task_head(embeddings)
    if not self.include_embeddings_finetuning:
        del output["embeddings"]
    return output

MegatronMLPHead

Bases: MegatronModule

An MLP head for sequence-level fine-tuning tasks (regression or classification).

Source code in bionemo/esm2/model/finetune/sequence_model.py
class MegatronMLPHead(MegatronModule):
    """An MLP class for sequence-level regression."""

    def __init__(self, config: TransformerConfig):
        """Constructor."""
        super().__init__(config)

        layer_sizes = [config.hidden_size, config.mlp_hidden_size, config.mlp_target_size]
        self.linear_layers = torch.nn.ModuleList(
            [torch.nn.Linear(i, o) for i, o in zip(layer_sizes[:-1], layer_sizes[1:])]  # noqa: RUF007
        )
        self.act = torch.nn.ReLU()
        self.dropout = torch.nn.Dropout(p=config.mlp_ft_dropout)

    def forward(self, hidden_states: Tensor) -> Tensor:
        """Inference."""
        # hidden_states: [batch, seq, hidden]
        for layer in self.linear_layers[:-1]:
            hidden_states = self.dropout(self.act(layer(hidden_states)))

        output = self.linear_layers[-1](hidden_states)
        return output
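
A plain-PyTorch sketch (not the class itself) of the architecture the head builds, assuming an illustrative hidden_size=1280 together with the MLP defaults shown above:

import torch

# Linear -> ReLU -> Dropout between hidden layers, then a final Linear, mirroring
# the layer_sizes = [hidden_size, mlp_hidden_size, mlp_target_size] construction.
head = torch.nn.Sequential(
    torch.nn.Linear(1280, 256),
    torch.nn.ReLU(),
    torch.nn.Dropout(p=0.25),
    torch.nn.Linear(256, 1),
)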

__init__(config)

Constructor.

Source code in bionemo/esm2/model/finetune/sequence_model.py
def __init__(self, config: TransformerConfig):
    """Constructor."""
    super().__init__(config)

    layer_sizes = [config.hidden_size, config.mlp_hidden_size, config.mlp_target_size]
    self.linear_layers = torch.nn.ModuleList(
        [torch.nn.Linear(i, o) for i, o in zip(layer_sizes[:-1], layer_sizes[1:])]  # noqa: RUF007
    )
    self.act = torch.nn.ReLU()
    self.dropout = torch.nn.Dropout(p=config.mlp_ft_dropout)

forward(hidden_states)

Inference.

Source code in bionemo/esm2/model/finetune/sequence_model.py
def forward(self, hidden_states: Tensor) -> Tensor:
    """Inference."""
    # hidden_states: [batch, seq, hidden]
    for layer in self.linear_layers[:-1]:
        hidden_states = self.dropout(self.act(layer(hidden_states)))

    output = self.linear_layers[-1](hidden_states)
    return output
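
An illustrative shape walkthrough reusing the plain-PyTorch sketch above (shapes follow the [b, s, h] comment in the source; concrete sizes are assumptions):

x = torch.randn(4, 128, 1280)   # [batch, seq, hidden]
y = head(x)
print(y.shape)                  # torch.Size([4, 128, 1]) -> [batch, seq, mlp_target_size]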