Convert

HFAMPLIFYImporter

Bases: ModelConnector[AutoModel, BionemoLightningModule]

Converts a Hugging Face AMPLIFY model to a NeMo AMPLIFY model.

Source code in bionemo/amplify/convert.py
@io.model_importer(BionemoLightningModule, "hf")
class HFAMPLIFYImporter(io.ModelConnector[AutoModel, BionemoLightningModule]):
    """Converts a Hugging Face AMPLIFY model to a NeMo AMPLIFY model."""

    def init(self) -> BionemoLightningModule:
        """Initialize the converted model."""
        return biobert_lightning_module(self.config, tokenizer=self.tokenizer)

    def apply(self, output_path: Path) -> Path:
        """Applies the transformation."""
        source = AutoModel.from_pretrained(str(self), trust_remote_code=True, torch_dtype="auto")
        target = self.init()
        trainer = self.nemo_setup(target)
        self.convert_state(source, target)
        self.nemo_save(output_path, trainer)
        teardown(trainer, target)
        return output_path

    def convert_state(self, source, target):
        """Converting HF state dict to NeMo state dict."""
        mapping = {
            "transformer_encoder.*.wo.weight": "encoder.layers.*.self_attention.linear_proj.weight",
            "transformer_encoder.*.ffn.w12.weight": "encoder.layers.*.mlp.linear_fc1.weight",
            "transformer_encoder.*.ffn.w3.weight": "encoder.layers.*.mlp.linear_fc2.weight",
            "transformer_encoder.*.attention_norm.weight": "encoder.layers.*.self_attention.linear_qkv.layer_norm_weight",
            "transformer_encoder.*.ffn_norm.weight": "encoder.layers.*.mlp.linear_fc1.layer_norm_weight",
            "layer_norm_2.weight": "encoder.final_layernorm.weight",
        }

        # lm_head.bias
        return io.apply_transforms(
            source,
            target,
            mapping=mapping,
            transforms=[_import_qkv_weight, _pad_embeddings, _pad_bias, _pad_output_weights],
        )

    @property
    def tokenizer(self) -> BioNeMoAMPLIFYTokenizer:
        """We just have the one tokenizer for AMPLIFY."""
        return BioNeMoAMPLIFYTokenizer()

    @property
    def config(self) -> AMPLIFYConfig:
        """Returns the transformed AMPLIFY config given the model tag."""
        source = HFAutoConfig.from_pretrained(str(self), trust_remote_code=True)
        output = AMPLIFYConfig(
            num_layers=source.num_hidden_layers,
            hidden_size=source.hidden_size,
            ffn_hidden_size=source.intermediate_size,
            position_embedding_type="rope",
            num_attention_heads=source.num_attention_heads,
            seq_length=source.max_length,
            fp16=(dtype_from_hf(source) == torch.float16),
            bf16=(dtype_from_hf(source) == torch.bfloat16),
            params_dtype=dtype_from_hf(source),
        )

        return output
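
Because the class is registered with @io.model_importer(BionemoLightningModule, "hf"), conversions are normally triggered through NeMo's checkpoint-import machinery using an hf:// source. The connector can also be driven directly; the sketch below assumes it is constructed from a Hugging Face model tag (as str(self) in apply suggests) and uses "chandar-lab/AMPLIFY_120M" purely as a placeholder tag.

from pathlib import Path

from bionemo.amplify.convert import HFAMPLIFYImporter, maybe_mock_xformers

# Register the xformers mock (skipped if xformers has already been imported) so
# the Hugging Face AMPLIFY remote code can be loaded without the real library.
maybe_mock_xformers()

# Placeholder HF tag; substitute the AMPLIFY checkpoint you want to convert.
importer = HFAMPLIFYImporter("chandar-lab/AMPLIFY_120M")
nemo_ckpt = importer.apply(Path("amplify_120m_nemo"))
print(nemo_ckpt)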

config property

Returns the transformed AMPLIFY config given the model tag.

tokenizer property

We just have the one tokenizer for AMPLIFY.

apply(output_path)

Applies the transformation.

Source code in bionemo/amplify/convert.py
def apply(self, output_path: Path) -> Path:
    """Applies the transformation."""
    source = AutoModel.from_pretrained(str(self), trust_remote_code=True, torch_dtype="auto")
    target = self.init()
    trainer = self.nemo_setup(target)
    self.convert_state(source, target)
    self.nemo_save(output_path, trainer)
    teardown(trainer, target)
    return output_path

convert_state(source, target)

Converts the HF state dict to the NeMo state dict.

Source code in bionemo/amplify/convert.py
def convert_state(self, source, target):
    """Converting HF state dict to NeMo state dict."""
    mapping = {
        "transformer_encoder.*.wo.weight": "encoder.layers.*.self_attention.linear_proj.weight",
        "transformer_encoder.*.ffn.w12.weight": "encoder.layers.*.mlp.linear_fc1.weight",
        "transformer_encoder.*.ffn.w3.weight": "encoder.layers.*.mlp.linear_fc2.weight",
        "transformer_encoder.*.attention_norm.weight": "encoder.layers.*.self_attention.linear_qkv.layer_norm_weight",
        "transformer_encoder.*.ffn_norm.weight": "encoder.layers.*.mlp.linear_fc1.layer_norm_weight",
        "layer_norm_2.weight": "encoder.final_layernorm.weight",
    }

    # lm_head.bias
    return io.apply_transforms(
        source,
        target,
        mapping=mapping,
        transforms=[_import_qkv_weight, _pad_embeddings, _pad_bias, _pad_output_weights],
    )
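
The * wildcard on both sides of mapping stands for the encoder layer index: io.apply_transforms renames each matching Hugging Face key to its NeMo counterpart and applies the listed transform functions (defined elsewhere in convert.py, not shown here) to weights that also need repacking or padding, which, judging by their names, covers fusing separate Q/K/V projections into linear_qkv and padding vocabulary-sized tensors. The snippet below only illustrates the name translation implied by one mapping entry; it is not the NeMo implementation.

import re

# Illustration only: how a wildcard entry in `mapping` relates an HF parameter
# name to a NeMo parameter name. The hypothetical `translate` helper assumes
# `*` matches the integer layer index.
hf_pattern = "transformer_encoder.*.wo.weight"
nemo_pattern = "encoder.layers.*.self_attention.linear_proj.weight"


def translate(hf_key: str) -> str | None:
    regex = "^" + re.escape(hf_pattern).replace(r"\*", r"(\d+)") + "$"
    match = re.match(regex, hf_key)
    return nemo_pattern.replace("*", match.group(1)) if match else None


print(translate("transformer_encoder.3.wo.weight"))
# encoder.layers.3.self_attention.linear_proj.weight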

init()

Initialize the converted model.

Source code in bionemo/amplify/convert.py
def init(self) -> BionemoLightningModule:
    """Initialize the converted model."""
    return biobert_lightning_module(self.config, tokenizer=self.tokenizer)

SwiGLU

Bases: Module

Mock SwiGLU module.

This module is a mock implementation of the SwiGLU module that is only used to ensure we can load weight matrices correctly from the Hugging Face checkpoint without installing xformers in the framework container.

Source code in bionemo/amplify/convert.py
class SwiGLU(nn.Module):
    """Mock SwiGLU module.

    This module is a mock implementation of the SwiGLU module that is only used to ensure we can load weight matrices
    correctly from the Hugging Face checkpoint without installing xformers in the framework container.
    """

    def __init__(
        self,
        in_features: int,
        hidden_features: int,
        out_features: int | None = None,
        bias: bool = True,
        *,
        _pack_weights: bool = True,
    ) -> None:
        """Create a SwiGLU module."""
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features

        self.w12: nn.Linear | None = None
        if _pack_weights:
            self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias)
        else:
            self.w12 = None
            self.w1 = nn.Linear(in_features, hidden_features, bias=bias)
            self.w2 = nn.Linear(in_features, hidden_features, bias=bias)
        self.w3 = nn.Linear(hidden_features, out_features, bias=bias)

        self.hidden_features = hidden_features
        self.out_features = out_features
        self.in_features = in_features

    def forward(self, x):  # noqa: D102
        raise NotImplementedError("This SwiGLU is a mock module and should not be used.")
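
A quick shape check (with hypothetical feature sizes) shows the packed layout the mock reproduces: w12 stacks the two input projections into a single [2 * hidden_features, in_features] matrix, which is presumably what the ffn.w12.weight key in the mapping above refers to.

from bionemo.amplify.convert import SwiGLU

# Hypothetical sizes, chosen only to show the packed parameter layout.
ffn = SwiGLU(in_features=640, hidden_features=2560, out_features=640, bias=False)
print(ffn.w12.weight.shape)  # torch.Size([5120, 640]) -- w1 and w2 stacked
print(ffn.w3.weight.shape)   # torch.Size([640, 2560])
# forward() deliberately raises NotImplementedError; only the parameter shapes
# matter for loading the checkpoint.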

__init__(in_features, hidden_features, out_features=None, bias=True, *, _pack_weights=True)

Create a SwiGLU module.

Source code in bionemo/amplify/convert.py
def __init__(
    self,
    in_features: int,
    hidden_features: int,
    out_features: int | None = None,
    bias: bool = True,
    *,
    _pack_weights: bool = True,
) -> None:
    """Create a SwiGLU module."""
    super().__init__()
    out_features = out_features or in_features
    hidden_features = hidden_features or in_features

    self.w12: nn.Linear | None = None
    if _pack_weights:
        self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias)
    else:
        self.w12 = None
        self.w1 = nn.Linear(in_features, hidden_features, bias=bias)
        self.w2 = nn.Linear(in_features, hidden_features, bias=bias)
    self.w3 = nn.Linear(hidden_features, out_features, bias=bias)

    self.hidden_features = hidden_features
    self.out_features = out_features
    self.in_features = in_features

maybe_mock_xformers()

Optionally mock the xformers library to import amplify without the dependency.

Source code in bionemo/amplify/convert.py
def maybe_mock_xformers():
    """Optionally mock the xformers library to import amplify without the dependency."""
    if "xformers" not in sys.modules:
        ops_mock = MagicMock()
        ops_mock.memory_efficient_attention = MagicMock()
        ops_mock.SwiGLU = SwiGLU

        sys.modules["xformers"] = MagicMock()
        sys.modules["xformers.ops"] = ops_mock