Convert

HFAMPLIFYImporter

Bases: ModelConnector[AutoModel, BionemoLightningModule]

Converts a Hugging Face AMPLIFY model to a NeMo AMPLIFY model.

Source code in bionemo/amplify/convert.py
@io.model_importer(BionemoLightningModule, "hf")
class HFAMPLIFYImporter(io.ModelConnector[AutoModel, BionemoLightningModule]):
    """Converts a Hugging Face AMPLIFY model to a NeMo AMPLIFY model."""

    def init(self) -> BionemoLightningModule:
        """Initialize the converted model."""
        return biobert_lightning_module(self.config, tokenizer=self.tokenizer)

    def apply(self, output_path: Path) -> Path:
        """Applies the transformation."""
        source = AutoModel.from_pretrained(str(self), trust_remote_code=True, torch_dtype="auto")
        target = self.init()
        trainer = self.nemo_setup(target)
        self.convert_state(source, target)
        self.nemo_save(output_path, trainer)
        teardown(trainer, target)
        return output_path

    def convert_state(self, source, target):
        """Converting HF state dict to NeMo state dict."""
        mapping = {
            "transformer_encoder.*.wo.weight": "encoder.layers.*.self_attention.linear_proj.weight",
            "transformer_encoder.*.ffn.w12.weight": "encoder.layers.*.mlp.linear_fc1.weight",
            "transformer_encoder.*.ffn.w3.weight": "encoder.layers.*.mlp.linear_fc2.weight",
            "transformer_encoder.*.attention_norm.weight": "encoder.layers.*.self_attention.linear_qkv.layer_norm_weight",
            "transformer_encoder.*.ffn_norm.weight": "encoder.layers.*.mlp.linear_fc1.layer_norm_weight",
            "layer_norm_2.weight": "encoder.final_layernorm.weight",
        }

        # lm_head.bias
        return io.apply_transforms(
            source,
            target,
            mapping=mapping,
            transforms=[_import_qkv_weight, _pad_embeddings, _pad_bias, _pad_output_weights],
        )

    @property
    def tokenizer(self) -> BioNeMoAMPLIFYTokenizer:
        """We just have the one tokenizer for AMPLIFY."""
        return BioNeMoAMPLIFYTokenizer()

    @property
    def config(self) -> AMPLIFYConfig:
        """Returns the transformed AMPLIFY config given the model tag."""
        source = HFAutoConfig.from_pretrained(str(self), trust_remote_code=True)
        output = AMPLIFYConfig(
            num_layers=source.num_hidden_layers,
            hidden_size=source.hidden_size,
            ffn_hidden_size=source.intermediate_size,
            position_embedding_type="rope",
            num_attention_heads=source.num_attention_heads,
            seq_length=source.max_length,
            fp16=(dtype_from_hf(source) == torch.float16),
            bf16=(dtype_from_hf(source) == torch.bfloat16),
            params_dtype=dtype_from_hf(source),
        )

        return output
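
Because the class is registered with @io.model_importer(BionemoLightningModule, "hf"), conversions are normally triggered through NeMo's checkpoint-import machinery using an hf:// source. The connector can also be driven directly; the sketch below assumes it is constructed from a Hugging Face model tag (as str(self) in apply suggests) and uses "chandar-lab/AMPLIFY_120M" purely as a placeholder tag.

from pathlib import Path

from bionemo.amplify.convert import HFAMPLIFYImporter, maybe_mock_xformers

# Register the xformers mock (skipped if xformers has already been imported) so
# the Hugging Face AMPLIFY remote code can be loaded without the real library.
maybe_mock_xformers()

# Placeholder HF tag; substitute the AMPLIFY checkpoint you want to convert.
importer = HFAMPLIFYImporter("chandar-lab/AMPLIFY_120M")
nemo_ckpt = importer.apply(Path("amplify_120m_nemo"))
print(nemo_ckpt)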

config property

Returns the transformed AMPLIFY config given the model tag.

tokenizer property

We just have the one tokenizer for AMPLIFY.

apply(output_path)

Applies the transformation.

Source code in bionemo/amplify/convert.py
def apply(self, output_path: Path) -> Path:
    """Applies the transformation."""
    source = AutoModel.from_pretrained(str(self), trust_remote_code=True, torch_dtype="auto")
    target = self.init()
    trainer = self.nemo_setup(target)
    self.convert_state(source, target)
    self.nemo_save(output_path, trainer)
    teardown(trainer, target)
    return output_path

convert_state(source, target)

Converts the HF state dict to the NeMo state dict.

Source code in bionemo/amplify/convert.py
def convert_state(self, source, target):
    """Converting HF state dict to NeMo state dict."""
    mapping = {
        "transformer_encoder.*.wo.weight": "encoder.layers.*.self_attention.linear_proj.weight",
        "transformer_encoder.*.ffn.w12.weight": "encoder.layers.*.mlp.linear_fc1.weight",
        "transformer_encoder.*.ffn.w3.weight": "encoder.layers.*.mlp.linear_fc2.weight",
        "transformer_encoder.*.attention_norm.weight": "encoder.layers.*.self_attention.linear_qkv.layer_norm_weight",
        "transformer_encoder.*.ffn_norm.weight": "encoder.layers.*.mlp.linear_fc1.layer_norm_weight",
        "layer_norm_2.weight": "encoder.final_layernorm.weight",
    }

    # lm_head.bias
    return io.apply_transforms(
        source,
        target,
        mapping=mapping,
        transforms=[_import_qkv_weight, _pad_embeddings, _pad_bias, _pad_output_weights],
    )
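
The * wildcard on both sides of mapping stands for the encoder layer index: io.apply_transforms renames each matching Hugging Face key to its NeMo counterpart and applies the listed transform functions (defined elsewhere in convert.py, not shown here) to weights that also need repacking or padding, which, judging by their names, covers fusing separate Q/K/V projections into linear_qkv and padding vocabulary-sized tensors. The snippet below only illustrates the name translation implied by one mapping entry; it is not the NeMo implementation.

import re

# Illustration only: how a wildcard entry in `mapping` relates an HF parameter
# name to a NeMo parameter name. The hypothetical `translate` helper assumes
# `*` matches the integer layer index.
hf_pattern = "transformer_encoder.*.wo.weight"
nemo_pattern = "encoder.layers.*.self_attention.linear_proj.weight"


def translate(hf_key: str) -> str | None:
    regex = "^" + re.escape(hf_pattern).replace(r"\*", r"(\d+)") + "$"
    match = re.match(regex, hf_key)
    return nemo_pattern.replace("*", match.group(1)) if match else None


print(translate("transformer_encoder.3.wo.weight"))
# encoder.layers.3.self_attention.linear_proj.weight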

init()

Initialize the converted model.

Source code in bionemo/amplify/convert.py
def init(self) -> BionemoLightningModule:
    """Initialize the converted model."""
    return biobert_lightning_module(self.config, tokenizer=self.tokenizer)

SwiGLU

Bases: Module

Mock SwiGLU module.

This module is a mock implementation of the SwiGLU module that is only used to ensure we can load weight matrices correctly from the Hugging Face checkpoint without installing xformers in the framework container.

Source code in bionemo/amplify/convert.py
class SwiGLU(nn.Module):
    """Mock SwiGLU module.

    This module is a mock implementation of the SwiGLU module that is only used to ensure we can load weight matrices
    correctly from the Hugging Face checkpoint without installing xformers in the framework container.
    """

    def __init__(
        self,
        in_features: int,
        hidden_features: int,
        out_features: int | None = None,
        bias: bool = True,
        *,
        _pack_weights: bool = True,
    ) -> None:
        """Create a SwiGLU module."""
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features

        self.w12: nn.Linear | None = None
        if _pack_weights:
            self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias)
        else:
            self.w12 = None
            self.w1 = nn.Linear(in_features, hidden_features, bias=bias)
            self.w2 = nn.Linear(in_features, hidden_features, bias=bias)
        self.w3 = nn.Linear(hidden_features, out_features, bias=bias)

        self.hidden_features = hidden_features
        self.out_features = out_features
        self.in_features = in_features

    def forward(self, x):  # noqa: D102
        raise NotImplementedError("This SwiGLU is a mock module and should not be used.")
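
A quick shape check (with hypothetical feature sizes) shows the packed layout the mock reproduces: w12 stacks the two input projections into a single [2 * hidden_features, in_features] matrix, which is presumably what the ffn.w12.weight key in the mapping above refers to.

from bionemo.amplify.convert import SwiGLU

# Hypothetical sizes, chosen only to show the packed parameter layout.
ffn = SwiGLU(in_features=640, hidden_features=2560, out_features=640, bias=False)
print(ffn.w12.weight.shape)  # torch.Size([5120, 640]) -- w1 and w2 stacked
print(ffn.w3.weight.shape)   # torch.Size([640, 2560])
# forward() deliberately raises NotImplementedError; only the parameter shapes
# matter for loading the checkpoint.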

__init__(in_features, hidden_features, out_features=None, bias=True, *, _pack_weights=True)

Create a SwiGLU module.

Source code in bionemo/amplify/convert.py
def __init__(
    self,
    in_features: int,
    hidden_features: int,
    out_features: int | None = None,
    bias: bool = True,
    *,
    _pack_weights: bool = True,
) -> None:
    """Create a SwiGLU module."""
    super().__init__()
    out_features = out_features or in_features
    hidden_features = hidden_features or in_features

    self.w12: nn.Linear | None = None
    if _pack_weights:
        self.w12 = nn.Linear(in_features, 2 * hidden_features, bias=bias)
    else:
        self.w12 = None
        self.w1 = nn.Linear(in_features, hidden_features, bias=bias)
        self.w2 = nn.Linear(in_features, hidden_features, bias=bias)
    self.w3 = nn.Linear(hidden_features, out_features, bias=bias)

    self.hidden_features = hidden_features
    self.out_features = out_features
    self.in_features = in_features

maybe_mock_xformers()

Optionally mock the xformers library to import amplify without the dependency.

Source code in bionemo/amplify/convert.py
def maybe_mock_xformers():
    """Optionally mock the xformers library to import amplify without the dependency."""
    if "xformers" not in sys.modules:
        ops_mock = MagicMock()
        ops_mock.memory_efficient_attention = MagicMock()
        ops_mock.SwiGLU = SwiGLU

        sys.modules["xformers"] = MagicMock()
        sys.modules["xformers.ops"] = ops_mock