Skip to content

Utils

Helper utilities for the Evo2 run functions.

infer_model_type(model_size)

Infer the model type from the model size.

Source code in bionemo/evo2/run/utils.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def infer_model_type(model_size: str) -> Literal["hyena", "mamba", "llama"]:
    """Map a model size key to the model family whose options table contains it.

    Args:
        model_size: Key to look up in HYENA_MODEL_OPTIONS, MAMBA_MODEL_OPTIONS
            and LLAMA_MODEL_OPTIONS.

    Returns:
        "hyena", "mamba" or "llama", depending on which options table holds
        ``model_size``.

    Raises:
        ValueError: If any key appears in more than one options table, or if
            ``model_size`` is not a key of any of them.
    """
    option_tables = (HYENA_MODEL_OPTIONS, MAMBA_MODEL_OPTIONS, LLAMA_MODEL_OPTIONS)

    # The lookup below is only unambiguous when no key is shared between tables:
    # the union of all keys must be as large as the individual key counts combined.
    distinct_sizes = set().union(*(table.keys() for table in option_tables))
    combined_count = sum(len(table.keys()) for table in option_tables)
    if len(distinct_sizes) != combined_count:
        raise ValueError(
            "Duplicate model sizes found in HYENA_MODEL_OPTIONS, MAMBA_MODEL_OPTIONS, and LLAMA_MODEL_OPTIONS."
        )

    if model_size in HYENA_MODEL_OPTIONS:
        return "hyena"
    if model_size in MAMBA_MODEL_OPTIONS:
        return "mamba"
    if model_size in LLAMA_MODEL_OPTIONS:
        return "llama"
    raise ValueError(f"Invalid model size: {model_size}")

patch_eden_tokenizer(tokenizer)

Patch the Eden tokenizer to work with the Evo2 tokenizer.

Source code in bionemo/evo2/run/utils.py
25
26
27
28
29
30
31
32
33
def patch_eden_tokenizer(tokenizer):
    """Overwrite the special-token id attributes on an Eden tokenizer in place.

    Sets the private ``_bos_id``, ``_eos_id``, ``_sep_id`` and ``_pad_id``
    attributes to 1, 2, 3 and 0 respectively. Nothing is returned; the
    tokenizer object passed in is mutated.

    Args:
        tokenizer: Tokenizer object whose private id attributes are assigned.
    """
    # Writing the private attrs is what makes tokenizer.bos_id/.eos_id/.pad_id work.
    special_token_ids = (
        ("_bos_id", 1),
        ("_eos_id", 2),
        ("_sep_id", 3),
        ("_pad_id", 0),
    )
    for attr_name, token_id in special_token_ids:
        setattr(tokenizer, attr_name, token_id)