Skip to content

SCDL constants

Constants and enums shared across SCDL format specification and implementation.

This module provides a single source of truth for:

- Array identifiers and their filesystem mappings
- Data type specifications
- Backend implementations
- File and metadata constants

ArrayDType

Bases: IntEnum

Numpy dtype specification for arrays in SCDL archives.

Integer values are used in the binary format for efficient storage.

Source code in bionemo/scdl/util/scdl_constants.py
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
class ArrayDType(IntEnum):
    """Numpy dtype specification for arrays in SCDL archives.

    Integer values are used in the binary format for efficient storage.
    """

    UINT8_ARRAY = 1
    UINT16_ARRAY = 2
    UINT32_ARRAY = 3
    UINT64_ARRAY = 4
    FLOAT16_ARRAY = 5
    FLOAT32_ARRAY = 6
    FLOAT64_ARRAY = 7
    STRING_ARRAY = 8
    FIXED_STRING_ARRAY = 9

    @property
    def numpy_dtype_string(self) -> str:
        """Get the dtype label associated with this member.

        Numeric members return their NumPy dtype name (e.g. ``"uint8"``);
        the two string members return ``"string"`` and ``"fixed_string"``.
        """
        # Look members up on the class rather than through the instance.
        members = type(self)
        dtype_map = {
            members.UINT8_ARRAY: "uint8",
            members.UINT16_ARRAY: "uint16",
            members.UINT32_ARRAY: "uint32",
            members.UINT64_ARRAY: "uint64",
            members.FLOAT16_ARRAY: "float16",
            members.FLOAT32_ARRAY: "float32",
            members.FLOAT64_ARRAY: "float64",
            members.STRING_ARRAY: "string",
            members.FIXED_STRING_ARRAY: "fixed_string",
        }
        return dtype_map[self]

    @classmethod
    def from_numpy_dtype(cls, dtype) -> "ArrayDType":
        """Convert a numpy dtype to ArrayDType enum.

        Accepted spellings of a dtype:

        * a type class (its ``__name__`` is used, e.g. ``np.float32``, ``str``);
        * an object exposing a string ``name`` attribute (dtype instances);
        * an object exposing ``dtype`` (its ``dtype.name`` is used);
        * anything else, via ``str(dtype)``.

        The resulting string is matched against dtype names (``"uint8"``),
        array-protocol type codes with or without a byte-order mark
        (``"<f4"``, ``"|u1"``, ``"=u2"``, ``"f2"``), unicode codes
        (``"<U10"``, ``"U10"``) and the ``"str<bits>"`` names that NumPy
        reports for fixed-width unicode dtypes.

        Args:
            dtype: numpy dtype object or string representation

        Returns:
            Corresponding ArrayDType enum value

        Raises:
            ValueError: If dtype is not supported
        """
        # Reduce the input to a plain string. Only trust ``name`` when it is
        # actually a string, so objects with an unrelated ``name`` attribute
        # (e.g. ``None``) fall through instead of crashing further down.
        if isinstance(dtype, type) and hasattr(dtype, "__name__"):
            # Handle numpy type classes like np.float32, np.uint32
            dtype_str = dtype.__name__
        elif isinstance(getattr(dtype, "name", None), str):
            # Handle numpy dtype instances
            dtype_str = dtype.name
        elif hasattr(dtype, "dtype"):
            dtype_str = dtype.dtype.name
        else:
            dtype_str = str(dtype)

        # Map numpy dtype names to ArrayDType enums
        dtype_map = {
            "uint8": cls.UINT8_ARRAY,
            "uint16": cls.UINT16_ARRAY,
            "uint32": cls.UINT32_ARRAY,
            "uint64": cls.UINT64_ARRAY,
            "float16": cls.FLOAT16_ARRAY,
            "float32": cls.FLOAT32_ARRAY,
            "float64": cls.FLOAT64_ARRAY,
            "object": cls.STRING_ARRAY,  # Object arrays often contain strings
            "str": cls.STRING_ARRAY,
        }

        # Array-protocol type codes (kind character + width in bytes). These
        # are matched exactly: a substring test on the digits would misread
        # e.g. "<f12" as float16.
        typecode_map = {
            "u1": cls.UINT8_ARRAY,
            "u2": cls.UINT16_ARRAY,
            "u4": cls.UINT32_ARRAY,
            "u8": cls.UINT64_ARRAY,
            "f2": cls.FLOAT16_ARRAY,
            "f4": cls.FLOAT32_ARRAY,
            "f8": cls.FLOAT64_ARRAY,
        }

        # Strip an optional byte-order mark. A tuple is used on purpose:
        # ``"" in "<>=|"`` would be True for an empty string.
        has_byte_order = dtype_str[:1] in ("<", ">", "=", "|")
        typecode = dtype_str[1:] if has_byte_order else dtype_str

        if typecode in typecode_map:
            return typecode_map[typecode]
        if has_byte_order and typecode.startswith("U"):
            return cls.FIXED_STRING_ARRAY
        if typecode.startswith("U") and typecode[1:].isdigit():
            # Bare unicode code such as "U10"; digits are required so that
            # arbitrary words starting with "U" are not accepted.
            return cls.FIXED_STRING_ARRAY
        if dtype_str.startswith("str") and dtype_str[3:].isdigit():
            # Name form of a fixed-width unicode dtype, e.g. "str320".
            return cls.FIXED_STRING_ARRAY
        if has_byte_order and typecode.startswith("i"):
            raise ValueError(f"Signed integer dtypes are not supported: {dtype_str}")

        # Try direct mapping
        if dtype_str in dtype_map:
            return dtype_map[dtype_str]

        # Substring fallback for decorated spellings that embed a known name
        # (for example a string such as "torch.float32").
        if "float32" in dtype_str or "f4" in dtype_str:
            return cls.FLOAT32_ARRAY
        elif "float64" in dtype_str or "f8" in dtype_str:
            return cls.FLOAT64_ARRAY
        # Do not silently map signed ints; require explicit handling upstream
        elif "int32" in dtype_str or "i4" in dtype_str or "int64" in dtype_str or "i8" in dtype_str:
            raise ValueError(f"Signed integer dtypes are not supported: {dtype_str}")

        raise ValueError(f"Unsupported numpy dtype: {dtype_str} (original: {dtype})")

numpy_dtype_string property

Get the corresponding NumPy dtype string.

from_numpy_dtype(dtype) classmethod

Convert a numpy dtype to ArrayDType enum.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| dtype | | numpy dtype object or string representation | required |

Returns:

| Type | Description |
| --- | --- |
| ArrayDType | Corresponding ArrayDType enum value |

Raises:

| Type | Description |
| --- | --- |
| ValueError | If dtype is not supported |

Source code in bionemo/scdl/util/scdl_constants.py
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
@classmethod
def from_numpy_dtype(cls, dtype) -> "ArrayDType":
    """Convert a numpy dtype to ArrayDType enum.

    Accepted spellings of a dtype:

    * a type class (its ``__name__`` is used, e.g. ``np.float32``, ``str``);
    * an object exposing a string ``name`` attribute (dtype instances);
    * an object exposing ``dtype`` (its ``dtype.name`` is used);
    * anything else, via ``str(dtype)``.

    The resulting string is matched against dtype names (``"uint8"``),
    array-protocol type codes with or without a byte-order mark
    (``"<f4"``, ``"|u1"``, ``"=u2"``, ``"f2"``), unicode codes
    (``"<U10"``, ``"U10"``) and the ``"str<bits>"`` names that NumPy
    reports for fixed-width unicode dtypes.

    Args:
        dtype: numpy dtype object or string representation

    Returns:
        Corresponding ArrayDType enum value

    Raises:
        ValueError: If dtype is not supported
    """
    # Reduce the input to a plain string. Only trust ``name`` when it is
    # actually a string, so objects with an unrelated ``name`` attribute
    # (e.g. ``None``) fall through instead of crashing further down.
    if isinstance(dtype, type) and hasattr(dtype, "__name__"):
        # Handle numpy type classes like np.float32, np.uint32
        dtype_str = dtype.__name__
    elif isinstance(getattr(dtype, "name", None), str):
        # Handle numpy dtype instances
        dtype_str = dtype.name
    elif hasattr(dtype, "dtype"):
        dtype_str = dtype.dtype.name
    else:
        dtype_str = str(dtype)

    # Map numpy dtype names to ArrayDType enums
    dtype_map = {
        "uint8": cls.UINT8_ARRAY,
        "uint16": cls.UINT16_ARRAY,
        "uint32": cls.UINT32_ARRAY,
        "uint64": cls.UINT64_ARRAY,
        "float16": cls.FLOAT16_ARRAY,
        "float32": cls.FLOAT32_ARRAY,
        "float64": cls.FLOAT64_ARRAY,
        "object": cls.STRING_ARRAY,  # Object arrays often contain strings
        "str": cls.STRING_ARRAY,
    }

    # Array-protocol type codes (kind character + width in bytes). These are
    # matched exactly: a substring test on the digits would misread e.g.
    # "<f12" as float16.
    typecode_map = {
        "u1": cls.UINT8_ARRAY,
        "u2": cls.UINT16_ARRAY,
        "u4": cls.UINT32_ARRAY,
        "u8": cls.UINT64_ARRAY,
        "f2": cls.FLOAT16_ARRAY,
        "f4": cls.FLOAT32_ARRAY,
        "f8": cls.FLOAT64_ARRAY,
    }

    # Strip an optional byte-order mark. A tuple is used on purpose:
    # ``"" in "<>=|"`` would be True for an empty string.
    has_byte_order = dtype_str[:1] in ("<", ">", "=", "|")
    typecode = dtype_str[1:] if has_byte_order else dtype_str

    if typecode in typecode_map:
        return typecode_map[typecode]
    if has_byte_order and typecode.startswith("U"):
        return cls.FIXED_STRING_ARRAY
    if typecode.startswith("U") and typecode[1:].isdigit():
        # Bare unicode code such as "U10"; digits are required so that
        # arbitrary words starting with "U" are not accepted.
        return cls.FIXED_STRING_ARRAY
    if dtype_str.startswith("str") and dtype_str[3:].isdigit():
        # Name form of a fixed-width unicode dtype, e.g. "str320".
        return cls.FIXED_STRING_ARRAY
    if has_byte_order and typecode.startswith("i"):
        raise ValueError(f"Signed integer dtypes are not supported: {dtype_str}")

    # Try direct mapping
    if dtype_str in dtype_map:
        return dtype_map[dtype_str]

    # Substring fallback for decorated spellings that embed a known name
    # (for example a string such as "torch.float32").
    if "float32" in dtype_str or "f4" in dtype_str:
        return cls.FLOAT32_ARRAY
    elif "float64" in dtype_str or "f8" in dtype_str:
        return cls.FLOAT64_ARRAY
    # Do not silently map signed ints; require explicit handling upstream
    elif "int32" in dtype_str or "i4" in dtype_str or "int64" in dtype_str or "i8" in dtype_str:
        raise ValueError(f"Signed integer dtypes are not supported: {dtype_str}")

    raise ValueError(f"Unsupported numpy dtype: {dtype_str} (original: {dtype})")

Backend

Bases: IntEnum

Backend implementations for SCDL archives.

Defines how array data is stored and accessed.

Source code in bionemo/scdl/util/scdl_constants.py
157
158
159
160
161
162
163
class Backend(IntEnum):
    """Storage backends available for SCDL archives.

    Each member names one way in which array data is stored and accessed.
    """

    # The only backend declared in this enum.
    MEMMAP_V0 = 1

FileNames

Bases: str, Enum

All files in SCDL archive.

This enum contains both array data files and special metadata files. For arrays, use the array_name property to get the canonical header name.

Source code in bionemo/scdl/util/scdl_constants.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
class FileNames(str, Enum):
    """Names of the entries that make up an SCDL archive.

    Members are plain strings (the enum mixes in ``str``), so each one
    compares equal to the name it holds.

    NOTE(review): earlier documentation referred to an ``array_name``
    property on this enum; no such property is defined here.
    """

    # Entries with a ".npy" suffix
    DATA = "data.npy"
    ROWPTR = "row_ptr.npy"
    COLPTR = "col_ptr.npy"
    NEIGHBOR_INDICES = "neighbor_indices.npy"
    NEIGHBOR_INDICES_PTR = "neighbor_indptr.npy"
    NEIGHBOR_VALUES = "neighbor_values.npy"

    # Remaining entries: JSON files, the header file and the features entry
    METADATA = "metadata.json"
    FEATURES = "features"
    VERSION = "version.json"
    HEADER = "header.sch"

Mode

Bases: str, Enum

Valid modes for file I/O operations.

The write append mode is 'w+' while the read append mode is 'r+'.

Source code in bionemo/scdl/util/scdl_constants.py
166
167
168
169
170
171
172
173
174
175
class Mode(str, Enum):
    """File I/O modes accepted by SCDL.

    Members are plain strings (the enum mixes in ``str``). The two
    "append" members carry a trailing ``+``: ``"w+"`` for create-append
    and ``"r+"`` for read-append.
    """

    # Modes with a trailing "+"
    CREATE_APPEND = "w+"
    READ_APPEND = "r+"

    # Modes without a trailing "+"
    READ = "r"
    CREATE = "w"

NeighborSamplingStrategy

Bases: str, Enum

Valid sampling strategies for neighbor selection.

Source code in bionemo/scdl/util/scdl_constants.py
178
179
180
181
182
class NeighborSamplingStrategy(str, Enum):
    """Sampling strategies that may be used when selecting neighbors.

    Members are plain strings (the enum mixes in ``str``).
    """

    RANDOM = "random"
    FIRST = "first"