Skip to content

Single cell row dataset

SingleCellRowDataset

Bases: SingleCellRowDatasetCore, Dataset

One row in an ann dataframe (hdf5 file with a sparse array format).

Source code in bionemo/scdl/api/single_cell_row_dataset.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
class SingleCellRowDataset(SingleCellRowDatasetCore, Dataset):
    """One row in an ann dataframe (hdf5 file with a sparse array format).

    Abstract interface: on top of the abstract methods inherited from
    ``SingleCellRowDatasetCore``, concrete subclasses must implement
    ``load`` and ``save``.
    """

    @abstractmethod
    def load(self, data_path: str) -> None:
        """Load the data from ``data_path``.

        Calls to ``__len__`` and ``__getitem__`` must be valid after a call
        to this method.

        Args:
            data_path: Location to load the data from.

        Raises:
            NotImplementedError: Always, in this abstract declaration.
        """
        raise NotImplementedError()

    @abstractmethod
    def save(self, data_path: str) -> None:
        """Save the class to an archive at ``data_path``.

        Args:
            data_path: Location of the archive to write.

        Raises:
            NotImplementedError: Always, in this abstract declaration.
        """
        raise NotImplementedError()

load(data_path) abstractmethod

Loads the data from datapath.

Calls to `__len__` and `__getitem__` must be valid after a call to this method.

Source code in bionemo/scdl/api/single_cell_row_dataset.py
 93
 94
 95
 96
 97
 98
 99
100
@abstractmethod
def load(self, data_path: str) -> None:
    """Load the data from ``data_path``.

    Once this method has been called, ``__len__`` and ``__getitem__``
    must be valid to call.

    Args:
        data_path: Location to load the data from.

    Raises:
        NotImplementedError: Always, in this abstract declaration.
    """
    raise NotImplementedError()

save(data_path) abstractmethod

Saves the class to an archive at datapath.

Source code in bionemo/scdl/api/single_cell_row_dataset.py
102
103
104
105
@abstractmethod
def save(self, data_path: str) -> None:
    """Save the class to an archive at ``data_path``.

    Args:
        data_path: Location of the archive to write.

    Raises:
        NotImplementedError: Always, in this abstract declaration.
    """
    raise NotImplementedError()

SingleCellRowDatasetCore

Bases: ABC

Implements the actual ann data-like interface.

Source code in bionemo/scdl/api/single_cell_row_dataset.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
class SingleCellRowDatasetCore(ABC):
    """Implements the actual ann data-like interface.

    Every method except ``sparsity`` is abstract and must be provided by a
    concrete subclass; ``sparsity`` is derived from ``number_of_values``
    and ``number_nonzero_values``.
    """

    @abstractmethod
    def load_h5ad(self, h5ad_path: str) -> None:
        """Load an H5AD file and convert it into the backing representation.

        Calls to ``__len__`` and ``__getitem__`` must be valid after a call
        to this method.

        Args:
            h5ad_path: Location of the H5AD file to load.

        Raises:
            NotImplementedError: Always, in this abstract declaration.
        """
        raise NotImplementedError()

    @abstractmethod
    def number_nonzero_values(self) -> int:
        """Return the number of non-zero values in the data."""
        raise NotImplementedError()

    @abstractmethod
    def number_of_values(self) -> int:
        """Return the total number of values in the data."""
        raise NotImplementedError()

    @abstractmethod
    def number_of_rows(self) -> int:
        """Return the number of rows in the data."""
        raise NotImplementedError()

    @abstractmethod
    def shape(self) -> Tuple[int, List[int]]:
        """Return the shape of the object, which may be ragged.

        A ragged dataset is where the number and dimension of features
        can be different at every row.

        Returns:
            A tuple of an int and a list of ints, as annotated; the exact
            meaning of each element is defined by the implementation.
        """
        raise NotImplementedError()

    def sparsity(self) -> float:
        """Return the sparsity of the underlying data.

        Sparsity is defined as the fraction of zero values in the data.
        It is within the range [0, 1.0]. If there are no values, the
        sparsity is defined as 0.0.

        Returns:
            ``(number_of_values - number_nonzero_values) / number_of_values``,
            or ``0.0`` when ``number_of_values()`` is zero.
        """
        total_values = self.number_of_values()
        # Guard the division: an empty dataset is defined to have sparsity 0.0.
        if total_values == 0:
            return 0.0

        zero_values = total_values - self.number_nonzero_values()
        return zero_values / total_values

    @abstractmethod
    def version(self) -> str:
        """Return a version number.

        (following <major>.<minor>.<point> convention).

        Raises:
            NotImplementedError: Always, in this abstract declaration.
        """
        # Raise like every other abstract method here instead of silently
        # returning None, which would contradict the ``-> str`` annotation.
        raise NotImplementedError()

load_h5ad(h5ad_path) abstractmethod

Loads an H5AD file and converts it into the backing representation.

Calls to `__len__` and `__getitem__` must be valid after a call to this method.

Source code in bionemo/scdl/api/single_cell_row_dataset.py
32
33
34
35
36
37
38
39
@abstractmethod
def load_h5ad(self, h5ad_path: str) -> None:
    """Load an H5AD file and convert it into the backing representation.

    Once this method has been called, ``__len__`` and ``__getitem__``
    must be valid to call.

    Args:
        h5ad_path: Location of the H5AD file to load.

    Raises:
        NotImplementedError: Always, in this abstract declaration.
    """
    raise NotImplementedError()

number_nonzero_values() abstractmethod

Return the number of non-zero values in the data.

Source code in bionemo/scdl/api/single_cell_row_dataset.py
41
42
43
44
@abstractmethod
def number_nonzero_values(self) -> int:
    """Return how many values in the data are non-zero.

    Raises:
        NotImplementedError: Always, in this abstract declaration.
    """
    raise NotImplementedError()

number_of_rows() abstractmethod

Return the number of rows in the data.

Source code in bionemo/scdl/api/single_cell_row_dataset.py
51
52
53
54
@abstractmethod
def number_of_rows(self) -> int:
    """Return how many rows the data contains.

    Raises:
        NotImplementedError: Always, in this abstract declaration.
    """
    raise NotImplementedError()

number_of_values() abstractmethod

Return the total number of values in the data.

Source code in bionemo/scdl/api/single_cell_row_dataset.py
46
47
48
49
@abstractmethod
def number_of_values(self) -> int:
    """Return the total count of values in the data.

    Raises:
        NotImplementedError: Always, in this abstract declaration.
    """
    raise NotImplementedError()

shape() abstractmethod

Returns the shape of the object, which may be ragged.

A ragged dataset is where the number and dimension of features can be different at every row.

Source code in bionemo/scdl/api/single_cell_row_dataset.py
56
57
58
59
60
61
62
63
@abstractmethod
def shape(self) -> Tuple[int, List[int]]:
    """Return the shape of the object, which may be ragged.

    In a ragged dataset the number and dimension of features can differ
    from one row to the next.

    Returns:
        A tuple of an int and a list of ints, as annotated; the exact
        meaning of each element is defined by the implementation.

    Raises:
        NotImplementedError: Always, in this abstract declaration.
    """
    raise NotImplementedError()

sparsity()

Return the sparsity of the underlying data.

Sparsity is defined as the fraction of zero values in the data. It is within the range [0, 1.0]. If there are no values, the sparsity is defined as 0.0.

Source code in bionemo/scdl/api/single_cell_row_dataset.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def sparsity(self) -> float:
    """Compute the fraction of values in the data that are zero.

    The result lies in the range [0, 1.0]. When the data holds no values
    at all, the sparsity is defined to be 0.0.

    Returns:
        ``(number_of_values - number_nonzero_values) / number_of_values``,
        or ``0.0`` when ``number_of_values()`` is zero.
    """
    n_total = self.number_of_values()
    if n_total != 0:
        # Zeros are whatever is left once the non-zero entries are removed.
        n_zero = n_total - self.number_nonzero_values()
        return n_zero / n_total
    # No values at all: avoid dividing by zero.
    return 0.0

version() abstractmethod

Returns a version number.

(following `<major>.<minor>.<point>` convention).

Source code in bionemo/scdl/api/single_cell_row_dataset.py
81
82
83
84
85
86
87
@abstractmethod
def version(self) -> str:
    """Return a version number.

    (following <major>.<minor>.<point> convention).

    Raises:
        NotImplementedError: Always, in this abstract declaration.
    """
    # Raise like the other abstract methods of this interface instead of
    # silently returning None, which would contradict the ``-> str`` annotation.
    raise NotImplementedError()