Skip to content

Dependency graph

build_dependency_graph(base_dir, directories)

Build a dependency graph for all sub-packages.

Source code in bionemo/fw/dependency_graph.py
64
65
66
67
68
69
70
71
72
73
74
75
76
def build_dependency_graph(base_dir, directories):
    """Collect the declared dependencies of every sub-package.

    For each entry in ``directories`` the file
    ``base_dir / <entry> / "pyproject.toml"`` is handed to
    ``parse_dependencies``, which yields a ``(package_name, dependencies)``
    pair.

    Args:
        base_dir: Root path; must support the ``/`` operator (e.g. ``Path``).
        directories: Iterable of sub-package directory names under ``base_dir``.

    Returns:
        A ``defaultdict(dict)`` mapping package name to the dependencies
        reported for it. Files that yield a falsy package name are left out.
    """
    graph = defaultdict(dict)

    # Build every path first, then parse them in the same order.
    manifests = [base_dir / name / "pyproject.toml" for name in directories]

    for manifest in manifests:
        name, declared = parse_dependencies(manifest)
        if not name:
            # No usable package name was found; nothing to key the entry on.
            continue
        graph[name] = declared

    return graph

find_bionemo_subpackages(base_dir, directories)

Find all unique bionemo.<name> imports in Python files within a directory.

Source code in bionemo/fw/dependency_graph.py
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def find_bionemo_subpackages(base_dir, directories):
    """Find all unique `bionemo.<name>` imports in each sub-package's sources.

    Every ``*.py`` file below ``base_dir / <dir_name> / "src"`` is scanned for
    lines beginning with ``import bionemo.<name>`` or ``from bionemo.<name>``.

    Args:
        base_dir: Root path; must support the ``/`` operator (e.g. ``Path``).
        directories: Iterable of sub-package directory names under ``base_dir``.

    Returns:
        A dict mapping each directory name to the set of ``"bionemo-<name>"``
        strings imported by its sources, with the directory's own name removed.
        Files that cannot be read are reported via ``print`` and skipped.
    """
    import_re = re.compile(
        r"^\s*(?:from|import)\s+bionemo\.([a-zA-Z_][a-zA-Z0-9_]*)(?:\s+|\.|$)", re.MULTILINE
    )
    imports_by_dir = {}

    for dir_name in directories:
        src_root = Path(base_dir / dir_name / "src")
        imported = set()

        for py_file in src_root.rglob("*.py"):
            try:
                with open(py_file, "r", encoding="utf-8") as handle:
                    imported.update(import_re.findall(handle.read()))
            except Exception as e:
                # Best effort: one unreadable file must not abort the scan.
                print(f"Error reading file {py_file}: {e}")

        package_names = {f"bionemo-{name}" for name in imported}
        # A package importing itself is not a dependency.
        package_names.discard(dir_name)
        imports_by_dir[dir_name] = package_names

    return imports_by_dir

parse_dependencies(pyproject_path)

Parse dependencies from a pyproject.toml file.

Source code in bionemo/fw/dependency_graph.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def _split_requirement(requirement):
    """Split a dependency string into ``(name, version_spec)``.

    Extras (``[...]``) and environment markers (everything after ``;``) are
    discarded. ``version_spec`` is an empty string when nothing follows the name.
    """
    match = re.match(r"\s*([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?:\[[^\]]*\])?\s*([^;]*)", requirement)
    if match is None:
        return requirement.strip(), ""
    return match.group(1), match.group(2).strip()


def parse_dependencies(pyproject_path):
    """Parse the package name and bionemo dependencies from a pyproject.toml file.

    Args:
        pyproject_path: Path of the ``pyproject.toml`` file to read.

    Returns:
        A ``(package_name, dependencies)`` tuple. ``package_name`` is
        ``project.name`` or ``None`` when it is missing. ``dependencies`` maps
        each dependency whose name starts with ``"bionemo-"`` to its version
        specifier, or to ``"unpinned"`` when none is given. A
        ``tool.maturin.module-name`` starting with ``"bionemo."`` is also
        recorded (dots replaced by dashes) as ``"unpinned"``.
    """
    # TOML documents are UTF-8; do not depend on the platform's default encoding.
    with open(pyproject_path, "r", encoding="utf-8") as f:
        pyproject_data = toml.load(f)
    dependencies = {}
    package_name = None

    # Extract package name
    try:
        package_name = pyproject_data["project"]["name"]
    except KeyError:
        print(f"Warning: Could not find package name in {pyproject_path}")

    # Extract dependencies
    try:
        deps = pyproject_data["project"]["dependencies"]
    except KeyError:
        print(f"Warning: Could not find dependencies in {pyproject_path}")
    else:
        if isinstance(deps, dict):  # name -> version mapping
            for dep, version in deps.items():
                if dep.startswith("bionemo-"):
                    dependencies[dep] = version  # Keep dependency with its version
        elif isinstance(deps, list):  # list of requirement strings
            for requirement in deps:
                # Key on the bare package name so "bionemo-core>=2.0" and
                # "bionemo-core" refer to the same graph node.
                name, version_spec = _split_requirement(requirement)
                if name.startswith("bionemo-"):
                    dependencies[name] = version_spec or "unpinned"

    # A maturin table may omit "module-name"; treat that as "nothing to add".
    maturin_config = pyproject_data.get("tool", {}).get("maturin")
    if isinstance(maturin_config, dict):
        module_name = maturin_config.get("module-name")
        if isinstance(module_name, str) and module_name.startswith("bionemo."):
            dependencies[module_name.replace(".", "-")] = "unpinned"

    return package_name, dependencies

parse_tach_toml(toml_path)

Parse dependencies from a tach.toml file.

Source code in bionemo/fw/dependency_graph.py
149
150
151
152
153
154
155
156
157
158
def parse_tach_toml(toml_path):
    """Parse module dependencies from a tach.toml file.

    Args:
        toml_path: Path of the ``tach.toml`` file to read.

    Returns:
        A dict mapping each entry's ``path`` in the top-level ``modules`` list
        to the list of its ``depends_on`` items, with every ``.`` replaced by
        ``-`` in both keys and values.

    Raises:
        KeyError: If ``modules`` is missing, or a module entry lacks ``path``
            or ``depends_on``.
    """
    # TOML documents are UTF-8; do not depend on the platform's default encoding.
    with open(toml_path, "r", encoding="utf-8") as f:
        toml_data = toml.load(f)

    # The file is fully parsed at this point, so the handle is no longer needed.
    return {
        module["path"].replace(".", "-"): [item.replace(".", "-") for item in module["depends_on"]]
        for module in toml_data["modules"]
    }

resolve_dependencies(subpackage, toml_imports, resolved=None, seen=None)

Recursively resolve all dependencies, including transitive ones.

Source code in bionemo/fw/dependency_graph.py
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
def resolve_dependencies(subpackage, toml_imports, resolved=None, seen=None):
    """Collect every direct and transitive dependency of ``subpackage``.

    Args:
        subpackage: Name to start from.
        toml_imports: Mapping of package name to an iterable of dependency names.
        resolved: Optional set that receives the dependencies (mutated in place).
        seen: Optional set of packages already expanded (mutated in place).

    Returns:
        The ``resolved`` set. It is returned unchanged when ``subpackage`` is
        already in ``seen``.
    """
    resolved = set() if resolved is None else resolved
    seen = set() if seen is None else seen

    if subpackage in seen:
        return resolved  # Already expanded; also breaks dependency cycles
    seen.add(subpackage)

    # Explicit work list instead of recursion; each package is expanded once.
    pending = [subpackage]
    while pending:
        current = pending.pop()
        for dep in toml_imports.get(current, []):
            resolved.add(dep)
            # Only names that are themselves keys can be expanded further.
            if dep in toml_imports and dep not in seen:
                seen.add(dep)
                pending.append(dep)

    return resolved

visualize_dependency_graph(dependency_graph, filename)

Visualize the dependency graph using NetworkX.

Source code in bionemo/fw/dependency_graph.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
def visualize_dependency_graph(dependency_graph, filename):
    """Draw the dependency graph with NetworkX and save the image to ``filename``.

    Args:
        dependency_graph: Mapping of package name to its dependencies. A dict
            value is read as dependency name -> version label (drawn on the
            edge); any other iterable is read as plain dependency names.
        filename: Destination passed to ``plt.savefig``.
    """
    G = nx.DiGraph()
    edge_labels = {}

    # Track all packages explicitly
    all_packages = set(dependency_graph.keys())

    for package, dependencies in dependency_graph.items():
        if isinstance(dependencies, dict):
            for dep, version in dependencies.items():
                G.add_edge(dep, package)  # Edge points from the dependency to the package using it
                edge_labels[(dep, package)] = version  # Label the edge with the version
                all_packages.add(dep)
        else:
            for dep in dependencies:
                G.add_edge(dep, package)  # Edge points from the dependency to the package using it
                all_packages.add(dep)

    # Ensure isolated nodes (without edges) are included in the graph.
    # Sorted so their position in the layout does not change between runs
    # (set iteration order of strings is not stable across interpreter runs).
    for package in sorted(all_packages):
        if package not in G:
            G.add_node(package)
    # Use a circular layout, ensuring packages are evenly distributed
    pos = nx.circular_layout(G)

    fig = plt.figure(figsize=(14, 10))
    try:
        nx.draw(
            G,
            pos,
            with_labels=True,
            node_size=3000,
            node_color="lightblue",
            font_size=10,
            font_weight="bold",
            arrowsize=20,
            edge_color="gray",
        )

        # Draw edge labels for the dependency versions
        nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8, font_color="red")
        plt.title("Dependency Graph", fontsize=16)
        plt.savefig(filename)
    finally:
        # pyplot keeps figures alive until they are closed; release this one
        # so repeated calls do not accumulate open figures.
        plt.close(fig)