Source code for MHCXGraph.app

import logging
import os
import webbrowser
from itertools import combinations
from pathlib import Path

from MHCXGraph.cli.cli_parser import parse_args
from MHCXGraph.core.residue_tracking import ResidueTracker
from MHCXGraph.core.tracking import init_tracker
from MHCXGraph.scripts.create_heatmaps import create_heatmap
from MHCXGraph.scripts.renumber_MHCI_imgt import load_mhci_templates, process_structure_file_mhci
from MHCXGraph.scripts.renumber_MHCII_imgt import load_mhcii_templates, process_structure_file_mhcii
from MHCXGraph.utils.logging_utils import setup_logging
from MHCXGraph.utils.preprocessing import create_graphs
from MHCXGraph.workflow.association import run_association_task
from MHCXGraph.workflow.manifest import build_association_config, load_manifest

def create_master_dashboard(export_data, output_dir, log):
    """
    Render the aggregated results into a single self-contained HTML dashboard.

    The template ``assets/dashboard/base.html`` (located next to this module)
    is filled in by plain string replacement of its ``__*_INJECTION__``
    placeholders with: third-party JavaScript libraries (inlined when a local
    copy exists under ``assets/dashboard/js``, otherwise referenced from a
    CDN), base64-embedded logos, the sidebar and export-modal fragments, the
    dashboard's own scripts and the JSON-serialised ``export_data``.

    Parameters
    ----------
    export_data : dict
        Dashboard payload. It is serialised with ``json.dumps`` and must
        therefore be JSON-serialisable. ``actual_mode`` (falling back to
        ``mode``) equal to ``"screening"`` selects the screening variant;
        otherwise ``mode`` selects between the pairwise and multiple file
        names.
    output_dir : pathlib.Path or str
        Directory receiving the HTML file. It is created when missing.
    log : logging.Logger
        Logger used for debug, error and info messages.

    Returns
    -------
    None
        The dashboard is written to disk. Nothing is written when
        ``base.html`` is missing or empty; an error is logged instead.
    """
    import base64
    import json
    import re
    from pathlib import Path

    assets_dir = Path(__file__).resolve().parent / "assets"
    html_files = assets_dir / "dashboard"
    js_files = html_files / "js"

    def _load_asset(folder, filename, default=""):
        """Return the text of ``folder / filename`` or ``default`` when absent."""
        path = folder / filename
        return path.read_text(encoding="utf-8") if path.exists() else default

    def _split_html(raw_html):
        """Separate the first ``<style>`` body from the remaining markup."""
        css_match = re.search(r"<style>(.*?)</style>", raw_html, re.DOTALL)
        css = css_match.group(1) if css_match else ""
        clean_html = re.sub(r"<style>.*?</style>", "", raw_html, flags=re.DOTALL).strip()
        return css, clean_html

    def _inject_js(html, name, code):
        """Replace the ``__<NAME>_JS_INJECTION__`` placeholder with ``code``."""
        return html.replace(f"__{name.upper()}_JS_INJECTION__", code)

    def _library_tag(filename, cdn_tag):
        """Inline the local copy of a JS library, or fall back to its CDN tag."""
        local_copy = js_files / filename
        if not local_copy.exists():
            return cdn_tag
        library_source = local_copy.read_text(encoding="utf-8")
        return f"<script>\n{library_source}\n</script>"

    def _encode_image(path):
        """Return the file content as base64 text, or ``None`` when absent."""
        if not path.exists():
            return None
        return base64.b64encode(path.read_bytes()).decode("utf-8")

    vis_injection = _library_tag(
        "vis-network.min.js",
        '<script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>',
    )
    mol3d_injection = _library_tag(
        "3Dmol-min.js",
        '<script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>',
    )
    fabric_injection = _library_tag(
        "fabric.min.js",
        '<script src="https://cdnjs.cloudflare.com/ajax/libs/fabric.js/5.3.1/fabric.min.js"></script>',
    )
    plotly_injection = _library_tag(
        "plotly.min.js",
        '<script src="https://cdnjs.cloudflare.com/ajax/libs/plotly.js/2.32.0/plotly.min.js"></script>',
    )

    # Product logo doubles as favicon; a plain text label is used without it.
    mhcx_logo_injection = "<b>MHCXGraph</b>\n"
    favicon_injection = ""
    encoded = _encode_image(assets_dir / "images/MHCXGraph logo.png")
    if encoded is not None:
        mhcx_logo_injection = f'<img src="data:image/png;base64,{encoded}" alt="MHCXGraph Logo" style="width: 100%; height: 100%; margin-bottom: -10px;">'
        favicon_injection = f'<link rel="icon" type="image/png" href="data:image/png;base64,{encoded}">'

    # Light and dark variants are both embedded, tagged with distinct classes.
    logo_injection = ""
    for image_name, css_class in (("LNBio.png", "logo-light"), ("LNBio white.png", "logo-dark")):
        encoded = _encode_image(assets_dir / "images" / image_name)
        if encoded is not None:
            logo_injection += f'<img src="data:image/png;base64,{encoded}" alt="LNBio Logo" class="{css_class}" style="height: 8rem; width: auto;">'
    if not logo_injection:
        log.debug("LNBio logos not found in assets/images. Skipping logo injection.")

    html_template = _load_asset(html_files, "base.html")
    if not html_template:
        log.error(f"Template base.html not found at {html_files}.")
        return

    sidebar_css, sidebar_dom = _split_html(_load_asset(html_files, "sidebar.html"))
    modal_css, modal_dom = _split_html(_load_asset(html_files, "export_modal.html"))

    # "</" is escaped so that a "</script>" inside any string value cannot
    # terminate the surrounding <script> element. "\/" is a valid escape in
    # both JSON and JavaScript string literals, so the decoded data is unchanged.
    graph_data_json = json.dumps(export_data).replace("</", "<\\/")

    final_html = html_template.replace("__FAVICON_INJECTION__", favicon_injection)

    # Replacements are applied strictly in this order: fragments injected
    # early (e.g. the sidebar) may themselves contain later placeholders.
    for name, code in (
        ("vis", vis_injection),
        ("3Dmol", mol3d_injection),
        ("fabric", fabric_injection),
        ("plotly", plotly_injection),
    ):
        final_html = _inject_js(final_html, name, code)

    for placeholder, value in (
        ("__SIDEBAR_CSS_INJECTION__", sidebar_css),
        ("__MODAL_CSS_INJECTION__", modal_css),
        ("__SIDEBAR_HTML_INJECTION__", sidebar_dom),
        ("__MHCXGRAPH_LOGO_INJECTION__", mhcx_logo_injection),
        ("__LNBIO_LOGO_INJECTION__", logo_injection),
        ("__MODAL_HTML_INJECTION__", modal_dom),
    ):
        final_html = final_html.replace(placeholder, value)

    for name, code in (
        ("main", _load_asset(js_files, "main.js")),
        ("data", _load_asset(js_files, "data.js")),
        ("graph_data", graph_data_json),
        ("init", _load_asset(js_files, "init_functions.js")),
        ("viewer", _load_asset(js_files, "viewer.js")),
        ("theme", _load_asset(js_files, "theme.js")),
        ("grid", _load_asset(js_files, "grid.js")),
        ("analysis", _load_asset(js_files, "analysis.js")),
        ("graph", _load_asset(js_files, "graph.js")),
        ("structures", _load_asset(js_files, "structures.js")),
        ("modal", _load_asset(js_files, "export_modal.js")),
    ):
        final_html = _inject_js(final_html, name, code)

    actual_mode = export_data.get("actual_mode", export_data.get("mode"))
    if actual_mode == "screening":
        # The screening payload reuses the pairwise front end, so the
        # user-visible labels are rewritten here and, for content rendered
        # later in the browser, by the small observer script appended below.
        final_html = final_html.replace("Pairwise View Mode", "Screening Mode (1 vs All)")
        final_html = final_html.replace("Global Pair Analysis", "Global Screening Analysis")
        patch_script = """
    <script>
    window.addEventListener('DOMContentLoaded', () => {
        if (typeof masterData !== 'undefined' && masterData.actual_mode === 'screening') {
            const observer = new MutationObserver(() => {
                const metaPanel = document.getElementById('metadata-panel');
                if (metaPanel && metaPanel.innerHTML.includes('pairwise')) {
                    metaPanel.innerHTML = metaPanel.innerHTML.replace(/pairwise/g, 'screening');
                }
            });
            observer.observe(document.body, { childList: true, subtree: true });
        }
    });
    </script>
    """
        final_html = final_html.replace("</body>", f"{patch_script}\n</body>")
        file_name = "Dashboard_Screening.html"
    else:
        mode = export_data.get("mode")
        file_name = "Dashboard_Pairwise.html" if mode == "pairwise" else "Dashboard_Multiple.html"

    # The directory may not exist yet when no association task produced
    # output (for example pairwise mode with fewer than two graphs).
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    full_path = target_dir / file_name
    full_path.write_text(final_html, encoding="utf-8")

    log.info(f"Interactive Dashboard saved to {full_path}")
def setup_trackers(output_dir, settings):
    """
    Set up the run-time tracking helpers.

    A :class:`ResidueTracker` is built only when ``settings`` carries a
    truthy ``watch_residues`` entry. The global tracker is then initialised
    through :func:`init_tracker` with root ``"CrossSteps"`` and
    ``output_dir`` as its output directory.

    Parameters
    ----------
    output_dir : pathlib.Path
        Directory handed to :func:`init_tracker` as ``outdir``.
    settings : dict[str, Any]
        Runtime settings. The keys read here are ``watch_residues`` and
        ``debug_tracking`` (the latter is forwarded as ``enabled``).

    Returns
    -------
    ResidueTracker or None
        The residue tracker, or ``None`` when ``watch_residues`` is missing
        or falsy.
    """
    watched = settings.get("watch_residues")
    if watched:
        tracker_residues = ResidueTracker(watched)
    else:
        tracker_residues = None

    tracker_options = {
        "root": "CrossSteps",
        "outdir": output_dir,
        "enabled": settings.get("debug_tracking"),
        "prefer_npy_for_ndarray": True,
        "add_timestamp_prefix": False,
    }
    init_tracker(**tracker_options)

    return tracker_residues
def run_multiple_mode(graphs, base_output, run_name, config, log):
    """
    Run one association task over all graphs and build its dashboard.

    Results are written below ``base_output / "MULTIPLE"``. When the task
    yields a truthy result whose ``associated_graphs`` is not ``None``, the
    dashboard payload is fetched from it, tagged with the mode, run name and
    configuration, and rendered into the same directory.

    Parameters
    ----------
    graphs : list
        Graph entries from preprocessing; element ``[1]`` of each entry is
        used as the source file path when deriving protein names.
    base_output : pathlib.Path
        Base directory for results.
    run_name : str
        Identifier of the current run.
    config : dict[str, Any]
        Association configuration, also stored as dashboard metadata.
    log : logging.Logger
        Logger forwarded to the association task and the dashboard writer.

    Returns
    -------
    None
    """
    multiple_dir = base_output / "MULTIPLE"

    result = run_association_task(
        graphs=graphs,
        output_path=multiple_dir,
        run_name=run_name,
        association_config=config,
        log=log,
    )

    # Nothing to render when the association produced no graphs.
    if not result or result.associated_graphs is None:
        return

    protein_names = []
    for graph in graphs:
        protein_names.append(clean_graph_name(graph))

    dashboard_payload = result.get_dashboard_data(protein_names)
    for key, value in (("mode", "multiple"), ("run_name", run_name), ("metadata", config)):
        dashboard_payload[key] = value

    create_master_dashboard(dashboard_payload, multiple_dir, log)
def clean_graph_name(graph):
    """
    Derive a display name from a graph entry.

    Parameters
    ----------
    graph : sequence
        Graph entry whose element ``[1]`` is the path of the source file.

    Returns
    -------
    str
        The file stem (final path component without its last suffix) with
        every occurrence of ``"_nOH"`` removed.
    """
    source_path = graph[1]
    stem = Path(source_path).stem
    # Splitting on the marker and re-joining drops every occurrence of it.
    return "".join(stem.split("_nOH"))
def run_pairwise_mode(graphs, base_output, run_name, config, log):
    """
    Run an association task for every unordered pair of graphs.

    Each pair ``(a, b)`` is written to
    ``base_output / "PAIRWISE" / "<a>_vs_<b>"`` under the run name
    ``"<run_name>_<a>_<b>"``. The dashboard data of every pair that yields
    associated graphs is collected under the key ``"<a>_vs_<b>"`` and a
    single dashboard is rendered into ``base_output / "PAIRWISE"``.

    Parameters
    ----------
    graphs : list
        Graph entries from preprocessing; element ``[1]`` of each entry is
        used as the source file path.
    base_output : pathlib.Path
        Base directory for results.
    run_name : str
        Base identifier of the run.
    config : dict[str, Any]
        Association configuration, also stored as dashboard metadata.
    log : logging.Logger
        Logger forwarded to the association tasks and the dashboard writer.

    Returns
    -------
    None
    """
    pairwise_root = base_output / "PAIRWISE"
    protein_names = list(map(clean_graph_name, graphs))
    resolved_paths = [str(Path(entry[1]).resolve()) for entry in graphs]

    master_export = {
        "mode": "pairwise",
        "run_name": run_name,
        "metadata": config,
        "proteins": protein_names,
        "protein_paths": resolved_paths,
        "pairs": {},
    }

    for first, second in combinations(graphs, 2):
        first_name = clean_graph_name(first)
        second_name = clean_graph_name(second)
        # The same label names both the output folder and the payload key.
        pair_label = f"{first_name}_vs_{second_name}"

        result = run_association_task(
            graphs=[first, second],
            output_path=pairwise_root / pair_label,
            run_name=f"{run_name}_{first_name}_{second_name}",
            association_config=config,
            log=log,
        )

        if not result or result.associated_graphs is None:
            continue
        master_export["pairs"][pair_label] = result.get_dashboard_data(protein_names)

    create_master_dashboard(master_export, pairwise_root, log)
def run_screening_mode(ref_graph, target_graphs, base_output, run_name, config, log):
    """
    Compare one reference graph against every target graph (1-vs-All).

    Every target is associated with the reference in its own task, written
    to ``base_output / "SCREENING" / "<ref>_vs_<target>"``. The per-target
    dashboard data is gathered into one payload and rendered as a single
    dashboard in ``base_output / "SCREENING"``. The payload declares
    ``mode="pairwise"`` and additionally carries ``actual_mode="screening"``
    and ``reference_structure``.

    Parameters
    ----------
    ref_graph : tuple
        Reference graph entry; element ``[1]`` is used as its source file
        path.
    target_graphs : list of tuple
        Graph entries to compare against the reference.
    base_output : pathlib.Path
        Base directory for results.
    run_name : str
        Base identifier of the run.
    config : dict[str, Any]
        Association configuration, also stored as dashboard metadata.
    log : logging.Logger
        Logger used for messages and forwarded to the tasks.

    Returns
    -------
    None
        When ``target_graphs`` is empty an error is logged and nothing else
        happens.
    """
    # Guard first: without targets there is nothing to screen.
    if not target_graphs:
        log.error("Screening mode requires at least 1 target graph alongside the reference.")
        return

    screening_root = base_output / "SCREENING"
    reference_name = clean_graph_name(ref_graph)

    # Reference first, then the targets: the order used for the name list.
    every_graph = [ref_graph] + target_graphs
    protein_names = list(map(clean_graph_name, every_graph))
    resolved_paths = [str(Path(entry[1]).resolve()) for entry in every_graph]

    master_export = {
        "mode": "pairwise",
        "actual_mode": "screening",
        "reference_structure": reference_name,
        "run_name": run_name,
        "metadata": config,
        "proteins": protein_names,
        "protein_paths": resolved_paths,
        "pairs": {},
    }

    for candidate in target_graphs:
        candidate_name = clean_graph_name(candidate)
        # The same label names both the output folder and the payload key.
        pair_label = f"{reference_name}_vs_{candidate_name}"

        result = run_association_task(
            graphs=[ref_graph, candidate],
            output_path=screening_root / pair_label,
            run_name=f"{run_name}_{reference_name}_{candidate_name}",
            association_config=config,
            log=log,
        )

        if not result or result.associated_graphs is None:
            continue
        master_export["pairs"][pair_label] = result.get_dashboard_data(protein_names)

    create_master_dashboard(master_export, screening_root, log)
def run(args):
    """
    Execute the association pipeline described by a manifest.

    The manifest is loaded, logging and tracking are configured, graphs are
    created, and the workflow selected by ``settings["run_mode"]`` is run.
    When residue tracking is active its report is dumped afterwards, and
    when ``args.dashboard`` is set the generated dashboard is opened in the
    default web browser.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. ``manifest`` and ``dashboard`` are
        read here.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``run_mode`` is not ``"multiple"``, ``"pairwise"`` or
        ``"screening"``; or, in screening mode, if ``reference_structure``
        is missing from the settings or matches none of the input graphs.
    """
    manifest = load_manifest(args.manifest)
    settings = manifest["settings"]
    run_name = settings["run_name"]
    run_mode = settings.get("run_mode")

    if run_mode not in {"multiple", "pairwise", "screening"}:
        raise ValueError("run_mode must be 'multiple', 'pairwise' or 'screening'")

    base_output = Path(settings["output_path"])
    output_dir = base_output / run_name

    log = setup_logging(
        outdir=output_dir,
        debug=settings.get("debug_logs"),
        verbose=settings.get("verbose"),
    )

    tracker_residues = setup_trackers(output_dir=output_dir, settings=settings)
    association_config = build_association_config(settings, run_mode, tracker_residues)

    graphs = create_graphs(manifest)

    if run_mode == "multiple":
        run_multiple_mode(graphs, base_output, run_name, association_config, log)
    elif run_mode == "pairwise":
        run_pairwise_mode(graphs, base_output, run_name, association_config, log)
    elif run_mode == "screening":
        ref_name = settings.get("reference_structure")
        if not ref_name:
            raise ValueError("Screening mode requires 'reference_structure' to be defined in the manifest settings.")

        ref_graph = next((g for g in graphs if clean_graph_name(g) == ref_name), None)
        if ref_graph is None:
            raise ValueError(f"Reference structure '{ref_name}' not found among the input graphs.")

        target_graphs = [g for g in graphs if clean_graph_name(g) != ref_name]
        run_screening_mode(ref_graph, target_graphs, base_output, run_name, association_config, log)

    if tracker_residues:
        out_path = tracker_residues.dump_json()
        log.info(f"Residue tracking report saved to: {out_path}")

    if args.dashboard:
        # Folder and file name per mode, matching what the dashboard writer
        # (create_master_dashboard) produces for each workflow.
        dashboard_locations = {
            "multiple": ("MULTIPLE", "Dashboard_Multiple.html"),
            "pairwise": ("PAIRWISE", "Dashboard_Pairwise.html"),
            "screening": ("SCREENING", "Dashboard_Screening.html"),
        }
        folder, file_name = dashboard_locations[run_mode]
        dash_path = base_output / folder / file_name

        if dash_path.exists():
            log.info("Opening dashboard in the default web browser...")
            # as_uri() yields a well-formed, percent-encoded file:// URL on
            # every platform (it requires the absolute path resolve() gives).
            webbrowser.open(dash_path.resolve().as_uri())
        else:
            log.warning(f"Dashboard not found at {dash_path}; nothing to open.")
def renumber(args):
    """
    Renumber every structure file of a directory for the chosen MHC class.

    Files in ``args.input_dir`` with a ``.pdb``, ``.cif`` or ``.mmcif``
    extension (case-insensitive) are processed in sorted order and written
    to ``args.output_dir``, which is created when missing. A file that
    fails is reported and counted; processing continues with the next one.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. The attributes read are
        ``mhc_class``, ``input_dir``, ``output_dir``, ``suffix``, ``debug``
        and ``warn_score``. A non-empty ``suffix`` is inserted between the
        file stem and its extension in the output name.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``args.mhc_class`` is neither ``MHCI`` nor ``MHCII``
        (case-insensitive).
    RuntimeError
        If the input directory contains no supported structure file.
    """
    mhc_class = args.mhc_class.upper()
    if mhc_class == "MHCI":
        load_templates = load_mhci_templates
        process_structure_file = process_structure_file_mhci
    elif mhc_class == "MHCII":
        load_templates = load_mhcii_templates
        process_structure_file = process_structure_file_mhcii
    else:
        raise ValueError(f"{args.mhc_class} is an invalid class of MHC. Please, choose between MHCI and MHCII.")

    log = logging.getLogger("MHCXGraph")
    os.makedirs(args.output_dir, exist_ok=True)

    assets_dir = Path(__file__).resolve().parent / "assets"
    display_csv_path = assets_dir / "imgt_display_all.csv"
    numbering_csv_path = assets_dir / "imgt_numbering_mapping_all.csv"
    templates = load_templates(display_csv_path, numbering_csv_path)

    valid_ext = {".pdb", ".cif", ".mmcif"}
    files = sorted(
        f
        for f in os.listdir(args.input_dir)
        if os.path.isfile(os.path.join(args.input_dir, f))
        and os.path.splitext(f)[1].lower() in valid_ext
    )

    if not files:
        raise RuntimeError("No .pdb, .cif, or .mmcif files found in input directory.")

    n_ok = 0
    n_fail = 0

    for fname in files:
        input_path = os.path.join(args.input_dir, fname)
        stem, ext = os.path.splitext(fname)
        out_name = f"{stem}{args.suffix}{ext}" if args.suffix else fname
        output_path = os.path.join(args.output_dir, out_name)

        log.info(f"Processing: {fname}")
        try:
            process_structure_file(
                input_path=input_path,
                output_path=output_path,
                templates=templates,
                debug=args.debug,
                warn_score=args.warn_score,
            )
        except Exception as e:
            # Deliberate best-effort boundary: one bad file must not abort
            # the batch. Reported at ERROR level so the failure stays visible
            # even when INFO records are filtered out; the traceback is
            # attached in debug runs.
            log.error(f" FAILED -> {fname}: {e}", exc_info=bool(args.debug))
            n_fail += 1
        else:
            log.info(f" OK -> {output_path}")
            n_ok += 1

    log.info("\nFinished.")
    log.info(f" Success: {n_ok}")
    log.info(f" Failed : {n_fail}")
def main():
    """
    Entry point of the MHCXGraph command-line interface.

    The command-line arguments are parsed and the selected sub-command is
    dispatched to its handler:

    ``run``
        :func:`run` - execute the association pipeline of a manifest.
    ``renumber``
        :func:`renumber` - renumber the structure files of a directory.
    ``heatmap``
        :func:`create_heatmap`.

    Any other command value results in no action.

    Returns
    -------
    None
    """
    args = parse_args()

    handlers = {
        "run": run,
        "renumber": renumber,
        "heatmap": create_heatmap,
    }
    handler = handlers.get(args.command)
    if handler is not None:
        handler(args)
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__": main()