import logging
import os
import webbrowser
from itertools import combinations
from pathlib import Path
from MHCXGraph.cli.cli_parser import parse_args
from MHCXGraph.core.residue_tracking import ResidueTracker
from MHCXGraph.core.tracking import init_tracker
from MHCXGraph.scripts.create_heatmaps import create_heatmap
from MHCXGraph.scripts.renumber_MHCI_imgt import load_mhci_templates, process_structure_file_mhci
from MHCXGraph.scripts.renumber_MHCII_imgt import load_mhcii_templates, process_structure_file_mhcii
from MHCXGraph.utils.logging_utils import setup_logging
from MHCXGraph.utils.preprocessing import create_graphs
from MHCXGraph.workflow.association import run_association_task
from MHCXGraph.workflow.manifest import build_association_config, load_manifest
[docs]
def create_master_dashboard(export_data, output_dir, log):
    """Render the aggregated dashboard payload into a standalone HTML file.

    The function reads ``base.html`` plus the sidebar/modal fragments and the
    JavaScript modules from the package ``assets/dashboard`` folder, replaces
    the ``__*_INJECTION__`` placeholders found in the template and writes the
    result into ``output_dir``.

    Parameters
    ----------
    export_data : dict
        JSON-serialisable payload. The keys ``mode`` and (optionally)
        ``actual_mode`` select the output file name and the screening-specific
        text replacements.
    output_dir : pathlib.Path or str
        Directory that receives the HTML file. It is created when missing.
    log : logging.Logger
        Logger used for status and error messages.

    Returns
    -------
    None
        Nothing is written when ``base.html`` cannot be found; an error is
        logged instead.
    """
    import base64
    import json
    import re

    assets_dir = Path(__file__).resolve().parent / "assets"
    html_files = assets_dir / "dashboard"
    js_files = html_files / "js"

    def _load_asset(folder, filename, default=""):
        """Return the text of ``folder / filename`` or ``default`` if absent."""
        asset = folder / filename
        return asset.read_text(encoding="utf-8") if asset.exists() else default

    def _split_html(raw_html):
        """Split a fragment into (first ``<style>`` body, markup without styles)."""
        css_match = re.search(r'<style>(.*?)</style>', raw_html, re.DOTALL)
        css = css_match.group(1) if css_match else ""
        clean_html = re.sub(r'<style>.*?</style>', '', raw_html, flags=re.DOTALL).strip()
        return css, clean_html

    def _library_tag(filename, cdn_tag):
        """Inline a bundled JS library, falling back to its CDN ``<script>`` tag."""
        local = js_files / filename
        if local.exists():
            return f'<script>\n{local.read_text(encoding="utf-8")}\n</script>'
        return cdn_tag

    def _encode_image(path):
        """Return the base64 text of an image file, or ``None`` if it is missing."""
        if not path.exists():
            return None
        return base64.b64encode(path.read_bytes()).decode("utf-8")

    # Fail fast: without the template there is nothing to render, so avoid
    # reading the (potentially large) library bundles first.
    html_template = _load_asset(html_files, "base.html")
    if not html_template:
        log.error(f"Template base.html not found at {html_files}.")
        return

    vis_injection = _library_tag(
        "vis-network.min.js",
        '<script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>',
    )
    mol3d_injection = _library_tag(
        "3Dmol-min.js",
        '<script src="https://3Dmol.csb.pitt.edu/build/3Dmol-min.js"></script>',
    )
    fabric_injection = _library_tag(
        "fabric.min.js",
        '<script src="https://cdnjs.cloudflare.com/ajax/libs/fabric.js/5.3.1/fabric.min.js"></script>',
    )
    plotly_injection = _library_tag(
        "plotly.min.js",
        '<script src="https://cdnjs.cloudflare.com/ajax/libs/plotly.js/2.32.0/plotly.min.js"></script>',
    )

    # Project logo doubles as favicon; plain text is used when it is missing.
    mhcx_logo_injection = "<b>MHCXGraph</b>\n"
    favicon_injection = ""
    encoded = _encode_image(assets_dir / "images/MHCXGraph logo.png")
    if encoded is not None:
        mhcx_logo_injection = f'<img src="data:image/png;base64,{encoded}" alt="MHCXGraph Logo" style="width: 100%; height: 100%; margin-bottom: -10px;">'
        favicon_injection = f'<link rel="icon" type="image/png" href="data:image/png;base64,{encoded}">'

    # Light variant first, then dark; each is tagged with its own CSS class.
    logo_injection = ""
    for image_name, css_class in (
        ("images/LNBio.png", "logo-light"),
        ("images/LNBio white.png", "logo-dark"),
    ):
        encoded = _encode_image(assets_dir / image_name)
        if encoded is not None:
            logo_injection += f'<img src="data:image/png;base64,{encoded}" alt="LNBio Logo" class="{css_class}" style="height: 8rem; width: auto;">'
    if not logo_injection:
        log.debug("LNBio logos not found in assets/images. Skipping logo injection.")

    sidebar_css, sidebar_dom = _split_html(_load_asset(html_files, "sidebar.html"))
    modal_css, modal_dom = _split_html(_load_asset(html_files, "export_modal.html"))

    # "</" never occurs outside JSON string values and "<\/" is a valid JSON/JS
    # escape for it, so this keeps a literal "</script>" inside the data from
    # closing the surrounding script element (assumes the placeholder sits in
    # a <script> block of base.html).
    graph_data_json = json.dumps(export_data).replace("</", "<\\/")

    # Replacement order is kept identical to the historical implementation:
    # substitutions are sequential, so later ones also see earlier content.
    replacements = [
        ("__FAVICON_INJECTION__", favicon_injection),
        ("__VIS_JS_INJECTION__", vis_injection),
        ("__3DMOL_JS_INJECTION__", mol3d_injection),
        ("__FABRIC_JS_INJECTION__", fabric_injection),
        ("__PLOTLY_JS_INJECTION__", plotly_injection),
        ("__SIDEBAR_CSS_INJECTION__", sidebar_css),
        ("__MODAL_CSS_INJECTION__", modal_css),
        ("__SIDEBAR_HTML_INJECTION__", sidebar_dom),
        ("__MHCXGRAPH_LOGO_INJECTION__", mhcx_logo_injection),
        ("__LNBIO_LOGO_INJECTION__", logo_injection),
        ("__MODAL_HTML_INJECTION__", modal_dom),
        ("__MAIN_JS_INJECTION__", _load_asset(js_files, "main.js")),
        ("__DATA_JS_INJECTION__", _load_asset(js_files, "data.js")),
        ("__GRAPH_DATA_JS_INJECTION__", graph_data_json),
        ("__INIT_JS_INJECTION__", _load_asset(js_files, "init_functions.js")),
        ("__VIEWER_JS_INJECTION__", _load_asset(js_files, "viewer.js")),
        ("__THEME_JS_INJECTION__", _load_asset(js_files, "theme.js")),
        ("__GRID_JS_INJECTION__", _load_asset(js_files, "grid.js")),
        ("__ANALYSIS_JS_INJECTION__", _load_asset(js_files, "analysis.js")),
        ("__GRAPH_JS_INJECTION__", _load_asset(js_files, "graph.js")),
        ("__STRUCTURES_JS_INJECTION__", _load_asset(js_files, "structures.js")),
        ("__MODAL_JS_INJECTION__", _load_asset(js_files, "export_modal.js")),
    ]
    final_html = html_template
    for placeholder, content in replacements:
        final_html = final_html.replace(placeholder, content)

    actual_mode = export_data.get("actual_mode", export_data.get("mode"))
    if actual_mode == "screening":
        final_html = final_html.replace(
            "Pairwise View Mode",
            "Screening Mode (1 vs All)"
        )
        final_html = final_html.replace(
            "Global Pair Analysis",
            "Global Screening Analysis"
        )
        patch_script = """
<script>
window.addEventListener('DOMContentLoaded', () => {
    if (typeof masterData !== 'undefined' && masterData.actual_mode === 'screening') {
        const observer = new MutationObserver(() => {
            const metaPanel = document.getElementById('metadata-panel');
            if (metaPanel && metaPanel.innerHTML.includes('pairwise')) {
                metaPanel.innerHTML = metaPanel.innerHTML.replace(/pairwise/g, 'screening');
            }
        });
        observer.observe(document.body, { childList: true, subtree: true });
    }
});
</script>
"""
        # Patch only the final closing tag: inlined JS bundles may contain the
        # literal text "</body>" and must not receive a copy of the script.
        head, closing_tag, tail = final_html.rpartition("</body>")
        if closing_tag:
            final_html = f"{head}{patch_script}\n</body>{tail}"
        file_name = "Dashboard_Screening.html"
    else:
        mode = export_data.get("mode")
        file_name = "Dashboard_Pairwise.html" if mode == "pairwise" else "Dashboard_Multiple.html"

    # The target folder is not guaranteed to exist (e.g. when no association
    # task produced output), so create it before writing.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    full_path = output_dir / file_name
    full_path.write_text(final_html, encoding="utf-8")
    log.info(f"Interactive Dashboard saved to {full_path}")
[docs]
def setup_trackers(output_dir, settings):
    """
    Prepare the tracking helpers used during a run.

    A :class:`ResidueTracker` is built only when ``watch_residues`` holds a
    truthy value; afterwards :func:`init_tracker` is called with the
    ``"CrossSteps"`` root and the given output directory.

    Parameters
    ----------
    output_dir : pathlib.Path
        Directory handed to ``init_tracker`` as ``outdir``.
    settings : dict[str, Any]
        Runtime settings. The keys read here are ``watch_residues`` and
        ``debug_tracking``.

    Returns
    -------
    ResidueTracker or None
        The residue tracker when ``watch_residues`` is set, otherwise
        ``None``.
    """
    watched = settings.get("watch_residues")
    residue_tracker = ResidueTracker(watched) if watched else None

    init_tracker(
        root="CrossSteps",
        outdir=output_dir,
        enabled=settings.get("debug_tracking"),
        prefer_npy_for_ndarray=True,
        add_timestamp_prefix=False,
    )
    return residue_tracker
[docs]
def run_multiple_mode(graphs, base_output, run_name, config, log):
    """
    Run one association task over all graphs at once.

    Results are written below ``base_output / "MULTIPLE"``. When the task
    returns a result whose ``associated_graphs`` is not ``None``, its
    dashboard payload is tagged with the mode, run name and configuration
    and rendered to HTML in the same directory.

    Parameters
    ----------
    graphs : list
        Graph tuples produced by the preprocessing stage.
    base_output : pathlib.Path
        Base directory for the results.
    run_name : str
        Identifier of the current run.
    config : dict[str, Any]
        Association configuration passed through to the task.
    log : logging.Logger
        Logger used for runtime messages.

    Returns
    -------
    None
    """
    out_dir = base_output / "MULTIPLE"
    result = run_association_task(
        graphs=graphs,
        output_path=out_dir,
        run_name=run_name,
        association_config=config,
        log=log,
    )

    # Nothing to visualise when the task produced no associated graphs.
    if not result or result.associated_graphs is None:
        return

    protein_names = [clean_graph_name(graph) for graph in graphs]
    payload = result.get_dashboard_data(protein_names)
    payload["mode"] = "multiple"
    payload["run_name"] = run_name
    payload["metadata"] = config
    create_master_dashboard(payload, out_dir, log)
[docs]
def clean_graph_name(graph):
    """Return the file stem of ``graph[1]`` with every ``"_nOH"`` removed."""
    return Path(graph[1]).stem.replace("_nOH", "")
[docs]
def run_pairwise_mode(graphs, base_output, run_name, config, log):
    """
    Execute the association workflow in pairwise mode.

    Every unique pair of graphs is processed by its own association task and
    written to ``base_output / "PAIRWISE" / "<name1>_vs_<name2>"``. The
    per-pair dashboard payloads are collected into one master payload that is
    rendered to a single HTML dashboard in the ``PAIRWISE`` directory.

    Parameters
    ----------
    graphs : list
        Graph tuples produced by preprocessing; element ``1`` of each tuple
        is used as the structure file path.
    base_output : pathlib.Path
        Root directory where pairwise comparison results will be saved.
    run_name : str
        Base identifier for the run.
    config : dict[str, Any]
        Association configuration dictionary controlling the
        graph association algorithm.
    log : logging.Logger
        Logger instance used to record runtime messages.

    Returns
    -------
    None
        With fewer than two graphs no pair can be formed; an error is logged
        and nothing is written (mirrors the guard in screening mode).
    """
    if len(graphs) < 2:
        log.error("Pairwise mode requires at least 2 graphs to compare.")
        return

    pair_base_dir = base_output / "PAIRWISE"
    global_proteins = [clean_graph_name(g) for g in graphs]
    master_export = {
        "mode": "pairwise",
        "run_name": run_name,
        "metadata": config,
        "proteins": global_proteins,
        "protein_paths": [str(Path(g[1]).resolve()) for g in graphs],
        "pairs": {},
    }

    for g1, g2 in combinations(graphs, 2):
        name1 = clean_graph_name(g1)
        name2 = clean_graph_name(g2)
        # The same label names the output folder and the key in the payload.
        pair_label = f"{name1}_vs_{name2}"
        G = run_association_task(
            graphs=[g1, g2],
            output_path=pair_base_dir / pair_label,
            run_name=f"{run_name}_{name1}_{name2}",
            association_config=config,
            log=log,
        )
        # Pairs without associated graphs are left out of the dashboard.
        if G and G.associated_graphs is not None:
            master_export["pairs"][pair_label] = G.get_dashboard_data(global_proteins)

    create_master_dashboard(master_export, pair_base_dir, log)
[docs]
def run_screening_mode(ref_graph, target_graphs, base_output, run_name, config, log):
    """
    Run the association workflow in screening mode (one reference vs. all).

    The reference graph is compared with each target graph in its own
    association task. All per-target payloads are gathered into a single
    master payload that reuses the ``"pairwise"`` structure and carries an
    extra ``actual_mode`` entry set to ``"screening"``; that payload is then
    rendered to one HTML dashboard under ``base_output / "SCREENING"``.

    Parameters
    ----------
    ref_graph : tuple
        Reference graph tuple; element ``1`` is used as the structure file
        path.
    target_graphs : list of tuple
        Graph tuples compared against the reference.
    base_output : pathlib.Path
        Root directory for screening results and the dashboard.
    run_name : str
        Base identifier of the current run.
    config : dict[str, Any]
        Association configuration passed through to every task.
    log : logging.Logger
        Logger used for progress and error messages.

    Returns
    -------
    None
        When ``target_graphs`` is empty an error is logged and nothing else
        happens.
    """
    if not target_graphs:
        log.error("Screening mode requires at least 1 target graph alongside the reference.")
        return

    out_root = base_output / "SCREENING"
    reference = clean_graph_name(ref_graph)

    # Reference first, then targets: this order defines the protein list
    # handed to get_dashboard_data for every comparison.
    every_graph = [ref_graph, *target_graphs]
    protein_names = [clean_graph_name(graph) for graph in every_graph]
    protein_paths = [str(Path(graph[1]).resolve()) for graph in every_graph]

    payload = {
        "mode": "pairwise",
        "actual_mode": "screening",
        "reference_structure": reference,
        "run_name": run_name,
        "metadata": config,
        "proteins": protein_names,
        "protein_paths": protein_paths,
        "pairs": {},
    }

    for candidate in target_graphs:
        candidate_name = clean_graph_name(candidate)
        # One label serves as both the output folder and the payload key.
        label = f"{reference}_vs_{candidate_name}"
        outcome = run_association_task(
            graphs=[ref_graph, candidate],
            output_path=out_root / label,
            run_name=f"{run_name}_{reference}_{candidate_name}",
            association_config=config,
            log=log,
        )
        if not outcome or outcome.associated_graphs is None:
            continue
        payload["pairs"][label] = outcome.get_dashboard_data(protein_names)

    create_master_dashboard(payload, out_root, log)
[docs]
def run(args):
    """
    Execute the ``run`` command: build graphs and run the association workflow.

    The manifest referenced by ``args.manifest`` is loaded, logging and
    tracking are configured, graphs are created, and the workflow selected by
    the ``run_mode`` setting is executed. When ``args.dashboard`` is truthy the
    generated HTML dashboard is opened in the default web browser.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. The attributes read here are
        ``manifest`` and ``dashboard``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``run_mode`` is not ``"multiple"``, ``"pairwise"`` or
        ``"screening"``; if screening mode is selected without a
        ``reference_structure`` setting; or if the reference structure does
        not match any input graph.
    """
    manifest = load_manifest(args.manifest)
    settings = manifest["settings"]
    run_name = settings["run_name"]
    run_mode = settings.get("run_mode")
    if run_mode not in {"multiple", "pairwise", "screening"}:
        raise ValueError("run_mode must be 'multiple', 'pairwise' or 'screening'")

    base_output = Path(settings["output_path"])
    output_dir = base_output / run_name
    log = setup_logging(
        outdir=output_dir,
        debug=settings.get("debug_logs"),
        verbose=settings.get("verbose"),
    )
    tracker_residues = setup_trackers(output_dir=output_dir, settings=settings)
    association_config = build_association_config(settings, run_mode, tracker_residues)
    graphs = create_graphs(manifest)

    if run_mode == "multiple":
        run_multiple_mode(graphs, base_output, run_name, association_config, log)
    elif run_mode == "pairwise":
        run_pairwise_mode(graphs, base_output, run_name, association_config, log)
    elif run_mode == "screening":
        ref_name = settings.get("reference_structure")
        if not ref_name:
            raise ValueError("Screening mode requires 'reference_structure' to be defined in the manifest settings.")
        ref_graph = next((g for g in graphs if clean_graph_name(g) == ref_name), None)
        if ref_graph is None:
            raise ValueError(f"Reference structure '{ref_name}' not found among the input graphs.")
        target_graphs = [g for g in graphs if clean_graph_name(g) != ref_name]
        run_screening_mode(ref_graph, target_graphs, base_output, run_name, association_config, log)

    if tracker_residues:
        out_path = tracker_residues.dump_json()
        log.info(f"Residue tracking report saved to: {out_path}")

    if args.dashboard:
        # Folder and file names must match what create_master_dashboard
        # writes for each mode, otherwise the dashboard is never found.
        dashboard_locations = {
            "multiple": ("MULTIPLE", "Dashboard_Multiple.html"),
            "pairwise": ("PAIRWISE", "Dashboard_Pairwise.html"),
            "screening": ("SCREENING", "Dashboard_Screening.html"),
        }
        folder, file_name = dashboard_locations[run_mode]
        dash_path = base_output / folder / file_name
        if dash_path.exists():
            log.info("Opening dashboard in the default web browser...")
            # as_uri() yields a correctly escaped file:// URL on every platform.
            webbrowser.open(dash_path.resolve().as_uri())
        else:
            log.warning(f"Dashboard not found at {dash_path}; nothing to open.")
[docs]
def renumber(args):
    """
    Execute the ``renumber`` command on every structure file in a directory.

    The MHC class selects the template loader and the per-file processing
    function. Each ``.pdb``, ``.cif`` or ``.mmcif`` file found directly in
    ``args.input_dir`` is processed and written to ``args.output_dir``; a
    failure on one file is logged and does not stop the remaining files.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. The attributes read here are
        ``mhc_class``, ``input_dir``, ``output_dir``, ``suffix``, ``debug``
        and ``warn_score``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``args.mhc_class`` is neither ``MHCI`` nor ``MHCII``
        (case-insensitive).
    RuntimeError
        If the input directory contains no supported structure file.
    """
    mhc_class = args.mhc_class.upper()
    if mhc_class == "MHCI":
        load_templates = load_mhci_templates
        process_structure_file = process_structure_file_mhci
    elif mhc_class == "MHCII":
        load_templates = load_mhcii_templates
        process_structure_file = process_structure_file_mhcii
    else:
        raise ValueError(f"{args.mhc_class} is an invalid class of MHC. Please, choose between MHCI and MHCII.")

    log = logging.getLogger("MHCXGraph")
    os.makedirs(args.output_dir, exist_ok=True)

    assets_dir = Path(__file__).resolve().parent / "assets"
    display_csv_path = assets_dir / "imgt_display_all.csv"
    numbering_csv_path = assets_dir / "imgt_numbering_mapping_all.csv"
    templates = load_templates(display_csv_path, numbering_csv_path)

    valid_ext = {".pdb", ".cif", ".mmcif"}
    files = sorted(
        f for f in os.listdir(args.input_dir)
        if os.path.isfile(os.path.join(args.input_dir, f))
        and os.path.splitext(f)[1].lower() in valid_ext
    )
    if not files:
        raise RuntimeError("No .pdb, .cif, or .mmcif files found in input directory.")

    n_ok = 0
    n_fail = 0
    for fname in files:
        input_path = os.path.join(args.input_dir, fname)
        stem, ext = os.path.splitext(fname)
        # Without a suffix the output keeps the input file name.
        out_name = f"{stem}{args.suffix}{ext}" if args.suffix else fname
        output_path = os.path.join(args.output_dir, out_name)
        log.info(f"Processing: {fname}")
        try:
            process_structure_file(
                input_path=input_path,
                output_path=output_path,
                templates=templates,
                debug=args.debug,
                warn_score=args.warn_score
            )
        except Exception as e:
            # Deliberate best-effort: one bad file must not abort the batch.
            # Report it at ERROR level so it is not lost among INFO messages,
            # and keep the traceback available at DEBUG level.
            log.error(f" FAILED -> {fname}: {e}")
            log.debug("Traceback for %s", fname, exc_info=True)
            n_fail += 1
        else:
            log.info(f" OK -> {output_path}")
            n_ok += 1

    log.info("\nFinished.")
    log.info(f" Success: {n_ok}")
    log.info(f" Failed : {n_fail}")
[docs]
def main():
    """
    Entry point of the MHCXGraph command-line interface.

    The command-line arguments are parsed and the selected sub-command is
    dispatched to its handler:

    ``run``
        Load the manifest and execute the association workflow. The
        manifest's ``run_mode`` may be ``multiple``, ``pairwise`` or
        ``screening``.
    ``renumber``
        Renumber the structure files of a directory.
    ``heatmap``
        Hand the arguments to ``create_heatmap``.

    Any other command value results in no action.

    Returns
    -------
    None
    """
    args = parse_args()
    handlers = {
        "run": run,
        "renumber": renumber,
        "heatmap": create_heatmap,
    }
    handler = handlers.get(args.command)
    if handler is not None:
        handler(args)
# Script entry point: run the CLI only when this module is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()