# Source code for DOLPHIN.model.run_model

import os
from .train import run_train
import numpy as np
import torch
import pickle
import random

"""
Main entry point: resolves the model hyperparameters and launches DOLPHIN training.
"""

def run_DOLPHIN(data_type, graph_in, fea_in, current_out_path='./', params=None, device='auto', seed_num=0):
    """
    Run the DOLPHIN model on single-cell RNA-seq data to obtain latent cell embeddings.

    The function seeds all random number generators, merges any user-supplied
    hyperparameters over the defaults for ``data_type``, resolves the compute
    device and then delegates the actual training to ``run_train``.

    Parameters
    ----------
    data_type : str
        Specifies the type of input single-cell RNA-seq data.
        - "full-length": For full-length RNA-seq data.
        - "10x": For 10x Genomics RNA-seq data.

        Any other value falls back to the "10x" defaults (kept for backward
        compatibility) and emits a ``UserWarning``.

    graph_in : object
        The input graph structure (precomputed from exon-level data).

    fea_in : anndata.AnnData
        The input feature matrix, provided as an AnnData object.

    current_out_path : str, optional
        Output directory where the resulting cell embeddings will be saved.
        The embeddings will be written to `DOLPHIN_Z.h5ad`. Default is `'./'`.

    params : dict, optional
        A dictionary of model hyperparameters. If not provided, default parameters will be used
        depending on `data_type`. User-supplied entries override the defaults key by key;
        keys that are not supplied keep their default value. Customizable parameters include:

        - "gat_channel"       : Number of GAT output channels per head.
        - "nhead"             : Number of GAT attention heads.
        - "gat_dropout"       : Dropout rate in the GAT layer.
        - "concat"            : Defaults to ``False``; forwarded to the model unchanged
                                (presumably whether GAT head outputs are concatenated --
                                confirm against the model code).
        - "list_gra_enc_hid"  : Encoder MLP hidden layer sizes.
        - "gra_p_dropout"     : Dropout rate in the encoder.
        - "z_dim"             : Dimensionality of the latent space.
        - "list_fea_dec_hid"  : Feature decoder MLP hidden layer sizes.
        - "list_adj_dec_hid"  : Adjacency decoder MLP hidden layer sizes.
        - "lr"                : Learning rate.
        - "batch"             : Mini-batch size.
        - "epochs"            : Number of training epochs.
        - "kl_beta"           : KL divergence loss weight.
        - "fea_lambda"        : Feature reconstruction loss weight.
        - "adj_lambda"        : Adjacency reconstruction loss weight.

    device : str, optional
        Computational device to run the model on. Options are:

        - `'auto'` (default): Automatically selects `'cuda'` if a GPU is available, otherwise falls back to `'cpu'`.
        - `'cuda'` or `'cuda:0'`: Use the first available GPU.
        - `'cpu'`: Run on CPU only.

        GPU acceleration is recommended for large datasets or faster training.

    seed_num : int, optional
        Random seed for reproducibility. Default is `0`.

    Returns
    -------
    None
        Saves the latent cell embedding matrix to `DOLPHIN_Z.h5ad` under `current_out_path`.
    """
    # Local import so this block does not depend on a change to the file-level imports.
    import warnings

    # Seed every RNG the training pipeline may draw from.
    random.seed(seed_num)
    # NOTE: changing PYTHONHASHSEED at runtime does not alter hash randomization of
    # the already-running interpreter; it is only inherited by child processes.
    os.environ['PYTHONHASHSEED'] = str(seed_num)
    np.random.seed(seed_num)
    torch.manual_seed(seed_num)
    # manual_seed_all seeds every visible GPU (including the current one).
    torch.cuda.manual_seed_all(seed_num)
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes cuDNN kernels and may pick different algorithms
    # between runs, which defeats the deterministic setting above.
    torch.backends.cudnn.benchmark = False

    default_params_full_length = {
        "gat_channel": [2],
        "nhead": 9,
        "gat_dropout": 0.3,
        "concat": False,
        "list_gra_enc_hid": [256],
        "gra_p_dropout": 0.2,
        "z_dim": 50,
        "list_fea_dec_hid": [256],
        "list_adj_dec_hid": [128],
        "lr": 1.0e-3,
        "batch": 20,
        "epochs": 200,
        "kl_beta": 0.7,
        "fea_lambda": 0.5,
        "adj_lambda": 0.5,
    }

    default_params_10x = {
        "gat_channel": [9],
        "nhead": 1,
        "gat_dropout": 0.1,
        "concat": False,
        "list_gra_enc_hid": [512],
        "gra_p_dropout": 0.3,
        "z_dim": 35,
        "list_fea_dec_hid": [512],
        "list_adj_dec_hid": [256],
        "lr": 1.0e-3,
        "batch": 20,
        "epochs": 200,
        "kl_beta": 0.7,
        "fea_lambda": 0.5,
        "adj_lambda": 0.5,
    }

    # Select the default parameters based on data_type.
    if data_type == "full-length":
        default_params = default_params_full_length
    else:
        if data_type != "10x":
            # Keep the historical fallback, but make a likely typo visible.
            warnings.warn(
                f"[DOLPHIN] Unrecognized data_type {data_type!r}; expected "
                "'full-length' or '10x'. Falling back to the '10x' default parameters.",
                stacklevel=2,
            )
        default_params = default_params_10x

    # User-provided entries override the defaults; missing keys keep their default.
    if params is not None:
        default_params.update(params)
    params = default_params

    if device == 'auto':
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # str() lets the log line also cope with a torch.device instance; the
    # original `device` object is what gets forwarded to run_train.
    device_label = str(device)
    print(f"[DOLPHIN] Training on device: {device_label.upper()} ({'GPU' if 'cuda' in device_label else 'CPU'})")

    run_train(graph_in, fea_in, current_out_path, params, device=device)