Source code for DOLPHIN.cell_reads_aggregation.find_cell_neighbor

import anndata
from sklearn.neighbors import kneighbors_graph
import numpy as np
import pandas as pd
import os

def run_find_neighbor(
    embedding_data: str,
    out_name: str,
    N_neighbor: int = 10,
    out_directory: str = "./",
    n_jobs: int = 20,
) -> None:
    """
    Identify nearest neighbors for each cell based on the embedding space.

    Parameters
    ----------
    embedding_data : str
        Path to the `.h5ad` file generated by the DOLPHIN model.
        The file must contain the embedding matrix `X_z` in `obsm`.
    out_name : str
        Output filename prefix.
    N_neighbor : int, optional
        Number of neighbors to find for each cell (including itself).
        Default is 10.
    out_directory : str, optional
        Directory where the neighbor list CSV file will be saved. It is
        created if it does not exist. Default is current directory.
    n_jobs : int, optional
        Number of parallel jobs passed to
        `sklearn.neighbors.kneighbors_graph`. Default is 20.

    Returns
    -------
    None
        Saves a CSV file named `N_<out_name>_<N_neighbor>.csv` containing
        two columns:

        - `main_name`: the target cell
        - `neighbor`: a neighboring cell ID from the embedding space

        Rows are grouped by target cell in `adata.obs` order; within a
        target cell, neighbors are listed in `adata.obs` order as well.

    Raises
    ------
    KeyError
        If `X_z` is not present in `adata.obsm`.
    """
    adata = anndata.read_h5ad(embedding_data)

    try:
        embedding = adata.obsm["X_z"]
    except KeyError as err:
        raise KeyError(
            f"'X_z' not found in obsm of {embedding_data}; "
            "the embedding matrix is required to find neighbors"
        ) from err

    # Create the destination up front so a missing directory does not surface
    # only after the neighbor search has finished. An empty string means
    # "current directory" for os.path.join, so there is nothing to create.
    if out_directory:
        os.makedirs(out_directory, exist_ok=True)

    # Keep the kNN graph sparse: it stores only one entry per (cell, neighbor)
    # pair, whereas a dense copy would need n_cells x n_cells values.
    graph = kneighbors_graph(
        embedding,
        N_neighbor,
        mode="connectivity",
        include_self=True,
        n_jobs=n_jobs,
    )

    # Coordinates of every stored edge, ordered by target cell and then by
    # neighbor position (lexsort uses the LAST key as the primary key).
    row_idx, col_idx = graph.nonzero()
    order = np.lexsort((col_idx, row_idx))

    cell_ids = np.asarray(adata.obs.index)
    neighbor_table = pd.DataFrame(
        {
            "main_name": cell_ids[row_idx[order]],
            "neighbor": cell_ids[col_idx[order]],
        }
    )

    out_path = os.path.join(out_directory, f"N_{out_name}_{N_neighbor}.csv")
    neighbor_table.to_csv(out_path, index=False)
    print(f"Saved neighbor list to {out_path}")