{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Alternative Splicing Analysis" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Step 1: Convert Outrigger PSI output to .h5ad format\n", "\n", "This step converts the Outrigger PSI matrix into an `.h5ad` file for downstream analysis. \n", "Missing (NaN) values are preserved to reflect unquantified splicing events.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from DOLPHIN.AS.convert_psi_to_h5ad import run_convert_psi" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 795/795 [05:34<00:00, 2.38it/s]\n" ] } ], "source": [ "adata_psi = run_convert_psi(\n", " metadata_path=\"./fsla_meta.csv\",\n", " outrigger_path=\"./outrigger_output\",\n", " out_name='fsla',\n", " out_directory=\"./\"\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 795 × 9487\n", " obs: 'celltype1', 'celltype2'\n", " var: 'gene_name'" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adata_psi " ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | isoform1=junction:10:100246936-100253420:-|isoform2=junction:10:100250333-100253420:-@exon:10:100250248-100250332:-@junction:10:100246936-100250247:- | \n", "isoform1=junction:10:100256477-100260965:-|isoform2=junction:10:100260320-100260965:-@exon:10:100260218-100260319:-@junction:10:100256477-100260217:- | \n", "isoform1=junction:10:100489762-100490705:-|isoform2=junction:10:100490323-100490705:-@exon:10:100490008-100490322:-@junction:10:100489762-100490007:- | \n", "isoform1=junction:10:100496432-100497666:-|isoform2=junction:10:100497281-100497666:-@exon:10:100497135-100497280:-@junction:10:100496432-100497134:- | \n", "isoform1=junction:10:100498208-100499159:-|isoform2=junction:10:100498805-100499159:-@exon:10:100498705-100498804:-@junction:10:100498208-100498704:- | \n", "isoform1=junction:10:100516961-100526974:-|isoform2=junction:10:100526555-100526974:-@exon:10:100526399-100526554:-@junction:10:100516961-100526398:- | \n", "isoform1=junction:10:100523930-100526974:-|isoform2=junction:10:100526555-100526974:-@exon:10:100526399-100526554:-@junction:10:100523930-100526398:- | \n", "isoform1=junction:10:100983818-100986748:-|isoform2=junction:10:100984075-100986748:-@exon:10:100983948-100984074:-@junction:10:100983818-100983947:- | \n", "isoform1=junction:10:101611770-101624744:-|isoform2=junction:10:101612478-101624744:-@exon:10:101612337-101612477:-@junction:10:101611770-101612336:- | \n", "isoform1=junction:10:101624811-101672914:-|isoform2=junction:10:101667981-101672914:-@exon:10:101667886-101667980:-@junction:10:101624811-101667885:- | \n", "... | \n", "isoform1=junction:X:78945496-78960507:+|isoform2=junction:X:78945496-78952192:+@exon:X:78952193-78952335:+@junction:X:78952336-78960507:+ | \n", "isoform1=junction:X:78947864-78960507:+|isoform2=junction:X:78947864-78952192:+@exon:X:78952193-78952335:+@junction:X:78952336-78960507:+ | \n", "isoform1=junction:X:79361480-79362941:-|isoform2=junction:X:79362692-79362941:-@exon:X:79362581-79362691:-@junction:X:79361480-79362580:- | \n", "isoform1=junction:X:81202246-81276983:+|isoform2=junction:X:81202246-81202436:+@exon:X:81202437-81202576:+@junction:X:81202577-81276983:+ | \n", "isoform1=junction:Y:12909408-12912726:+|isoform2=junction:Y:12909408-12911838:+@exon:Y:12911839-12911968:+@junction:Y:12911969-12912726:+ | \n", "isoform1=junction:Y:13359987-13366266:-|isoform2=junction:Y:13360529-13366266:-@exon:Y:13360430-13360528:-@junction:Y:13359987-13360429:- | \n", "isoform1=junction:Y:19587508-19590082:+|isoform2=junction:Y:19587508-19589520:+@exon:Y:19589521-19589612:+@junction:Y:19589613-19590082:+ | \n", "isoform1=junction:Y:19735751-19741317:-|isoform2=junction:Y:19739663-19741317:-@exon:Y:19739528-19739662:-@junction:Y:19735751-19739527:- | \n", "isoform1=junction:Y:20582694-20588023:+|isoform2=junction:Y:20582694-20584473:+@exon:Y:20584474-20584524:+@junction:Y:20584525-20588023:+ | \n", "isoform1=junction:Y:2854772-2866792:+|isoform2=junction:Y:2854772-2865087:+@exon:Y:2865088-2865245:+@junction:Y:2865246-2866792:+ | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SRR18388386 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "
| SRR18387779 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.054945 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "
| SRR18387770 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "0.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "
| SRR18388394 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "
| SRR18387788 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.0 | \n", "
5 rows × 9487 columns
\n", "| \n", " | isoform1=junction:10:100246936-100253420:-|isoform2=junction:10:100250333-100253420:-@exon:10:100250248-100250332:-@junction:10:100246936-100250247:- | \n", "isoform1=junction:10:100256477-100260965:-|isoform2=junction:10:100260320-100260965:-@exon:10:100260218-100260319:-@junction:10:100256477-100260217:- | \n", "isoform1=junction:10:100489762-100490705:-|isoform2=junction:10:100490323-100490705:-@exon:10:100490008-100490322:-@junction:10:100489762-100490007:- | \n", "isoform1=junction:10:100496432-100497666:-|isoform2=junction:10:100497281-100497666:-@exon:10:100497135-100497280:-@junction:10:100496432-100497134:- | \n", "isoform1=junction:10:100498208-100499159:-|isoform2=junction:10:100498805-100499159:-@exon:10:100498705-100498804:-@junction:10:100498208-100498704:- | \n", "isoform1=junction:10:100516961-100526974:-|isoform2=junction:10:100526555-100526974:-@exon:10:100526399-100526554:-@junction:10:100516961-100526398:- | \n", "isoform1=junction:10:100523930-100526974:-|isoform2=junction:10:100526555-100526974:-@exon:10:100526399-100526554:-@junction:10:100523930-100526398:- | \n", "isoform1=junction:10:100983818-100986748:-|isoform2=junction:10:100984075-100986748:-@exon:10:100983948-100984074:-@junction:10:100983818-100983947:- | \n", "isoform1=junction:10:101611770-101624744:-|isoform2=junction:10:101612478-101624744:-@exon:10:101612337-101612477:-@junction:10:101611770-101612336:- | \n", "isoform1=junction:10:101624811-101672914:-|isoform2=junction:10:101667981-101672914:-@exon:10:101667886-101667980:-@junction:10:101624811-101667885:- | \n", "... | \n", "isoform1=junction:X:78945496-78960507:+|isoform2=junction:X:78945496-78952192:+@exon:X:78952193-78952335:+@junction:X:78952336-78960507:+ | \n", "isoform1=junction:X:78947864-78960507:+|isoform2=junction:X:78947864-78952192:+@exon:X:78952193-78952335:+@junction:X:78952336-78960507:+ | \n", "isoform1=junction:X:79361480-79362941:-|isoform2=junction:X:79362692-79362941:-@exon:X:79362581-79362691:-@junction:X:79361480-79362580:- | \n", "isoform1=junction:X:81202246-81276983:+|isoform2=junction:X:81202246-81202436:+@exon:X:81202437-81202576:+@junction:X:81202577-81276983:+ | \n", "isoform1=junction:Y:12909408-12912726:+|isoform2=junction:Y:12909408-12911838:+@exon:Y:12911839-12911968:+@junction:Y:12911969-12912726:+ | \n", "isoform1=junction:Y:13359987-13366266:-|isoform2=junction:Y:13360529-13366266:-@exon:Y:13360430-13360528:-@junction:Y:13359987-13360429:- | \n", "isoform1=junction:Y:19587508-19590082:+|isoform2=junction:Y:19587508-19589520:+@exon:Y:19589521-19589612:+@junction:Y:19589613-19590082:+ | \n", "isoform1=junction:Y:19735751-19741317:-|isoform2=junction:Y:19739663-19741317:-@exon:Y:19739528-19739662:-@junction:Y:19735751-19739527:- | \n", "isoform1=junction:Y:20582694-20588023:+|isoform2=junction:Y:20582694-20584473:+@exon:Y:20584474-20584524:+@junction:Y:20584525-20588023:+ | \n", "isoform1=junction:Y:2854772-2866792:+|isoform2=junction:Y:2854772-2865087:+@exon:Y:2865088-2865245:+@junction:Y:2865246-2866792:+ | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| SRR18388386 | \n", "0.548814 | \n", "0.715189 | \n", "0.602763 | \n", "0.544883 | \n", "0.423655 | \n", "0.645894 | \n", "1.000000 | \n", "0.891773 | \n", "0.963663 | \n", "0.383442 | \n", "... | \n", "0.192200 | \n", "0.916999 | \n", "1.000000 | \n", "0.000000 | \n", "0.224325 | \n", "0.646099 | \n", "0.377303 | \n", "0.239175 | \n", "0.843921 | \n", "1.0 | \n", "
| SRR18387779 | \n", "0.471649 | \n", "0.285935 | \n", "0.872293 | \n", "0.419384 | \n", "0.465397 | \n", "0.191993 | \n", "1.000000 | \n", "0.549905 | \n", "0.656898 | \n", "0.418817 | \n", "... | \n", "0.690972 | \n", "0.570053 | \n", "0.554895 | \n", "0.054945 | \n", "0.350364 | \n", "0.765937 | \n", "0.074863 | \n", "0.808629 | \n", "0.241341 | \n", "1.0 | \n", "
| SRR18387770 | \n", "0.467157 | \n", "0.207176 | \n", "0.913840 | \n", "0.688435 | \n", "0.001312 | \n", "0.802888 | \n", "0.192368 | \n", "0.410850 | \n", "0.828048 | \n", "0.916628 | \n", "... | \n", "0.655658 | \n", "0.150540 | \n", "1.000000 | \n", "0.000000 | \n", "0.511076 | \n", "0.095635 | \n", "0.835670 | \n", "0.217615 | \n", "0.790823 | \n", "1.0 | \n", "
| SRR18388394 | \n", "0.921630 | \n", "0.576743 | \n", "0.486409 | \n", "0.646680 | \n", "0.844161 | \n", "0.301350 | \n", "1.000000 | \n", "0.214558 | \n", "0.589372 | \n", "0.956229 | \n", "... | \n", "0.258662 | \n", "0.366379 | \n", "0.270519 | \n", "0.613285 | \n", "0.829367 | \n", "0.948184 | \n", "0.816119 | \n", "0.677352 | \n", "0.157222 | \n", "1.0 | \n", "
| SRR18387788 | \n", "0.978044 | \n", "0.774697 | \n", "0.780661 | \n", "0.542142 | \n", "0.946817 | \n", "0.996528 | \n", "1.000000 | \n", "0.841271 | \n", "0.634649 | \n", "0.234987 | \n", "... | \n", "0.359507 | \n", "0.236903 | \n", "1.000000 | \n", "0.950538 | \n", "0.375510 | \n", "0.247062 | \n", "0.320256 | \n", "0.047280 | \n", "0.274863 | \n", "1.0 | \n", "
5 rows × 9487 columns
\n", "