From d293b1e37cba46bf0eef89881d48d150a22bdea0 Mon Sep 17 00:00:00 2001 From: Merel Kuijs Date: Thu, 10 Jul 2025 12:48:07 +0200 Subject: [PATCH 1/3] Add notebook showing how the heart dataset was created --- notebooks/processing/processing_visium.ipynb | 381 +++++++++++++++++++ 1 file changed, 381 insertions(+) create mode 100644 notebooks/processing/processing_visium.ipynb diff --git a/notebooks/processing/processing_visium.ipynb b/notebooks/processing/processing_visium.ipynb new file mode 100644 index 0000000..e1aa75d --- /dev/null +++ b/notebooks/processing/processing_visium.ipynb @@ -0,0 +1,381 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "92dea304", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import re\n", + "\n", + "import numpy as np\n", + "import scanpy as sc\n", + "import squidpy as sq\n", + "import anndata as ad\n", + "\n", + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "766544dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0.10.9'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ad.__version__" + ] + }, + { + "cell_type": "markdown", + "id": "86fce258-5977-48c2-a9b8-1efa65388313", + "metadata": {}, + "source": [ + "## Load data\n", + "\n", + "Download the Visium data from https://zenodo.org/records/6578047.\n", + "Download all files whose names start with \"Visium\" and place them in the ```processed``` folder within the user-specified working directory (```WD```)." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a956c20f-eb6a-4c57-adfe-37ec9839ea9a", + "metadata": {}, + "outputs": [], + "source": [ + "WD = \"/data/visium_heart\" # change as needed" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "77028847-4520-45bf-b091-8babd86652ff", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_3880255/1306160217.py:14: FutureWarning: Use anndata.concat instead of AnnData.concatenate, AnnData.concatenate is deprecated and will be removed in the future. See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html\n", + " adata_combined = ad.AnnData.concatenate(*adatas, join='inner', batch_key='sample', batch_categories=sample_names)\n" + ] + }, + { + "data": { + "text/plain": [ + "AnnData object with n_obs × n_vars = 88704 × 11681\n", + " obs: 'n_counts', 'n_genes', 'percent.mt', 'Adipocyte', 'Cardiomyocyte', 'Endothelial', 'Fibroblast', 'Lymphoid', 'Mast', 'Myeloid', 'Neuronal', 'Pericyte', 'Cycling.cells', 'vSMCs', 'cell_type_original', 'assay_ontology_term_id', 'cell_type_ontology_term_id', 'development_stage_ontology_term_id', 'disease_ontology_term_id', 'ethnicity_ontology_term_id', 'is_primary_data', 'organism_ontology_term_id', 'sex_ontology_term_id', 'tissue_ontology_term_id', 'sample'\n", + " var: 'features'\n", + " obsm: 'X_pca', 'X_spatial', 'X_umap'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "input_dir = os.path.join(WD, \"processed\")\n", + "\n", + "# Find all .h5ad files in the directory\n", + "h5ad_files = [os.path.join(input_dir, f) for f in os.listdir(input_dir) if f.endswith(\".h5ad\")]\n", + "\n", + "# Load each file into an AnnData object\n", + "adatas = [sc.read_h5ad(f) for f in h5ad_files]\n", + "\n", + "# Extract clean sample names for each file using regex\n", + "sample_names = [re.search(r'Visium_(.+?)\\.h5ad', os.path.basename(f)).group(1) for f in h5ad_files]\n", + "\n", + "# Concatenate all AnnData objects\n", + "# Adjust `join='outer'` or `join='inner'` depending on whether you want to keep all or only common genes\n", + "adata_combined = ad.AnnData.concatenate(*adatas, join='inner', batch_key='sample', batch_categories=sample_names)\n", + "adata_combined" + ] + }, + { + "cell_type": "markdown", + "id": "d57e9cf5-5623-402b-8ed2-7f80499c1b0e", + "metadata": {}, + "source": [ + "## Inspect data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "69b1f549-5d50-4012-a5ea-b58477dc2be6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0\n", + "8.193758066884202\n" + ] + } + ], + "source": [ + "print(np.min(adata_combined.X))\n", + "print(np.max(adata_combined.X))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "155b20bb-8d98-411d-8848-c449b0fd69ff", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Minimum number of cells a gene is expressed in: 786\n" + ] + } + ], + "source": [ + "# Count in how many cells each gene is expressed\n", + "gene_expression_counts = np.array((adata_combined.X > 0).sum(axis=0)).flatten()\n", + "min_cells_per_gene = gene_expression_counts.min()\n", + "print(f\"Minimum number of cells a gene is expressed in: {min_cells_per_gene}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "e6884c74-8449-431e-9987-226614b6ca67", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Minimum number of genes a cell expresses: 292\n" + ] + } + ], + "source": [ + "# Count how many genes are expressed in each cell\n", + "cell_expression_counts = np.array((adata_combined.X > 0).sum(axis=1)).flatten()\n", + "min_genes_per_cell = cell_expression_counts.min()\n", + "print(f\"Minimum number of genes a cell expresses: {min_genes_per_cell}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "09f40b8f-9417-4d7e-88f1-e2c9240ed874", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "sample\n", + "RZ_BZ_P3 4659\n", + "GT_IZ_P9 4361\n", + "control_P1 4269\n", + "FZ_GT_P4 4253\n", + "IZ_BZ_P2 4203\n", + "GT_IZ_P9_rep2 4113\n", + "IZ_P3 3771\n", + "IZ_P10 3646\n", + "RZ_P9 3626\n", + "GT_IZ_P15 3572\n", + "RZ_GT_P2 3538\n", + "RZ_P6 3484\n", + "RZ_BZ_P12 3392\n", + "RZ_BZ_P2 3373\n", + "FZ_P14 3175\n", + "FZ_GT_P19 3100\n", + "IZ_P15 3083\n", + "RZ_FZ_P5 3082\n", + "RZ_P3 2994\n", + "control_P7 2931\n", + "IZ_P16 2713\n", + "FZ_P18 2551\n", + "control_P8 2456\n", + "FZ_P20 2410\n", + "control_P17 2043\n", + "RZ_P11 2016\n", + "GT_IZ_P13 1890\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adata_combined.obs['sample'].value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "37fa330c-cab4-433a-8f1d-e88031a9c570", + "metadata": {}, + "source": [ + "## Continue building adata" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "fada886f-89a6-4f9f-9b4b-ebbd9ffd7574", + "metadata": {}, + "outputs": [], + "source": [ + "assay_map = {\n", + " 'EFO:0010961': 'Visium Spatial Gene Expression'\n", + "}\n", + "\n", + "cell_type_map = {\n", + " 'CL:0000513': 'cardiac muscle myoblast',\n", + " 'CL:0002548': 'fibroblast of cardiac tissue',\n", + " 'CL:0010008': 'cardiac endothelial cell',\n", + " 'CL:0001082': 'immature innate lymphoid cell',\n", + " 'CL:0000003': 'native cell',\n", + " 'CL:0000514': 'smooth muscle myoblast',\n", + " 'CL:0000669': 'pericyte cell',\n", + " 'CL:0000838': 'lymphoid lineage restricted progenitor cell',\n", + " 'CL:0000006': 'neuronal receptor cell', \n", + " 'CL:0000097': 'mast cell',\n", + " 'CL:1000311': 'adipocyte of epicardial fat of left ventricle'\n", + "}\n", + "\n", + "development_stage_map = {\n", + " 'HsapDv:0000138': '44-year-old human stage', \n", + " 'HsapDv:0000151': '57-year-old human stage',\n", + " 'HsapDv:0000146': '52-year-old human stage',\n", + " 'HsapDv:0000137': '43-year-old human stage',\n", + " 'HsapDv:0000160': '66-year-old human stage',\n", + " 'HsapDv:0000168': '74-year-old human stage',\n", + " 'HsapDv:0000132': '38-year-old human stage',\n", + " 'HsapDv:0000141': '47-year-old human stage',\n", + " 'HsapDv:0000134': '40-year-old human stage',\n", + " 'HsapDv:0000152': '58-year-old human stage',\n", + " 'HsapDv:0000157': '63-year-old human stage',\n", + " 'HsapDv:0000149': '55-year-old human stage',\n", + " 'HsapDv:0000158': '64-year-old human stage',\n", + " 'HsapDv:0000155': '61-year-old human stage',\n", + " 'HsapDv:0000154': '60-year-old human stage',\n", + " 'HsapDv:0000145': '51-year-old human stage'\n", + "}\n", + "\n", + "disease_map = {\n", + " 'MONDO:0005068': 'myocardial infarction',\n", + " 'PATO:0000461': 'normal'\n", + "}\n", + "\n", + "ethnicity_map = {\n", + " 'HANCESTRO:0005': 'European'\n", + "}\n", + "\n", + "organism_map = {\n", + " 'NCBITaxon:9606': 'Homo sapiens'\n", + "}\n", + "\n", + "sex_map = {\n", + " 'PATO:0000383': 'female',\n", + " 'PATO:0000384': 'male'\n", + "}\n", + "\n", + "tissue_map = {\n", + " 'UBERON:0002084': 'heart left ventricle'\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "14efcbea-981e-4740-8d35-acda28d7c932", + "metadata": {}, + "outputs": [], + "source": [ + "adata_combined.obs['assay'] = adata_combined.obs['assay_ontology_term_id'].map(assay_map)\n", + "adata_combined.obs['cell_type'] = adata_combined.obs['cell_type_ontology_term_id'].map(cell_type_map)\n", + "adata_combined.obs['development_stage'] = adata_combined.obs['development_stage_ontology_term_id'].map(development_stage_map)\n", + "adata_combined.obs['disease'] = adata_combined.obs['disease_ontology_term_id'].map(disease_map)\n", + "adata_combined.obs['ethnicity'] = adata_combined.obs['ethnicity_ontology_term_id'].map(ethnicity_map)\n", + "adata_combined.obs['organism'] = adata_combined.obs['organism_ontology_term_id'].map(organism_map)\n", + "adata_combined.obs['sex'] = adata_combined.obs['sex_ontology_term_id'].map(sex_map)\n", + "adata_combined.obs['tissue'] = adata_combined.obs['tissue_ontology_term_id'].map(tissue_map)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "6ec041fd-23c1-4a81-8396-83bbd46009d9", + "metadata": {}, + "outputs": [], + "source": [ + "adata_combined.obsm['spatial'] = adata_combined.obsm['X_spatial'].copy()" + ] + }, + { + "cell_type": "markdown", + "id": "a61b6073-1008-463a-951a-17d2d5fe4426", + "metadata": {}, + "source": [ + "## Save adata" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "3d04127c-ad9a-43ec-a0ad-3da1f6b8f7ba", + "metadata": {}, + "outputs": [], + "source": [ + "adata_combined.write_h5ad(os.path.join(WD, \"adata_combined.h5ad\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c390dafd-45ba-46b3-80be-67bb80118b7e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "graphcompass_venv", + "language": "python", + "name": "graphcompass_venv" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From b8e266948114c5ad86cd9654619659e547e2888c Mon Sep 17 00:00:00 2001 From: Merel Kuijs Date: Fri, 11 Jul 2025 03:01:52 +0200 Subject: [PATCH 2/3] Add metadata --- notebooks/processing/processing_visium.ipynb | 812 ++++++++++++++++++- 1 file changed, 808 insertions(+), 4 deletions(-) diff --git a/notebooks/processing/processing_visium.ipynb b/notebooks/processing/processing_visium.ipynb index e1aa75d..932ee89 100644 --- a/notebooks/processing/processing_visium.ipynb +++ b/notebooks/processing/processing_visium.ipynb @@ -11,6 +11,7 @@ "import re\n", "\n", "import numpy as np\n", + "import pandas as pd\n", "import scanpy as sc\n", "import squidpy as sq\n", "import anndata as ad\n", @@ -58,7 +59,8 @@ "metadata": {}, "outputs": [], "source": [ - "WD = \"/data/visium_heart\" # change as needed" + "# WD = \"/data/visium_heart\" # change as needed\n", + "WD = \"/dss/dssfs02/lwp-dss-0001/pn36po/pn36po-dss-0001/di93vel/visium_heart\"" ] }, { @@ -71,7 +73,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3880255/1306160217.py:14: FutureWarning: Use anndata.concat instead of AnnData.concatenate, AnnData.concatenate is deprecated and will be removed in the future. See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html\n", + "/tmp/ipykernel_3881765/1306160217.py:14: FutureWarning: Use anndata.concat instead of AnnData.concatenate, AnnData.concatenate is deprecated and will be removed in the future. See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html\n", " adata_combined = ad.AnnData.concatenate(*adatas, join='inner', batch_key='sample', batch_categories=sample_names)\n" ] }, @@ -330,6 +332,808 @@ "adata_combined.obsm['spatial'] = adata_combined.obsm['X_spatial'].copy()" ] }, + { + "cell_type": "code", + "execution_count": 12, + "id": "dd13c91a-3294-4f9a-b3a9-8c8fdda5c45e", + "metadata": {}, + "outputs": [], + "source": [ + "adata_combined.write_h5ad(os.path.join(WD, \"adata_combined.h5ad\"))" + ] + }, + { + "cell_type": "markdown", + "id": "692bc4b8-a147-44be-b711-2985e6fffad0", + "metadata": {}, + "source": [ + "Download the Visium metadata from https://zenodo.org/records/6580069 and place the file ```metadata-Visium.csv``` in the working directory (```WD```)." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "53a1da00-00f5-40fe-a505-4fa3f74ef2c3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
patientpatient_region_idpatient_groupmajor_lablbatchhca_sample_id
0P5RZ/FZ_P5group_1FZ10X10X0027
1P3IZ_P3group_2IZ10X10X0017
2P3RZ/BZ_P3group_1BZ10X10X0026
3P3RZ_P3group_1RZ10X10X0020
4P2IZ/BZ_P2group_2IZ10X10X0018
5P2RZ/BZ_P2group_1BZ10X10X0025
6P4FZ/GT_P4group_3FZ10X10X009
7P1control_P1group_1CTRL10X10X001
8P17control_P17group_1CTRLACHACH002
9P12RZ/BZ_P12group_1BZACHACH0024
10P13GT/IZ_P13group_2IZACHACH0010
11P14FZ_P14group_3FZACHACH005
12P18FZ_P18group_3FZACHACH006
13P19FZ/GT_P19group_3FZACHACH008
14P16IZ_P16group_2IZACHACH0016
15P20FZ_P20group_3FZACHACH007
16P6RZ_P6group_1RZACHACH0022
17P15GT/IZ_P15group_2IZACHACH0011
18P9GT/IZ_P9_rep2group_2IZACHACH0013
20P15IZ_P15group_2IZACHACH0015
21P7control_P7group_1CTRLACHACH003
22P8control_P8group_1CTRLACHACH004
23P2RZ/GT_P2group_1RZACHACH0028
24P9RZ_P9group_1RZACHACH0023
25P10IZ_P10group_2IZACHACH0014
26P11RZ_P11group_1RZACHACH0019
27P9GT/IZ_P9group_2IZACHACH0012
\n", + "
" + ], + "text/plain": [ + " patient patient_region_id patient_group major_labl batch hca_sample_id\n", + "0 P5 RZ/FZ_P5 group_1 FZ 10X 10X0027\n", + "1 P3 IZ_P3 group_2 IZ 10X 10X0017\n", + "2 P3 RZ/BZ_P3 group_1 BZ 10X 10X0026\n", + "3 P3 RZ_P3 group_1 RZ 10X 10X0020\n", + "4 P2 IZ/BZ_P2 group_2 IZ 10X 10X0018\n", + "5 P2 RZ/BZ_P2 group_1 BZ 10X 10X0025\n", + "6 P4 FZ/GT_P4 group_3 FZ 10X 10X009\n", + "7 P1 control_P1 group_1 CTRL 10X 10X001\n", + "8 P17 control_P17 group_1 CTRL ACH ACH002\n", + "9 P12 RZ/BZ_P12 group_1 BZ ACH ACH0024\n", + "10 P13 GT/IZ_P13 group_2 IZ ACH ACH0010\n", + "11 P14 FZ_P14 group_3 FZ ACH ACH005\n", + "12 P18 FZ_P18 group_3 FZ ACH ACH006\n", + "13 P19 FZ/GT_P19 group_3 FZ ACH ACH008\n", + "14 P16 IZ_P16 group_2 IZ ACH ACH0016\n", + "15 P20 FZ_P20 group_3 FZ ACH ACH007\n", + "16 P6 RZ_P6 group_1 RZ ACH ACH0022\n", + "17 P15 GT/IZ_P15 group_2 IZ ACH ACH0011\n", + "18 P9 GT/IZ_P9_rep2 group_2 IZ ACH ACH0013\n", + "20 P15 IZ_P15 group_2 IZ ACH ACH0015\n", + "21 P7 control_P7 group_1 CTRL ACH ACH003\n", + "22 P8 control_P8 group_1 CTRL ACH ACH004\n", + "23 P2 RZ/GT_P2 group_1 RZ ACH ACH0028\n", + "24 P9 RZ_P9 group_1 RZ ACH ACH0023\n", + "25 P10 IZ_P10 group_2 IZ ACH ACH0014\n", + "26 P11 RZ_P11 group_1 RZ ACH ACH0019\n", + "27 P9 GT/IZ_P9 group_2 IZ ACH ACH0012" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metadata = pd.read_csv(os.path.join(WD, 'metadata-Visium.csv'))\n", + "\n", + "# Note that no processed data is available for the hca_sample_id ACH0021.\n", + "metadata = metadata[metadata['hca_sample_id'] != 'ACH0021']\n", + "metadata = metadata.drop_duplicates()\n", + "metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "7a0cc2f1-0c78-426b-acb6-64a27feeea12", + "metadata": {}, + "outputs": [], + "source": [ + "# Replace '/' with '_' in patient_region_id to match the sample naming convention in adata_combined.obs\n", + "metadata['patient_region_id'] = metadata['patient_region_id'].str.replace('/', '_', regex=False)\n", + "metadata_indexed = metadata.set_index('patient_region_id')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "11c93020-1a8e-41f3-ac98-63562a8a98e7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
n_countsn_genespercent.mtAdipocyteCardiomyocyteEndothelialFibroblastLymphoidMastMyeloid...cell_typedevelopment_stagediseaseethnicityorganismsextissuepatientbatchhca_sample_id
AAACAAGTATCTCCCA-1-control_P14429.0214729.5754650.0015980.4373240.0243560.3183030.0225370.0038580.059224...cardiac muscle myoblast44-year-old human stagenormalEuropeanHomo sapiensfemaleheart left ventricleP110X10X001
AAACAATCTACTAGCA-1-control_P13037.0159131.2998980.0004820.7439490.0829480.0896870.0023310.0005620.039163...cardiac muscle myoblast44-year-old human stagenormalEuropeanHomo sapiensfemaleheart left ventricleP110X10X001
AAACACCAATAACTGC-1-control_P12507.0146223.9566400.0019740.3752960.1679840.1516150.0041230.0330530.009395...cardiac muscle myoblast44-year-old human stagenormalEuropeanHomo sapiensfemaleheart left ventricleP110X10X001
AAACAGAGCGACTCCT-1-control_P12502.0134133.0188680.0001100.6526960.0959140.1545880.0050170.0009770.055517...cardiac muscle myoblast44-year-old human stagenormalEuropeanHomo sapiensfemaleheart left ventricleP110X10X001
AAACAGCTTTCAGAAG-1-control_P13054.0161733.6381320.0000840.5336600.1641570.0659990.0044930.0015320.072105...cardiac muscle myoblast44-year-old human stagenormalEuropeanHomo sapiensfemaleheart left ventricleP110X10X001
..................................................................
TTGTTTCACATCCAGG-1-FZ_GT_P41221.083262.4395490.0053410.0822690.0520380.5115660.0130430.0008540.260980...fibroblast of cardiac tissue74-year-old human stagemyocardial infarctionEuropeanHomo sapiensfemaleheart left ventricleP410X10X009
TTGTTTCATTAGTCTA-1-FZ_GT_P41001.065356.4194580.0382690.3443020.0826310.3491320.0109140.0005170.014096...fibroblast of cardiac tissue74-year-old human stagemyocardial infarctionEuropeanHomo sapiensfemaleheart left ventricleP410X10X009
TTGTTTCCATACAACT-1-FZ_GT_P4823.058866.1510130.0001320.1114440.2873190.2768200.0447790.0013960.184154...cardiac endothelial cell74-year-old human stagemyocardial infarctionEuropeanHomo sapiensfemaleheart left ventricleP410X10X009
TTGTTTGTATTACACG-1-FZ_GT_P4511.039173.8279470.0003860.0021040.0903310.7503870.0020420.0004720.003078...fibroblast of cardiac tissue74-year-old human stagemyocardial infarctionEuropeanHomo sapiensfemaleheart left ventricleP410X10X009
TTGTTTGTGTAAATTC-1-FZ_GT_P4935.053272.4021400.0272600.0071280.4789640.4537480.0028210.0004570.010041...cardiac endothelial cell74-year-old human stagemyocardial infarctionEuropeanHomo sapiensfemaleheart left ventricleP410X10X009
\n", + "

88704 rows × 36 columns

\n", + "
" + ], + "text/plain": [ + " n_counts n_genes percent.mt Adipocyte \\\n", + "AAACAAGTATCTCCCA-1-control_P1 4429.0 2147 29.575465 0.001598 \n", + "AAACAATCTACTAGCA-1-control_P1 3037.0 1591 31.299898 0.000482 \n", + "AAACACCAATAACTGC-1-control_P1 2507.0 1462 23.956640 0.001974 \n", + "AAACAGAGCGACTCCT-1-control_P1 2502.0 1341 33.018868 0.000110 \n", + "AAACAGCTTTCAGAAG-1-control_P1 3054.0 1617 33.638132 0.000084 \n", + "... ... ... ... ... \n", + "TTGTTTCACATCCAGG-1-FZ_GT_P4 1221.0 832 62.439549 0.005341 \n", + "TTGTTTCATTAGTCTA-1-FZ_GT_P4 1001.0 653 56.419458 0.038269 \n", + "TTGTTTCCATACAACT-1-FZ_GT_P4 823.0 588 66.151013 0.000132 \n", + "TTGTTTGTATTACACG-1-FZ_GT_P4 511.0 391 73.827947 0.000386 \n", + "TTGTTTGTGTAAATTC-1-FZ_GT_P4 935.0 532 72.402140 0.027260 \n", + "\n", + " Cardiomyocyte Endothelial Fibroblast \\\n", + "AAACAAGTATCTCCCA-1-control_P1 0.437324 0.024356 0.318303 \n", + "AAACAATCTACTAGCA-1-control_P1 0.743949 0.082948 0.089687 \n", + "AAACACCAATAACTGC-1-control_P1 0.375296 0.167984 0.151615 \n", + "AAACAGAGCGACTCCT-1-control_P1 0.652696 0.095914 0.154588 \n", + "AAACAGCTTTCAGAAG-1-control_P1 0.533660 0.164157 0.065999 \n", + "... ... ... ... \n", + "TTGTTTCACATCCAGG-1-FZ_GT_P4 0.082269 0.052038 0.511566 \n", + "TTGTTTCATTAGTCTA-1-FZ_GT_P4 0.344302 0.082631 0.349132 \n", + "TTGTTTCCATACAACT-1-FZ_GT_P4 0.111444 0.287319 0.276820 \n", + "TTGTTTGTATTACACG-1-FZ_GT_P4 0.002104 0.090331 0.750387 \n", + "TTGTTTGTGTAAATTC-1-FZ_GT_P4 0.007128 0.478964 0.453748 \n", + "\n", + " Lymphoid Mast Myeloid ... \\\n", + "AAACAAGTATCTCCCA-1-control_P1 0.022537 0.003858 0.059224 ... \n", + "AAACAATCTACTAGCA-1-control_P1 0.002331 0.000562 0.039163 ... \n", + "AAACACCAATAACTGC-1-control_P1 0.004123 0.033053 0.009395 ... \n", + "AAACAGAGCGACTCCT-1-control_P1 0.005017 0.000977 0.055517 ... \n", + "AAACAGCTTTCAGAAG-1-control_P1 0.004493 0.001532 0.072105 ... \n", + "... ... ... ... ... \n", + "TTGTTTCACATCCAGG-1-FZ_GT_P4 0.013043 0.000854 0.260980 ... \n", + "TTGTTTCATTAGTCTA-1-FZ_GT_P4 0.010914 0.000517 0.014096 ... \n", + "TTGTTTCCATACAACT-1-FZ_GT_P4 0.044779 0.001396 0.184154 ... \n", + "TTGTTTGTATTACACG-1-FZ_GT_P4 0.002042 0.000472 0.003078 ... \n", + "TTGTTTGTGTAAATTC-1-FZ_GT_P4 0.002821 0.000457 0.010041 ... \n", + "\n", + " cell_type \\\n", + "AAACAAGTATCTCCCA-1-control_P1 cardiac muscle myoblast \n", + "AAACAATCTACTAGCA-1-control_P1 cardiac muscle myoblast \n", + "AAACACCAATAACTGC-1-control_P1 cardiac muscle myoblast \n", + "AAACAGAGCGACTCCT-1-control_P1 cardiac muscle myoblast \n", + "AAACAGCTTTCAGAAG-1-control_P1 cardiac muscle myoblast \n", + "... ... \n", + "TTGTTTCACATCCAGG-1-FZ_GT_P4 fibroblast of cardiac tissue \n", + "TTGTTTCATTAGTCTA-1-FZ_GT_P4 fibroblast of cardiac tissue \n", + "TTGTTTCCATACAACT-1-FZ_GT_P4 cardiac endothelial cell \n", + "TTGTTTGTATTACACG-1-FZ_GT_P4 fibroblast of cardiac tissue \n", + "TTGTTTGTGTAAATTC-1-FZ_GT_P4 cardiac endothelial cell \n", + "\n", + " development_stage disease \\\n", + "AAACAAGTATCTCCCA-1-control_P1 44-year-old human stage normal \n", + "AAACAATCTACTAGCA-1-control_P1 44-year-old human stage normal \n", + "AAACACCAATAACTGC-1-control_P1 44-year-old human stage normal \n", + "AAACAGAGCGACTCCT-1-control_P1 44-year-old human stage normal \n", + "AAACAGCTTTCAGAAG-1-control_P1 44-year-old human stage normal \n", + "... ... ... \n", + "TTGTTTCACATCCAGG-1-FZ_GT_P4 74-year-old human stage myocardial infarction \n", + "TTGTTTCATTAGTCTA-1-FZ_GT_P4 74-year-old human stage myocardial infarction \n", + "TTGTTTCCATACAACT-1-FZ_GT_P4 74-year-old human stage myocardial infarction \n", + "TTGTTTGTATTACACG-1-FZ_GT_P4 74-year-old human stage myocardial infarction \n", + "TTGTTTGTGTAAATTC-1-FZ_GT_P4 74-year-old human stage myocardial infarction \n", + "\n", + " ethnicity organism sex \\\n", + "AAACAAGTATCTCCCA-1-control_P1 European Homo sapiens female \n", + "AAACAATCTACTAGCA-1-control_P1 European Homo sapiens female \n", + "AAACACCAATAACTGC-1-control_P1 European Homo sapiens female \n", + "AAACAGAGCGACTCCT-1-control_P1 European Homo sapiens female \n", + "AAACAGCTTTCAGAAG-1-control_P1 European Homo sapiens female \n", + "... ... ... ... \n", + "TTGTTTCACATCCAGG-1-FZ_GT_P4 European Homo sapiens female \n", + "TTGTTTCATTAGTCTA-1-FZ_GT_P4 European Homo sapiens female \n", + "TTGTTTCCATACAACT-1-FZ_GT_P4 European Homo sapiens female \n", + "TTGTTTGTATTACACG-1-FZ_GT_P4 European Homo sapiens female \n", + "TTGTTTGTGTAAATTC-1-FZ_GT_P4 European Homo sapiens female \n", + "\n", + " tissue patient batch \\\n", + "AAACAAGTATCTCCCA-1-control_P1 heart left ventricle P1 10X \n", + "AAACAATCTACTAGCA-1-control_P1 heart left ventricle P1 10X \n", + "AAACACCAATAACTGC-1-control_P1 heart left ventricle P1 10X \n", + "AAACAGAGCGACTCCT-1-control_P1 heart left ventricle P1 10X \n", + "AAACAGCTTTCAGAAG-1-control_P1 heart left ventricle P1 10X \n", + "... ... ... ... \n", + "TTGTTTCACATCCAGG-1-FZ_GT_P4 heart left ventricle P4 10X \n", + "TTGTTTCATTAGTCTA-1-FZ_GT_P4 heart left ventricle P4 10X \n", + "TTGTTTCCATACAACT-1-FZ_GT_P4 heart left ventricle P4 10X \n", + "TTGTTTGTATTACACG-1-FZ_GT_P4 heart left ventricle P4 10X \n", + "TTGTTTGTGTAAATTC-1-FZ_GT_P4 heart left ventricle P4 10X \n", + "\n", + " hca_sample_id \n", + "AAACAAGTATCTCCCA-1-control_P1 10X001 \n", + "AAACAATCTACTAGCA-1-control_P1 10X001 \n", + "AAACACCAATAACTGC-1-control_P1 10X001 \n", + "AAACAGAGCGACTCCT-1-control_P1 10X001 \n", + "AAACAGCTTTCAGAAG-1-control_P1 10X001 \n", + "... ... \n", + "TTGTTTCACATCCAGG-1-FZ_GT_P4 10X009 \n", + "TTGTTTCATTAGTCTA-1-FZ_GT_P4 10X009 \n", + "TTGTTTCCATACAACT-1-FZ_GT_P4 10X009 \n", + "TTGTTTGTATTACACG-1-FZ_GT_P4 10X009 \n", + "TTGTTTGTGTAAATTC-1-FZ_GT_P4 10X009 \n", + "\n", + "[88704 rows x 36 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Match metadata rows to adata_combined.obs by sample name (which corresponds to patient_region_id)\n", + "matched_metadata = metadata_indexed.loc[adata_combined.obs['sample']]\n", + "\n", + "# Add 'patient' and 'batch' columns from metadata to adata_combined.obs, preserving spot order\n", + "adata_combined.obs['patient'] = matched_metadata['patient'].values\n", + "adata_combined.obs['batch'] = matched_metadata['batch'].values\n", + "adata_combined.obs['hca_sample_id'] = matched_metadata['hca_sample_id'].values\n", + "adata_combined.obs" + ] + }, { "cell_type": "markdown", "id": "a61b6073-1008-463a-951a-17d2d5fe4426", @@ -340,12 +1144,12 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 16, "id": "3d04127c-ad9a-43ec-a0ad-3da1f6b8f7ba", "metadata": {}, "outputs": [], "source": [ - "adata_combined.write_h5ad(os.path.join(WD, \"adata_combined.h5ad\"))" + "adata_combined.write_h5ad(os.path.join(WD, \"adata_annotated.h5ad\"))" ] }, { From e2824243ede4f0ead154da1d9b3470e5528f67da Mon Sep 17 00:00:00 2001 From: Merel Kuijs Date: Fri, 11 Jul 2025 16:19:38 +0200 Subject: [PATCH 3/3] add condition column --- notebooks/processing/processing_visium.ipynb | 127 ++++++++++++------- 1 file changed, 81 insertions(+), 46 deletions(-) diff --git a/notebooks/processing/processing_visium.ipynb b/notebooks/processing/processing_visium.ipynb index 932ee89..b4f69f5 100644 --- a/notebooks/processing/processing_visium.ipynb +++ b/notebooks/processing/processing_visium.ipynb @@ -73,7 +73,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_3881765/1306160217.py:14: FutureWarning: Use anndata.concat instead of AnnData.concatenate, AnnData.concatenate is deprecated and will be removed in the future. See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html\n", + "/tmp/ipykernel_263833/1306160217.py:14: FutureWarning: Use anndata.concat instead of AnnData.concatenate, AnnData.concatenate is deprecated and will be removed in the future. See the tutorial for concat at: https://anndata.readthedocs.io/en/latest/concatenation.html\n", " adata_combined = ad.AnnData.concatenate(*adatas, join='inner', batch_key='sample', batch_categories=sample_names)\n" ] }, @@ -325,6 +325,54 @@ { "cell_type": "code", "execution_count": 11, + "id": "5bb5d9cf-b00c-43c7-a279-2ecf0cda5eb2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[['control_P1', 'control'],\n", + " ['IZ_P15', 'IZ'],\n", + " ['RZ_P6', 'RZ'],\n", + " ['RZ_P3', 'RZ'],\n", + " ['IZ_P16', 'IZ'],\n", + " ['control_P17', 'control'],\n", + " ['control_P7', 'control'],\n", + " ['RZ_P11', 'RZ'],\n", + " ['GT_IZ_P13', 'GT_IZ'],\n", + " ['RZ_BZ_P2', 'RZ_BZ'],\n", + " ['GT_IZ_P9_rep2', 'GT_IZ'],\n", + " ['IZ_P10', 'IZ'],\n", + " ['control_P8', 'control'],\n", + " ['FZ_P14', 'FZ'],\n", + " ['GT_IZ_P15', 'GT_IZ'],\n", + " ['FZ_P20', 'FZ'],\n", + " ['RZ_GT_P2', 'RZ_GT'],\n", + " ['GT_IZ_P9', 'GT_IZ'],\n", + " ['RZ_BZ_P12', 'RZ_BZ'],\n", + " ['IZ_BZ_P2', 'IZ_BZ'],\n", + " ['IZ_P3', 'IZ'],\n", + " ['RZ_BZ_P3', 'RZ_BZ'],\n", + " ['RZ_FZ_P5', 'RZ_FZ'],\n", + " ['RZ_P9', 'RZ'],\n", + " ['FZ_P18', 'FZ'],\n", + " ['FZ_GT_P19', 'FZ_GT'],\n", + " ['FZ_GT_P4', 'FZ_GT']]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "adata_combined.obs['condition'] = adata_combined.obs['sample'].str.split('_P').str[0].astype('category')\n", + "adata_combined.obs[['sample', 'condition']].drop_duplicates().values.tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, "id": "6ec041fd-23c1-4a81-8396-83bbd46009d9", "metadata": {}, "outputs": [], @@ -334,7 +382,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "dd13c91a-3294-4f9a-b3a9-8c8fdda5c45e", "metadata": {}, "outputs": [], @@ -352,7 +400,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "53a1da00-00f5-40fe-a505-4fa3f74ef2c3", "metadata": {}, "outputs": [ @@ -664,7 +712,7 @@ "27 P9 GT/IZ_P9 group_2 IZ ACH ACH0012" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -680,7 +728,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "7a0cc2f1-0c78-426b-acb6-64a27feeea12", "metadata": {}, "outputs": [], @@ -692,7 +740,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "11c93020-1a8e-41f3-ac98-63562a8a98e7", "metadata": {}, "outputs": [ @@ -728,13 +776,13 @@ " Mast\n", " Myeloid\n", " ...\n", - " cell_type\n", " development_stage\n", " disease\n", " ethnicity\n", " organism\n", " sex\n", " tissue\n", + " condition\n", " patient\n", " batch\n", " hca_sample_id\n", @@ -754,13 +802,13 @@ " 0.003858\n", " 0.059224\n", " ...\n", - " cardiac muscle myoblast\n", " 44-year-old human stage\n", " normal\n", " European\n", " Homo sapiens\n", " female\n", " heart left ventricle\n", + " control\n", " P1\n", " 10X\n", " 10X001\n", @@ -778,13 +826,13 @@ " 0.000562\n", " 0.039163\n", " ...\n", - " cardiac muscle myoblast\n", " 44-year-old human stage\n", " normal\n", " European\n", " Homo sapiens\n", " female\n", " heart left ventricle\n", + " control\n", " P1\n", " 10X\n", " 10X001\n", @@ -802,13 +850,13 @@ " 0.033053\n", " 0.009395\n", " ...\n", - " cardiac muscle myoblast\n", " 44-year-old human stage\n", " normal\n", " European\n", " Homo sapiens\n", " female\n", " heart left ventricle\n", + " control\n", " P1\n", " 10X\n", " 10X001\n", @@ -826,13 +874,13 @@ " 0.000977\n", " 0.055517\n", " ...\n", - " cardiac muscle myoblast\n", " 44-year-old human stage\n", " normal\n", " European\n", " Homo sapiens\n", " female\n", " heart left ventricle\n", + " control\n", " P1\n", " 10X\n", " 10X001\n", @@ -850,13 +898,13 @@ " 0.001532\n", " 0.072105\n", " ...\n", - " cardiac muscle myoblast\n", " 44-year-old human stage\n", " normal\n", " European\n", " Homo sapiens\n", " female\n", " heart left ventricle\n", + " control\n", " P1\n", " 10X\n", " 10X001\n", @@ -898,13 +946,13 @@ " 0.000854\n", " 0.260980\n", " ...\n", - " fibroblast of cardiac tissue\n", " 74-year-old human stage\n", " myocardial infarction\n", " European\n", " Homo sapiens\n", " female\n", " heart left ventricle\n", + " FZ_GT\n", " P4\n", " 10X\n", " 10X009\n", @@ -922,13 +970,13 @@ " 0.000517\n", " 0.014096\n", " ...\n", - " fibroblast of cardiac tissue\n", " 74-year-old human stage\n", " myocardial infarction\n", " European\n", " Homo sapiens\n", " female\n", " heart left ventricle\n", + " FZ_GT\n", " P4\n", " 10X\n", " 10X009\n", @@ -946,13 +994,13 @@ " 0.001396\n", " 0.184154\n", " ...\n", - " cardiac endothelial cell\n", " 74-year-old human stage\n", " myocardial infarction\n", " European\n", " Homo sapiens\n", " female\n", " heart left ventricle\n", + " FZ_GT\n", " P4\n", " 10X\n", " 10X009\n", @@ -970,13 +1018,13 @@ " 0.000472\n", " 0.003078\n", " ...\n", - " fibroblast of cardiac tissue\n", " 74-year-old human stage\n", " myocardial infarction\n", " European\n", " Homo sapiens\n", " female\n", " heart left ventricle\n", + " FZ_GT\n", " P4\n", " 10X\n", " 10X009\n", @@ -994,20 +1042,20 @@ " 0.000457\n", " 0.010041\n", " ...\n", - " cardiac endothelial cell\n", " 74-year-old human stage\n", " myocardial infarction\n", " European\n", " Homo sapiens\n", " female\n", " heart left ventricle\n", + " FZ_GT\n", " P4\n", " 10X\n", " 10X009\n", " \n", " \n", "\n", - "

88704 rows × 36 columns

\n", + "

88704 rows × 37 columns

\n", "" ], "text/plain": [ @@ -1050,19 +1098,6 @@ "TTGTTTGTATTACACG-1-FZ_GT_P4 0.002042 0.000472 0.003078 ... \n", "TTGTTTGTGTAAATTC-1-FZ_GT_P4 0.002821 0.000457 0.010041 ... \n", "\n", - " cell_type \\\n", - "AAACAAGTATCTCCCA-1-control_P1 cardiac muscle myoblast \n", - "AAACAATCTACTAGCA-1-control_P1 cardiac muscle myoblast \n", - "AAACACCAATAACTGC-1-control_P1 cardiac muscle myoblast \n", - "AAACAGAGCGACTCCT-1-control_P1 cardiac muscle myoblast \n", - "AAACAGCTTTCAGAAG-1-control_P1 cardiac muscle myoblast \n", - "... ... \n", - "TTGTTTCACATCCAGG-1-FZ_GT_P4 fibroblast of cardiac tissue \n", - "TTGTTTCATTAGTCTA-1-FZ_GT_P4 fibroblast of cardiac tissue \n", - "TTGTTTCCATACAACT-1-FZ_GT_P4 cardiac endothelial cell \n", - "TTGTTTGTATTACACG-1-FZ_GT_P4 fibroblast of cardiac tissue \n", - "TTGTTTGTGTAAATTC-1-FZ_GT_P4 cardiac endothelial cell \n", - "\n", " development_stage disease \\\n", "AAACAAGTATCTCCCA-1-control_P1 44-year-old human stage normal \n", "AAACAATCTACTAGCA-1-control_P1 44-year-old human stage normal \n", @@ -1089,18 +1124,18 @@ "TTGTTTGTATTACACG-1-FZ_GT_P4 European Homo sapiens female \n", "TTGTTTGTGTAAATTC-1-FZ_GT_P4 European Homo sapiens female \n", "\n", - " tissue patient batch \\\n", - "AAACAAGTATCTCCCA-1-control_P1 heart left ventricle P1 10X \n", - "AAACAATCTACTAGCA-1-control_P1 heart left ventricle P1 10X \n", - "AAACACCAATAACTGC-1-control_P1 heart left ventricle P1 10X \n", - "AAACAGAGCGACTCCT-1-control_P1 heart left ventricle P1 10X \n", - "AAACAGCTTTCAGAAG-1-control_P1 heart left ventricle P1 10X \n", - "... ... ... ... \n", - "TTGTTTCACATCCAGG-1-FZ_GT_P4 heart left ventricle P4 10X \n", - "TTGTTTCATTAGTCTA-1-FZ_GT_P4 heart left ventricle P4 10X \n", - "TTGTTTCCATACAACT-1-FZ_GT_P4 heart left ventricle P4 10X \n", - "TTGTTTGTATTACACG-1-FZ_GT_P4 heart left ventricle P4 10X \n", - "TTGTTTGTGTAAATTC-1-FZ_GT_P4 heart left ventricle P4 10X \n", + " tissue condition patient batch \\\n", + "AAACAAGTATCTCCCA-1-control_P1 heart left ventricle control P1 10X \n", + "AAACAATCTACTAGCA-1-control_P1 heart left ventricle control P1 10X \n", + "AAACACCAATAACTGC-1-control_P1 heart left ventricle control P1 10X \n", + "AAACAGAGCGACTCCT-1-control_P1 heart left ventricle control P1 10X \n", + "AAACAGCTTTCAGAAG-1-control_P1 heart left ventricle control P1 10X \n", + "... ... ... ... ... \n", + "TTGTTTCACATCCAGG-1-FZ_GT_P4 heart left ventricle FZ_GT P4 10X \n", + "TTGTTTCATTAGTCTA-1-FZ_GT_P4 heart left ventricle FZ_GT P4 10X \n", + "TTGTTTCCATACAACT-1-FZ_GT_P4 heart left ventricle FZ_GT P4 10X \n", + "TTGTTTGTATTACACG-1-FZ_GT_P4 heart left ventricle FZ_GT P4 10X \n", + "TTGTTTGTGTAAATTC-1-FZ_GT_P4 heart left ventricle FZ_GT P4 10X \n", "\n", " hca_sample_id \n", "AAACAAGTATCTCCCA-1-control_P1 10X001 \n", @@ -1115,10 +1150,10 @@ "TTGTTTGTATTACACG-1-FZ_GT_P4 10X009 \n", "TTGTTTGTGTAAATTC-1-FZ_GT_P4 10X009 \n", "\n", - "[88704 rows x 36 columns]" + "[88704 rows x 37 columns]" ] }, - "execution_count": 15, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1144,7 +1179,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "3d04127c-ad9a-43ec-a0ad-3da1f6b8f7ba", "metadata": {}, "outputs": [],