Integrating ST and scRNA-seq Data

In this tutorial, we show how to apply CellMirror to integrate ST and scRNA-seq data. As an example, we use the ST data and paired scRNA-seq data of pancreatic ductal adenocarcinoma (PDAC) patient from Moncada , et al. 2020., including 428 spots and 1,926 cells.

Step0: Loading packages (Python)

 1import warnings
 2warnings.filterwarnings("ignore")
 3
 4import os
 5os.environ['R_HOME'] = '/sibcb2/chenluonanlab7/zuochunman/anaconda3/envs/r4.0/lib/R'
 6os.environ['R_USER'] = '/sibcb2/chenluonanlab7/zuochunman/anaconda3/envs/CellMirror/lib/python3.8/site-packages/rpy2'
 7
 8import random
 9import numpy as np
10import scanpy as sc
11import matplotlib.pyplot as plt
12
13from CellMirror_utils.utilities import *
14from CellMirror_utils.layers import *
15from CellMirror_utils.cLDVAE_torch import *
16import torch.utils.data as data_utils
17
18parser = parameter_setting()
19args = parser.parse_known_args()[0]
20
21# Set seed
22np.random.seed(args.seed)
23random.seed(args.seed)
24torch.manual_seed(args.seed)
25torch.cuda.manual_seed(args.seed)
26
27args.lr_cLDVAE = 1e-5; args.gamma = -100
28
29# Set workpath
30Save_path = '/sibcb2/chenluonanlab7/zuochunman/Share_data/xiajunjie/PDAC/'

Step1: Reading and preprocessing data (Python)

 1# Normalization
 2PDAC_A_st = sc.read_h5ad(Save_path + 'PDAC_A_st.h5ad')
 3PDAC_A_st.obs['type'] = 'Spatial'
 4PDAC_A_st.obs['y'] = 100 - PDAC_A_st.obs['y']
 5PDAC_A_st.obsm['spatial'] = PDAC_A_st.obs[['x','y']].values
 6sc.pp.normalize_total(PDAC_A_st)
 7sc.pp.log1p(PDAC_A_st)
 8sc.pp.highly_variable_genes(PDAC_A_st, flavor='seurat', n_top_genes=3000)
 9
10PDAC_A_sc = sc.read_h5ad(Save_path + 'PDAC_A_sc.h5ad')
11PDAC_A_sc.obs['type'] = 'SingleCell'
12sc.pp.normalize_total(PDAC_A_sc)
13sc.pp.log1p(PDAC_A_sc)
14sc.pp.highly_variable_genes(PDAC_A_sc, flavor='seurat', n_top_genes=3000)
15
16common_HVGs=np.intersect1d(list(PDAC_A_sc.var.index[PDAC_A_sc.var['highly_variable']]),list(PDAC_A_st.var.index[PDAC_A_st.var['highly_variable']])).tolist()
17print(len(common_HVGs))
18
19PDAC_A_st, PDAC_A_sc = PDAC_A_st[:,common_HVGs], PDAC_A_sc[:,common_HVGs]
20
21print('\nShape of target object: ', PDAC_A_st.shape, '\tShape of background object: ', PDAC_A_sc.shape)
22
23# Last batch setting
24args = set_last_batchsize(args, PDAC_A_st, PDAC_A_sc)
25
26# Pseudo-data padding
27pseudo_st, pseudo_sc = pseudo_data_padding(PDAC_A_st, PDAC_A_sc)

Step2: Training cLDVAE model (Python)

 1# Dataloader preparation
 2train = data_utils.TensorDataset(torch.from_numpy(pseudo_st),torch.from_numpy(pseudo_sc))
 3train_loader = data_utils.DataLoader(train, batch_size=args.batch_size, shuffle=True)
 4
 5total = data_utils.TensorDataset(torch.from_numpy(pseudo_st),torch.from_numpy(pseudo_sc))
 6total_loader = data_utils.DataLoader(total, batch_size=args.batch_size, shuffle=False)
 7
 8# Run cLDVAE
 9model_cLDVAE = cLDVAE(args=args, n_input=PDAC_A_st.shape[1]).cuda()
10history = model_cLDVAE.fit(train_loader, total_loader)

Step3: Saving cLDVAE outputs (Python)

1# Pseudo-data deparsing
2outputs = model_cLDVAE.predict(total_loader)
3PDAC_A_st.obsm['cLDVAE'], PDAC_A_sc.obsm['cLDVAE'] = pseudo_data_deparser(PDAC_A_st, outputs['tg_z_outputs'], PDAC_A_sc, outputs['bg_z_outputs'])

Step4: Implementing MNN on the processed data (Python)

1# Run MNN
2PDAC_A_st.obsm['CellMirror'], PDAC_A_sc.obsm['CellMirror'] = mnn_correct(PDAC_A_st.obsm['cLDVAE'], PDAC_A_sc.obsm['cLDVAE'])
3
4# Cell type estimation
5PDAC_A_st, PDAC_A_sc = estimate_cell_type(PDAC_A_st, PDAC_A_sc, used_obsm='CellMirror', used_label='cell_type_ductal', neighbors=50)

Step5: Saving results for further visualization (Python)

1# Save data
2colors = np.unique(PDAC_A_sc.obs['cell_type_ductal'].values).tolist()
3sc.pl.spatial(PDAC_A_st, img_key=None, color=colors, color_map=plt.cm.get_cmap('plasma'), ncols=5, spot_size=1, frameon=False, show=False)
4plt.savefig(Save_path + 'PDAC_A_CellMirror_celltype_proportion.jpg', dpi=100)
5
6PDAC_A_st.obs.to_csv(Save_path + 'PDAC_st_CellMirror_celltype_proportion.csv')
7PDAC_A_st.write(Save_path + 'PDAC_A_CellMirror.h5ad', compression='gzip')

Step6: Visualization (R)

 1PDAC_st <- read.csv('C:\\Users\\我的电脑\\Desktop\\待办\\PDAC_st_CellMirror_celltype_proportion.csv')
 2celltypes <- colnames(PDAC_st)[11:dim(PDAC_st)[2]]
 3
 4suppressMessages(
 5ggplot2::ggplot()
 6+ scatterpie::geom_scatterpie(data = PDAC_st, ggplot2::aes(x = x,y = y), cols = celltypes, color = NA,alpha = 1, pie_scale = 0.8)
 7+ ggplot2::scale_y_reverse()
 8+ ggplot2::theme_void()
 9+ ggplot2::coord_fixed(ratio = 1,xlim = NULL, ylim = NULL, expand = TRUE, clip = "on")
10+ ggplot2::scale_fill_manual(values = c('Acinar.cells'='#FEDB2C',
11                                        'Cancer.clone.A'='#4AAE4B',
12                                        'Cancer.clone.B'='#FDCBE4',
13                                        'Ductal.antigen.presenting'='#7DC87E',
14                                        'Ductal.centroacinar'='#337FB4',
15                                        'Ductal.high.hypoxic'='#D9D9D9',
16                                        'Ductal.terminal'='#BAACCF',
17                                        'Endocrine.cells'='#FCBE84',
18                                        'Endothelial.cells'='#FFFF91',
19                                        'Fibroblasts'='#386AB2',
20                                        'Macrophages'='#BC5914',
21                                        'Mast.cells'='#656666',
22                                        'Monocytes'='#817EAE',
23                                        'RBCs'='#63A618',
24                                        'T...NK.cells'='#E7A703',
25                                        'Tuft.cells'='#A8771A',
26                                        'mDCs'='#D95E08',
27                                        'pDCs'='#E72989')))
_images/PDAC_celltype_estimation.png