Cluster spatial data based on predicted cell proportions
Source:R/spatialClustering.R
spatialPropClustering.Rd
Cluster spatial transcriptomics (ST) data according to the cell proportions predicted in each spot. This allows ST data to be segregated into niches with similar cell composition.
Usage
spatialPropClustering(
object,
index.st,
method = "graph",
k.nn = 10,
k.centers = 5,
verbose = TRUE
)
Arguments
- object
`SpatialDDLS` object with deconvoluted ST datasets.
- index.st
Name or index of the dataset/slide already deconvoluted to be clustered. If missing, all datasets already deconvoluted will be clustered.
- method
Clustering method. It can be `graph` (a nearest neighbor graph is created and the Louvain algorithm is used to detect communities) or `k.means` (the k-means algorithm is run with the number of centers specified by the `k.centers` parameter).
- k.nn
An integer specifying the number of nearest neighbors to be used during graph construction (10 by default). Only used if `method == "graph"`.
- k.centers
An integer specifying the number of centers for the k-means algorithm (5 by default). Only used if `method == "k.means"`.
- verbose
Show informative messages during the execution (`TRUE` by default).
Value
A `SpatialDDLS` object containing the computed clusters as a column in the `colData` slot of the `SpatialExperiment` objects.
Examples
# \donttest{
set.seed(123)
sce <- SingleCellExperiment::SingleCellExperiment(
assays = list(
counts = matrix(
rpois(30, lambda = 5), nrow = 15, ncol = 10,
dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10)))
)
),
colData = data.frame(
Cell_ID = paste0("RHC", seq(10)),
Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10,
replace = TRUE)
),
rowData = data.frame(
Gene_ID = paste0("Gene", seq(15))
)
)
SDDLS <- createSpatialDDLSobject(
sc.data = sce,
sc.cell.ID.column = "Cell_ID",
sc.gene.ID.column = "Gene_ID",
sc.filt.genes.cluster = FALSE
)
#> === Spatial transcriptomics data not provided
#> === Processing single-cell data
#> - Filtering features:
#> - Selected features: 15
#> - Discarded features: 0
#>
#> === No mitochondrial genes were found by using ^mt- as regrex
#>
#> === Final number of dimensions for further analyses: 15
SDDLS <- genMixedCellProp(
SDDLS,
cell.ID.column = "Cell_ID",
cell.type.column = "Cell_Type",
num.sim.spots = 50,
train.freq.cells = 2/3,
train.freq.spots = 2/3,
verbose = TRUE
)
#>
#> === The number of mixed profiles that will be generated is equal to 50
#>
#> === Training set cells by type:
#> - CellType1: 4
#> - CellType2: 3
#> === Test set cells by type:
#> - CellType1: 2
#> - CellType2: 1
#> === Probability matrix for training data:
#> - Mixed spots: 34
#> - Cell types: 2
#> === Probability matrix for test data:
#> - Mixed spots: 16
#> - Cell types: 2
#> DONE
SDDLS <- simMixedProfiles(SDDLS)
#> === Setting parallel environment to 1 thread(s)
#>
#> === Generating train mixed profiles:
#>
#> === Generating test mixed profiles:
#>
#> DONE
SDDLS <- trainDeconvModel(
SDDLS,
batch.size = 12,
num.epochs = 5
)
#> === Training and test from stored data
#> Using only simulated mixed samples
#> Using only simulated mixed samples
#> Model: "SpatialDDLS"
#> _____________________________________________________________________
#> Layer (type) Output Shape Param #
#> =====================================================================
#> Dense1 (Dense) (None, 200) 3200
#> _____________________________________________________________________
#> BatchNormalization1 (BatchNorm (None, 200) 800
#> _____________________________________________________________________
#> Activation1 (Activation) (None, 200) 0
#> _____________________________________________________________________
#> Dropout1 (Dropout) (None, 200) 0
#> _____________________________________________________________________
#> Dense2 (Dense) (None, 200) 40200
#> _____________________________________________________________________
#> BatchNormalization2 (BatchNorm (None, 200) 800
#> _____________________________________________________________________
#> Activation2 (Activation) (None, 200) 0
#> _____________________________________________________________________
#> Dropout2 (Dropout) (None, 200) 0
#> _____________________________________________________________________
#> Dense3 (Dense) (None, 2) 402
#> _____________________________________________________________________
#> BatchNormalization3 (BatchNorm (None, 2) 8
#> _____________________________________________________________________
#> ActivationSoftmax (Activation) (None, 2) 0
#> =====================================================================
#> Total params: 45,410
#> Trainable params: 44,606
#> Non-trainable params: 804
#> _____________________________________________________________________
#>
#> === Training DNN with 34 samples:
#>
#> === Evaluating DNN in test data (16 samples)
#> - loss: NaN
#> - accuracy: 0.5
#> - mean_absolute_error: NaN
#> - categorical_accuracy: 0.5
#>
#> === Generating prediction results using test data
#> DONE
# simulating spatial data
ngenes <- sample(3:40, size = 1)
ncells <- sample(10:40, size = 1)
counts <- matrix(
rpois(ngenes * ncells, lambda = 5), ncol = ncells,
dimnames = list(paste0("Gene", seq(ngenes)), paste0("Spot", seq(ncells)))
)
coordinates <- matrix(
rep(c(1, 2), ncells), ncol = 2
)
st <- SpatialExperiment::SpatialExperiment(
assays = list(counts = as.matrix(counts)),
rowData = data.frame(Gene_ID = paste0("Gene", seq(ngenes))),
colData = data.frame(Cell_ID = paste0("Spot", seq(ncells))),
spatialCoords = coordinates
)
SDDLS <- loadSTProfiles(
object = SDDLS,
st.data = st,
st.spot.ID.column = "Cell_ID",
st.gene.ID.column = "Gene_ID"
)
#> === 1 SpatialExperiment objects provided
#> === Processing spatial transcriptomics data
#> - Filtering features:
#> - Selected features: 17
#> - Discarded features: 0
#>
SDDLS <- deconvSpatialDDLS(
SDDLS,
index.st = 1
)
#> === Filtering out 2 features in data that are not present in trained model
#> === Normalizing data (LogCPM)
#> === Predicting cell type proportions
#>
#> === Calculating distances in PCA space
#>
#> === Calculating 50 PCs
#> Warning: You're computing too large a percentage of total singular values, use a standard svd instead.
#> === Calculating alpha factors based on distances
#> DONE
SDDLS <- spatialPropClustering(SDDLS, index.st = 1, k.nn = 5)
#> === Selected graph-based clustering
#> === Running clustering for slide 1
# }