Cluster spatial data based on predicted cell proportions

Cluster spatial transcriptomics (ST) data according to the cell proportions predicted in each spot. This makes it possible to segregate ST data into niches with similar cell composition.

Usage

spatialPropClustering(
  object,
  index.st,
  method = "graph",
  k.nn = 10,
  k.centers = 5,
  verbose = TRUE
)

Arguments

object: SpatialDDLS object with deconvoluted ST datasets.
index.st: Name or index of the already deconvoluted dataset/slide to be clustered. If missing, all datasets that have already been deconvoluted will be clustered.
method: Clustering method. It can be "graph" (a nearest-neighbor graph is created and the Louvain algorithm is used to detect communities) or "k.means" (the k-means algorithm is run with the number of centers given by the k.centers parameter).
k.nn: An integer specifying the number of nearest neighbors to be used during graph construction (10 by default). Only used if method == "graph".
k.centers: An integer specifying the number of centers for the k-means algorithm (5 by default). Only used if method == "k.means".
verbose: Show informative messages during the execution (TRUE by default).

Value

A SpatialDDLS object containing computed clusters as a column in the slot colData of the SpatialExperiment objects.

Examples

# \donttest{
# End-to-end toy example: build a SpatialDDLS object from simulated
# single-cell data, train a deconvolution model, deconvolute a simulated ST
# slide and finally cluster its spots with spatialPropClustering().
# Lines starting with "#>" are the console output recorded for this example.
set.seed(123)
# Simulated single-cell data: a 15 genes x 10 cells matrix of Poisson counts,
# with every cell randomly assigned to one of two cell types.
# NOTE(review): rpois() draws only 30 values for a 15 x 10 (150-element)
# matrix, so R recycles them (no warning because 150 is a multiple of 30).
# Confirm whether rpois(150, ...) was intended; changing it would alter every
# subsequent random draw and therefore the recorded output below.
sce <- SingleCellExperiment::SingleCellExperiment(
  assays = list(
    counts = matrix(
      rpois(30, lambda = 5), nrow = 15, ncol = 10,
      dimnames = list(paste0("Gene", seq(15)), paste0("RHC", seq(10)))
    )
  ),
  colData = data.frame(
    Cell_ID = paste0("RHC", seq(10)),
    Cell_Type = sample(x = paste0("CellType", seq(2)), size = 10,
                       replace = TRUE)
  ),
  rowData = data.frame(
    Gene_ID = paste0("Gene", seq(15))
  )
)
# Create the SpatialDDLS object from the single-cell data only; no ST data is
# supplied at this point (see the first output line below).
SDDLS <- createSpatialDDLSobject(
  sc.data = sce,
  sc.cell.ID.column = "Cell_ID",
  sc.gene.ID.column = "Gene_ID",
  sc.filt.genes.cluster = FALSE
) 
#> === Spatial transcriptomics data not provided
#> === Processing single-cell data
#>       - Filtering features:
#>          - Selected features: 15
#>          - Discarded features: 0
#> 
#> === No mitochondrial genes were found by using ^mt- as regrex
#> 
#> === Final number of dimensions for further analyses: 15
# Generate cell proportions for 50 mixed spots; per the output below they are
# split into 34 training and 16 test spots.
SDDLS <- genMixedCellProp(
  SDDLS,
  cell.ID.column = "Cell_ID",
  cell.type.column = "Cell_Type",
  num.sim.spots = 50,
  train.freq.cells = 2/3,
  train.freq.spots = 2/3,
  verbose = TRUE
) 
#> 
#> === The number of mixed profiles that will be generated is equal to 50
#> 
#> === Training set cells by type:
#>     - CellType1: 4
#>     - CellType2: 3
#> === Test set cells by type:
#>     - CellType1: 2
#>     - CellType2: 1
#> === Probability matrix for training data:
#>     - Mixed spots: 34
#>     - Cell types: 2
#> === Probability matrix for test data:
#>     - Mixed spots: 16
#>     - Cell types: 2
#> DONE
# Simulate the train and test mixed profiles from the proportions above.
SDDLS <- simMixedProfiles(SDDLS) 
#> === Setting parallel environment to 1 thread(s)
#> 
#> === Generating train mixed profiles:
#> 
#> === Generating test mixed profiles:
#> 
#> DONE
# Train the deconvolution model on the simulated mixed profiles (batch size
# 12, 5 epochs) and evaluate it on the test profiles.
# NOTE(review): the recorded run reports loss = NaN on this toy data, so the
# example illustrates the workflow rather than a well-trained model; confirm
# this is expected.
SDDLS <- trainDeconvModel(
  SDDLS,
  batch.size = 12,
  num.epochs = 5
) 
#> === Training and test from stored data
#>     Using only simulated mixed samples
#>     Using only simulated mixed samples
#> Model: "SpatialDDLS"
#> _____________________________________________________________________
#> Layer (type)                   Output Shape               Param #    
#> =====================================================================
#> Dense1 (Dense)                 (None, 200)                3200       
#> _____________________________________________________________________
#> BatchNormalization1 (BatchNorm (None, 200)                800        
#> _____________________________________________________________________
#> Activation1 (Activation)       (None, 200)                0          
#> _____________________________________________________________________
#> Dropout1 (Dropout)             (None, 200)                0          
#> _____________________________________________________________________
#> Dense2 (Dense)                 (None, 200)                40200      
#> _____________________________________________________________________
#> BatchNormalization2 (BatchNorm (None, 200)                800        
#> _____________________________________________________________________
#> Activation2 (Activation)       (None, 200)                0          
#> _____________________________________________________________________
#> Dropout2 (Dropout)             (None, 200)                0          
#> _____________________________________________________________________
#> Dense3 (Dense)                 (None, 2)                  402        
#> _____________________________________________________________________
#> BatchNormalization3 (BatchNorm (None, 2)                  8          
#> _____________________________________________________________________
#> ActivationSoftmax (Activation) (None, 2)                  0          
#> =====================================================================
#> Total params: 45,410
#> Trainable params: 44,606
#> Non-trainable params: 804
#> _____________________________________________________________________
#> 
#> === Training DNN with 34 samples:
#> 
#> === Evaluating DNN in test data (16 samples)
#>    - loss: NaN
#>    - accuracy: 0.5
#>    - mean_absolute_error: NaN
#>    - categorical_accuracy: 0.5
#> 
#> === Generating prediction results using test data
#> DONE
# simulating spatial data
# Draw a random number of genes (3 to 40) and spots (10 to 40), then fill a
# genes x spots matrix with Poisson counts.
ngenes <- sample(3:40, size = 1)
ncells <- sample(10:40, size = 1)
counts <- matrix(
  rpois(ngenes * ncells, lambda = 5), ncol = ncells,
  dimnames = list(paste0("Gene", seq(ngenes)), paste0("Spot", seq(ncells)))
)
# Two-column matrix with one row per spot, filled with alternating 1s and 2s,
# used as the spatial coordinates of the simulated slide.
coordinates <- matrix(
  rep(c(1, 2), ncells), ncol = 2
)
st <- SpatialExperiment::SpatialExperiment(
  assays = list(counts = as.matrix(counts)),
  rowData = data.frame(Gene_ID = paste0("Gene", seq(ngenes))),
  colData = data.frame(Cell_ID = paste0("Spot", seq(ncells))),
  spatialCoords = coordinates
)
# Load the simulated ST slide into the existing SpatialDDLS object.
SDDLS <- loadSTProfiles(
  object = SDDLS,
  st.data = st,
  st.spot.ID.column = "Cell_ID",
  st.gene.ID.column = "Gene_ID"
)
#> === 1 SpatialExperiment objects provided
#>    === Processing spatial transcriptomics data
#>       - Filtering features:
#>          - Selected features: 17
#>          - Discarded features: 0
#> 
# Deconvolute the first ST slide with the trained model; features not present
# in the trained model are filtered out first (2 in the recorded run).
SDDLS <- deconvSpatialDDLS(
  SDDLS,
  index.st = 1
) 
#> === Filtering out 2 features in data that are not present in trained model
#> === Normalizing data (LogCPM)
#> === Predicting cell type proportions
#> 
#> === Calculating distances in PCA space
#> 
#> === Calculating 50 PCs
#> Warning: You're computing too large a percentage of total singular values, use a standard svd instead.
#> === Calculating alpha factors based on distances
#> DONE
# Cluster the spots of slide 1 by their predicted cell proportions, using the
# default graph-based method with 5 nearest neighbors.
SDDLS <- spatialPropClustering(SDDLS, index.st = 1, k.nn = 5)
#> === Selected graph-based clustering
#> === Running clustering for slide 1
# }

Usage

Arguments

Value

See also

Examples