R/interGradientsDL.R
topGradientsCellType.Rd
Retrieve the names of the features (genes) with the largest (most positive)
and smallest (most negative) gradients for each cell type. These genes can be
used to plot the calculated gradients as a heatmap (see the plotGradHeatmap
function).
topGradientsCellType(object, method = "class", top.n.genes = 15)
DigitalDLSorter
object with a
DigitalDLSorterDNN
object containing gradients in the
interpret.gradients
slot.
Method used to calculate the gradients. It can be either
'class'
(gradients of predicted classes w.r.t. inputs) or
'loss'
(gradients of loss w.r.t. input features).
Number of top genes (positive and negative) taken per cell type.
List with one element per cell type, each containing the names of the genes with the top absolute, positive and negative gradients.
# \donttest{
# Simulate a toy single-cell dataset (15 genes x 10 cells, 2 cell types) with
# a fixed seed so that the example is reproducible.
set.seed(123)
gene.ids <- paste0("Gene", seq_len(15))
cell.ids <- paste0("RHC", seq_len(10))
sce <- SingleCellExperiment::SingleCellExperiment(
  assays = list(
    counts = matrix(
      # only 30 Poisson values are drawn; matrix() recycles them to fill the
      # 15 x 10 count matrix
      rpois(30, lambda = 5),
      nrow = 15,
      ncol = 10,
      dimnames = list(gene.ids, cell.ids)
    )
  ),
  colData = data.frame(
    Cell_ID = cell.ids,
    # each cell is randomly assigned to CellType1 or CellType2
    Cell_Type = sample(
      x = paste0("CellType", seq_len(2)),
      size = 10,
      replace = TRUE
    )
  ),
  rowData = data.frame(Gene_ID = gene.ids)
)
# Create the DDLS object from the single-cell data only (no bulk RNA-seq data
# are supplied, as reported in the output below).
DDLS <- createDDLSobject(
  sc.data = sce,
  sc.cell.ID.column = "Cell_ID",
  sc.gene.ID.column = "Gene_ID",
  sc.filt.genes.cluster = FALSE
)
#> === Bulk RNA-seq data not provided
#> === Processing single-cell data
#> - Filtering features:
#> - Selected features: 15
#> - Discarded features: 0
#>
#> === No mitochondrial genes were found by using ^mt- as regrex
#>
#> === Final number of dimensions for further analyses: 15
# Design table with one row per cell type.
# NOTE(review): `from`/`to` presumably bound the range of proportions allowed
# for each cell type -- confirm against the generateBulkCellMatrix help page.
prop.design <- data.frame(
  Cell_Type = paste0("CellType", seq_len(2)),
  from = c(1, 30),
  to = c(15, 70)
)
# Generate the cell composition matrices for 50 bulk samples; the output below
# shows how cells and samples are split into training and test sets.
DDLS <- generateBulkCellMatrix(
  object = DDLS,
  cell.ID.column = "Cell_ID",
  cell.type.column = "Cell_Type",
  prob.design = prop.design,
  num.bulk.samples = 50,
  verbose = TRUE
)
#>
#> === The number of bulk RNA-Seq samples that will be generated is equal to 50
#>
#> === Training set cells by type:
#> - CellType1: 4
#> - CellType2: 3
#> === Test set cells by type:
#> - CellType1: 2
#> - CellType2: 1
#> === Probability matrix for training data:
#> - Bulk RNA-Seq samples: 38
#> - Cell types: 2
#> === Probability matrix for test data:
#> - Bulk RNA-Seq samples: 12
#> - Cell types: 2
#> DONE
# Simulate the train and test bulk samples from the composition matrices.
DDLS <- simBulkProfiles(DDLS)
#> === Setting parallel environment to 1 thread(s)
#>
#> === Generating train bulk samples:
#>
#> === Generating test bulk samples:
#>
#> DONE
# Train the neural network on the simulated bulk samples and evaluate it on
# the test samples (5 epochs, batches of 12 samples).
DDLS <- trainDDLSModel(object = DDLS, batch.size = 12, num.epochs = 5)
#> === Training and test from stored data
#> Using only simulated bulk samples
#> Using only simulated bulk samples
#> Model: "DigitalDLSorter"
#> _____________________________________________________________________
#> Layer (type) Output Shape Param #
#> =====================================================================
#> Dense1 (Dense) (None, 200) 3200
#> _____________________________________________________________________
#> BatchNormalization1 (BatchNorm (None, 200) 800
#> _____________________________________________________________________
#> Activation1 (Activation) (None, 200) 0
#> _____________________________________________________________________
#> Dropout1 (Dropout) (None, 200) 0
#> _____________________________________________________________________
#> Dense2 (Dense) (None, 200) 40200
#> _____________________________________________________________________
#> BatchNormalization2 (BatchNorm (None, 200) 800
#> _____________________________________________________________________
#> Activation2 (Activation) (None, 200) 0
#> _____________________________________________________________________
#> Dropout2 (Dropout) (None, 200) 0
#> _____________________________________________________________________
#> Dense3 (Dense) (None, 2) 402
#> _____________________________________________________________________
#> BatchNormalization3 (BatchNorm (None, 2) 8
#> _____________________________________________________________________
#> ActivationSoftmax (Activation) (None, 2) 0
#> =====================================================================
#> Total params: 45,410
#> Trainable params: 44,606
#> Non-trainable params: 804
#> _____________________________________________________________________
#>
#> === Training DNN with 38 samples:
#>
#> === Evaluating DNN in test data (12 samples)
#> - loss: NaN
#> - accuracy: 0.1667
#> - mean_absolute_error: NaN
#> - categorical_accuracy: 0.1667
#>
#> === Generating prediction results using test data
#> DONE
## Calculate the gradients and keep them in the DDLS object
DDLS <- interGradientsDL(DDLS)
## Retrieve the top genes per cell type (default method and top.n.genes)
listGradients <- topGradientsCellType(DDLS)
# Each cell type holds a list of three gene vectors (Absolute, Positive and
# Negative), so head() must be applied to the inner vectors: applying it to
# the per-cell-type list would only select list elements and truncate nothing.
# NOTE(review): the three rankings are identical in this output and the test
# loss reported after training is NaN, so the gradients of this toy model are
# presumably not informative -- confirm.
lapply(listGradients, function(genes) lapply(genes, head, n = 5))
#> $CellType1
#> $CellType1$Absolute
#> [1] "Gene12" "Gene8" "Gene4" "Gene11" "Gene14"
#>
#> $CellType1$Positive
#> [1] "Gene12" "Gene8" "Gene4" "Gene11" "Gene14"
#>
#> $CellType1$Negative
#> [1] "Gene12" "Gene8" "Gene4" "Gene11" "Gene14"
#>
#>
#> $CellType2
#> $CellType2$Absolute
#> [1] "Gene4" "Gene7" "Gene9" "Gene1" "Gene12"
#>
#> $CellType2$Positive
#> [1] "Gene4" "Gene7" "Gene9" "Gene1" "Gene12"
#>
#> $CellType2$Negative
#> [1] "Gene4" "Gene7" "Gene9" "Gene1" "Gene12"
#>
#>
# }