devtools::load_all(".") # From root of your SimOmics folder
## ℹ Loading SimOmics
library(SimOmics)
library(mixOmics)
This vignette demonstrates how to use the SimOmics package to simulate
multi-omics data with latent structures and block-wise covariance. The goal
is to provide synthetic, biologically plausible data for benchmarking
integration methods such as those in mixOmics.
# Feature counts per omics block; the names become the block names in the
# simulated output (see the column-name prefixes printed by str() below).
block_sizes <- list(transcriptome = 1000, proteome = 200)

# Simulate 200 samples across both blocks. A seed is supplied so the call can
# be repeated with the same inputs.
sim_data <- simulate_multiomics(
  n = 200,
  block_dims = block_sizes,
  n_factors = 3,
  block_corr = 0.4,
  noise_sd = 0.5,
  seed = 123
)

# Inspect the structure of the simulated blocks.
str(sim_data$X_blocks)
## List of 2
## $ transcriptome: num [1:200, 1:1000] 13.566 6.203 -0.136 5.527 -9.481 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:1000] "transcriptome_1" "transcriptome_2" "transcriptome_3" "transcriptome_4" ...
## $ proteome : num [1:200, 1:200] 6.7574 -1.4933 -0.0737 8.7048 2.3373 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:200] "proteome_1" "proteome_2" "proteome_3" "proteome_4" ...
# Each figure is produced twice: once into a PNG file and once on the active
# graphics device. Small closures keep the two calls for a figure in sync.
draw_pca <- function() {
  plot_simulated_data(sim_data, type = "pca", block = "transcriptome")
}
draw_correlation <- function() {
  plot_simulated_data(sim_data, type = "correlation")
}

# PCA of the transcriptome block: save to file, then display.
png(filename = "pca_plot.png", width = 1200, height = 1000, res = 200)
draw_pca()
dev.off()
## quartz_off_screen
## 2
draw_pca()

# Correlation plot: save to file, then display.
png(filename = "correlation_plot.png", width = 3000, height = 3000, res = 300)
draw_correlation()
dev.off()
## quartz_off_screen
## 2
draw_correlation()
We now apply block.plsda from the mixOmics package to integrate the
transcriptome and proteome blocks.
# Two-class outcome for the discriminant analysis: the first half of the
# samples is labelled "A" and the second half "B". The group size is derived
# from the simulated data rather than hard-coded, so Y always has one label
# per sample even if `n` in the simulation call is changed.
# NOTE(review): the labels are assigned purely by sample order; nothing
# visible here ties them to the simulated latent structure -- confirm they are
# intended as arbitrary demonstration groups.
n_samples <- nrow(sim_data$X_blocks[[1]])
Y <- factor(
  rep(c("A", "B"), each = ceiling(n_samples / 2), length.out = n_samples)
)

# Fit a two-component multi-block PLS-DA on the simulated blocks.
res <- block.plsda(X = sim_data$X_blocks, Y = Y, ncomp = 2)

# Save the sample plot to a PNG file, then draw it on the active device.
png("plsda_simulated.png", width = 1600, height = 1400, res = 200)
plotIndiv(res, legend = TRUE, title = "PLS-DA of Simulated Multi-Omics Data")
dev.off()
## quartz_off_screen
## 2
plotIndiv(res, legend = TRUE)
# Persist the complete simulation object for later reuse.
saveRDS(sim_data, file = "sim_data.rds")

# Export each omics block to its own CSV file, without a row-name column.
csv_targets <- c(
  transcriptome = "transcriptome.csv",
  proteome = "proteome.csv"
)
for (block_name in names(csv_targets)) {
  write.csv(
    sim_data$X_blocks[[block_name]],
    file = csv_targets[[block_name]],
    row.names = FALSE
  )
}
This example shows how SimOmics can generate reproducible, structured multi-omics datasets for integration and benchmarking. These synthetic data allow control over latent factors, noise, and covariance structure to stress-test bioinformatics methods.