Knowledge base

Solving excessive memory consumption when merging Seurat objects

Introduction

I found that the R process was shut down due to low memory when I merged multiple Seurat objects from different patients. Here is my solution. Firstly, I read the expression matrix of every sample (in dgCMatrix format) from its 10X directory and stored all of them in one list. You can filter cells at this step if you have the corresponding metadata. Secondly, we took the union of the genes across all matrices and added all-zero rows to each matrix, so that every matrix contains the expression of all union genes. Thirdly, we merged the expression of all samples by using cbind instead of the merge function. Lastly, we used this combined matrix to create the Seurat object.

Code example

library(Seurat)
library(data.table)
library(ggplot2)
library(tidyverse)
library(magrittr)
library(Matrix)

setwd("/home/hxzk/project/sc_gbm/")

obj.ls <- list()

# Cell-level metadata; the first (unnamed) column is renamed to NAME and is
# matched against the prefixed cell barcodes below.
metadata <- fread("data/Metadata/Meta_Data_GBMatlas.txt")
setnames(metadata, "V1", "NAME")

# Read 10X matrix of all samples
matrix_dir <- "data/Matrix"
for (f in dir(matrix_dir, pattern = "^GSM")) {
  # The part of the directory name before the first "_" is used as the
  # barcode prefix.
  P <- strsplit(f, split = "_", fixed = TRUE)[[1]][1]
  tmp_mt <- Read10X(file.path(matrix_dir, f))
  colnames(tmp_mt) <- paste0(P, "_", colnames(tmp_mt))
  # Keep only the cells listed in the metadata. drop = FALSE keeps the result
  # a matrix even when a single cell matches.
  keep_cells <- colnames(tmp_mt) %in% metadata$NAME
  obj.ls[[f]] <- tmp_mt[, keep_cells, drop = FALSE]
}

# Get union genes
#' Collect the union of row names (genes) over a list of matrices.
#'
#' Genes are returned in order of first appearance, i.e. the same order a
#' sequential `union()` over the list would give. Works for named and unnamed
#' lists and for lists of length one.
#'
#' @param obj.ls A list of matrices whose row names are gene identifiers.
#' @return A character vector of unique gene names (`NULL` for an empty list).
get_all_genes <- function(obj.ls) {
  unique(unlist(lapply(obj.ls, rownames), use.names = FALSE))
}

# add 0
#' Pad a matrix with all-zero rows so that it covers every gene in `all_genes`.
#'
#' The padding rows are created directly as an empty sparse matrix, so no
#' dense block of zeros is ever allocated.
#'
#' @param mt An expression matrix with genes as row names.
#' @param all_genes Character vector of gene names the result must contain.
#' @return `mt` with zero rows added for the missing genes, with rows selected
#'   and ordered as in `all_genes`.
add_zero <- function(mt, all_genes) {
  gene_left <- setdiff(all_genes, rownames(mt))
  if (length(gene_left) > 0) {
    # Passing an empty numeric `x` makes sparseMatrix() return a dgCMatrix
    # rather than a pattern matrix.
    left_mt <- Matrix::sparseMatrix(
      i = integer(0),
      j = integer(0),
      x = numeric(0),
      dims = c(length(gene_left), ncol(mt)),
      dimnames = list(gene_left, colnames(mt))
    )
    mt <- rbind(mt, left_mt)
  }
  mt[all_genes, , drop = FALSE]
}

# Cbind matrix
#' Column-bind every matrix in a list into one matrix.
#'
#' All matrices are expected to share the same rows in the same order. The
#' list may be named or unnamed; a list of length one returns its only
#' element unchanged.
#'
#' @param dgc_lst A non-empty list of matrices (e.g. dgCMatrix objects).
#' @return A single matrix holding the columns of all inputs, in list order.
cbind_dgC_lst <- function(dgc_lst) {
  if (length(dgc_lst) == 0) {
    stop("`dgc_lst` must contain at least one matrix.", call. = FALSE)
  }
  if (length(dgc_lst) == 1) {
    return(dgc_lst[[1]])
  }
  # unname() so that list names are not passed on as argument names to cbind().
  do.call(cbind, unname(dgc_lst))
}

# Pad every sample to the union gene set, then bind all samples column-wise.
# Intermediate lists are removed as soon as they are no longer needed so that
# their memory can be reclaimed.
all_genes <- get_all_genes(obj.ls)
new_obj.ls <- lapply(obj.ls, add_zero, all_genes = all_genes)
rm(obj.ls)
gc()
all_mt <- cbind_dgC_lst(new_obj.ls)
rm(new_obj.ls)
gc()

# Create seurat object
combined <- CreateSeuratObject(counts = all_mt, min.cells = 3)

# You can also use the metadata you already have.
# Reorder the metadata rows to follow the cells of the Seurat object.
metadata <- metadata[rownames(combined@meta.data), , on = "NAME"]
# Append only the columns that the Seurat object does not have yet.
# drop = FALSE keeps a data frame (and the column name) even when a single
# column is selected.
extra_cols <- !colnames(metadata) %in% colnames(combined@meta.data)
combined@meta.data <- cbind(
  combined@meta.data,
  data.frame(metadata)[, extra_cols, drop = FALSE]
)

References

None

Original

None

Leave a Reply

Your email address will not be published. Required fields are marked *