Sequence logo diagram showing motif polymorphism
Introduction
View local motifs through sequence logo
Code explanation
Load the packages and read in the data
library(MutationalPatterns)
library(BSgenome.Mmusculus.UCSC.mm10)
library(ggseqlogo)
library(gridExtra)
library(grid)
# Read the SBS96 matrix written by SigProfilerMatrixGenerator.
# check.names = FALSE keeps the column names exactly as they appear in the file.
sbss <- read.delim(
  '../starting_data/SigProfilerMatrixGenerator_matrices/SBS96.all',
  header = TRUE, sep = '\t', check.names = FALSE
)

# Columns 2..182 are used as the sample columns
# (column 1 presumably holds the mutation-type label -- TODO confirm).
samples1 <- sort(unique(colnames(sbss[, 2:182])))

# Manual re-ordering of the sorted names: entries 37-41 are moved behind entry 83.
# NOTE(review): presumably this reproduces the row order of mexposuresig.rds -- confirm.
samples <- c(samples1[1:36], samples1[42:83], samples1[37:41], samples1[84:181])

# If unique() collapsed duplicated column names, indexing up to 181 yields NA
# names that would silently end up as row names below; stop here instead.
stopifnot(!anyNA(samples))

# Per-mutation table reduced to the columns used downstream, with a 'chr'
# prefix added to the chromosome name.
# NOTE(review): `replication_time_tsb`, `%>%` and the dplyr verbs are not
# defined/attached by the visible code; they must already exist in the session.
uni <- replication_time_tsb %>%
  select(chrom, position, ref, alt, sample) %>%
  mutate(chrom = paste0('chr', as.character(chrom)))

# Signature definitions and per-sample exposures, both coerced to matrices.
signatures <- readRDS('../figure1/mSBSs.rds')
signatures <- as.matrix(signatures)
activitiessig <- readRDS('../figure1/mexposuresig.rds')
activitiessig <- as.matrix(activitiessig)

# Label the exposure rows with the re-ordered sample names.
rownames(activitiessig) <- samples
For the selected samples, use the getSeq function to obtain the reference sequence surrounding each mutated position
# Samples with a non-zero value in column 11 of the exposure matrix.
# NOTE(review): column 11 is presumably mSBS_N3 -- confirm against the column
# order of mexposuresig.rds.
# The row names are indexed directly: subsetting the matrix first would drop a
# single matching row to a plain vector, for which rownames() returns NULL and
# every mutation would then be filtered out.
consider <- rownames(activitiessig)[which(activitiessig[, 11] > 0)]

ref_genome <- "BSgenome.Mmusculus.UCSC.mm10"
# Resolve the genome object once instead of once per getSeq() call.
genome_obj <- get(ref_genome)

# Keep only mutations from the selected samples and attach the reference
# context around each position: 5 bases (-2..+2) and 3 bases (-1..+1).
uni1 <- uni %>%
  filter(sample %in% consider) %>%
  mutate(
    vcf_context = as.character(
      getSeq(genome_obj, chrom, position - 2, position + 2)
    ),
    vcf_context_small = as.character(
      getSeq(genome_obj, chrom, position - 1, position + 1)
    )
  )

# Reverse complements of both contexts, so that mutations reported with a
# G reference can be expressed relative to the C on the opposite strand.
uni2 <- uni1 %>%
  mutate(
    vcf_context_smallr = reverse(chartr('ATGC', 'TACG', vcf_context_small)),
    vcf_contextr = reverse(chartr('ATGC', 'TACG', vcf_context))
  )

# Split by substitution type here already.
# C>G inside a GCC trinucleotide: forward-strand hits plus the G>C hits whose
# reverse-complemented trinucleotide is GCC.
uni_GCC_CG <- uni1 %>%
  filter(ref == 'C' & alt == 'G' & vcf_context_small == 'GCC')
uni_GCC_CGr <- uni2 %>%
  filter(ref == 'G' & alt == 'C' & vcf_context_smallr == 'GCC')
context_GCC_CG <- c(uni_GCC_CG$vcf_context, uni_GCC_CGr$vcf_contextr)

# C>T inside a GCC trinucleotide, collected the same way (G>A on the other strand).
uni_GCC_CT <- uni1 %>%
  filter(ref == 'C' & alt == 'T' & vcf_context_small == 'GCC')
uni_GCC_CTr <- uni2 %>%
  filter(ref == 'G' & alt == 'A' & vcf_context_smallr == 'GCC')
context_GCC_CT <- c(uni_GCC_CT$vcf_context, uni_GCC_CTr$vcf_contextr)
Use ggseqlogo to draw
# Logo input: the first four bases of each 5-base context, i.e. positions
# -2..+1 relative to the mutated base. method = 'prob' is passed to ggseqlogo.
sel3 <- as.data.frame(context_GCC_CG) %>%
  mutate(sml = substr(context_GCC_CG, 1, 4))
fig2e1 <- ggseqlogo(sel3$sml, method = 'prob')

# Same window for the C>T contexts (sel3 is deliberately reused).
sel3 <- as.data.frame(context_GCC_CT) %>%
  mutate(sml = substr(context_GCC_CT, 1, 4))
fig2e2 <- ggseqlogo(sel3$sml, method = 'prob')

# Place both logos side by side and write them to a PDF.
# `filename` is spelled out in full rather than relying on partial matching
# of `file`. grid.arrange() is kept because it also draws the combined figure
# on the current device.
ggsave(
  filename = 'Fig-2e.pdf',
  plot = grid.arrange(fig2e1, fig2e2, nrow = 1),
  device = cairo_pdf, width = 12, height = 4
)
Mutation of WGCC motifs in samples with mSBS_N3 altering the underlined nucleotide C>G
Interpretation of results
In samples with mSBS_N3, the base immediately upstream of GCC shows a higher proportion of T and A
References
Riva, L., Pandiri, A. R., Li, Y. R., Droop, A., Hewinson, J., Quail, M. A., … Adams, D. J. (2020). The mutational signature profile of known and suspected human carcinogens in mice. Nature Genetics. doi:10.1038/s41588-020-0692-4
Original code
https://github.com/team113sanger/mouse-mutatation-signatures