Dot plot of the percentage of tumors with signature
Introduction
Using mouse models of carcinogenesis under different chemical exposures, the study found that tumors caused by different exposures carry different mutational signatures.
Code explanation
Data pre-processing: for each tumor exposure category and each signature, compute `rate`, the fraction of tumors in that category in which the signature contributes at least 10%, and `mecounts`, the mean relative contribution of the signature among those tumors.
# Pre-processing: for every tumour category and signature, work out how often
# the signature is present and how strongly it contributes when it is present.
# Reads `samples`, `mexposure`, `nsnvs` and `namesig`, which must already exist.

# A contribution at or above this value counts as "signature present".
min_contribution <- 0.1

# Category label = sample name cut two characters before its final digit,
# then trimmed of surrounding whitespace.
namem <- sort(unique(str_trim(
  str_sub(
    as.character(samples),
    start = 1L,
    end = str_locate(as.character(samples), "\\d$")[, 1] - 2
  )
)))
# Manual display order: the 8th sorted category is moved behind the 17th.
namem <- namem[c(1:7, 9:17, 8, 18:33)]

# Dimensions are taken from the data instead of being hard-coded.
n_samples <- nrow(mexposure)
n_sig <- ncol(mexposure)
n_types <- length(namem)

# Multiply every column of `mexposure` element-wise by `nsnvs`
# (assumes `nsnvs` holds one value per row of `mexposure` -- TODO confirm).
mexposuretotal <- mexposure *
  matrix(rep(nsnvs, n_sig), nrow = n_samples, ncol = n_sig)

# rate[j, i]    : share of samples of category i with signature j present.
# mediana[j, i] : mean contribution of signature j over those samples
#                 (despite the name this is a mean); stays 0 when none qualify.
rate <- matrix(0, nrow = n_sig, ncol = n_types)
mediana <- matrix(0, nrow = n_sig, ncol = n_types)
for (i in seq_len(n_types)) {
  # NOTE(review): str_which() treats namem[i] as a regular expression and
  # matches anywhere in the sample name, so a category whose name is contained
  # in another one would also pick up that category's samples -- confirm the
  # names are unambiguous.
  indi <- str_which(samples, namem[i])
  for (j in seq_len(n_sig)) {
    contribution <- mexposure[indi, j]
    present <- which(contribution >= min_contribution)
    rate[j, i] <- length(present) / length(indi)
    if (length(present) > 0) {
      mediana[j, i] <- mean(contribution[present])
    }
  }
}
rownames(rate) <- namesig
colnames(rate) <- namem
rownames(mediana) <- namesig
colnames(mediana) <- namem

# Long format: one row per (tumour_type, signature) pair. Every column except
# `tumour_type` is a signature column, so select by exclusion rather than by
# passing the external vector `namesig` as a bare name.
ratesi <- data.frame(t(rate), stringsAsFactors = FALSE, check.names = FALSE)
ratesi <- ratesi %>% mutate(tumour_type = rownames(ratesi))
ratesi.df <- gather(ratesi, key = "signature", value = "rate", -tumour_type)

medianasi <- data.frame(t(mediana), stringsAsFactors = FALSE, check.names = FALSE)
medianasi <- medianasi %>% mutate(tumour_type = rownames(medianasi))
medianasi.df <- gather(
  medianasi,
  key = "signature", value = "median_counts", -tumour_type
)

# Both long tables are stacked in the same column order, so the mean
# contributions can be attached by position.
ratesi.df <- ratesi.df %>% mutate(mecounts = medianasi.df$median_counts)
The main idea is to draw the dots with geom_point first, and then add the shaded background stripes with geom_rect.
# Background stripes: one rectangle centred on every second x position
# (2, 4, ..., 32), each one unit wide and spanning y = 0.5 to 11.5.
stripe_rect <- function(k) {
  data.frame(
    xmin = 2 * k - 0.5,
    xmax = 2 * k + 0.5,
    ymin = 0.5,
    ymax = 11.5
  )
}

t1.rect1 <- stripe_rect(1)
t2.rect1 <- stripe_rect(2)
t3.rect1 <- stripe_rect(3)
t4.rect1 <- stripe_rect(4)
t5.rect1 <- stripe_rect(5)
t6.rect1 <- stripe_rect(6)
t7.rect1 <- stripe_rect(7)
t8.rect1 <- stripe_rect(8)
t9.rect1 <- stripe_rect(9)
t10.rect1 <- stripe_rect(10)
t11.rect1 <- stripe_rect(11)
t12.rect1 <- stripe_rect(12)
t13.rect1 <- stripe_rect(13)
t14.rect1 <- stripe_rect(14)
t15.rect1 <- stripe_rect(15)
t16.rect1 <- stripe_rect(16)

# Signature order used when releveling the y-axis factor.
namesig1 <- c(
  "mSBS_N3", "mSBS_N2", "mSBS_N1",
  "mSBS42", "mSBS40", "mSBS19", "mSBS18", "mSBS17", "mSBS12",
  "mSBS5", "mSBS1"
)
# Shared look of the dot plot, kept as a list so it can be added to a ggplot
# with a single `+`: classic base theme, no axis lines or ticks, white panel,
# 10 mm margins, plus the size, colour and x-axis scales.
spot.theme <- list(
  theme_classic(),
  theme(
    axis.line = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 0),
    axis.text.y = element_text(size = 12),
    text = element_text(size = 22),
    panel.background = element_rect(fill = "white"),
    plot.margin = unit(rep(10, 4), "mm"),
    legend.box.background = element_rect(color = "white")
  ),
  # Dot size range; the lower bound is deliberately negative.
  scale_size_continuous(range = c(-1, 8)),
  # Diverging colour ramp centred on 0.4 and limited to [0.08, 0.8].
  scale_colour_gradient2(
    low = "#F39B7FB2",
    mid = "#3C5488B2",
    high = "black",
    midpoint = 0.4,
    limits = c(0.08, 0.8),
    guide = "colourbar",
    aesthetics = "colour"
  ),
  # Tumour-type labels go above the panel.
  scale_x_discrete(position = "top")
)
# Dot plot: tumour categories on x, signatures on y; dot colour encodes
# `mecounts` and dot size encodes `rate`. The result is saved as Fig-1c.pdf.

# NOTE(review): `namem_ord` is not assigned anywhere in the visible script.
# Use it when it exists, otherwise fall back to `namem`, which already holds
# the manually ordered categories.
x_order <- if (exists("namem_ord")) namem_ord else namem

# First plot point
p <- ratesi.df %>%
  mutate(
    name = fct_relevel(tumour_type, x_order),
    mSBS = fct_relevel(signature, namesig1)
  ) %>%
  ggplot(aes(x = name, y = mSBS)) +
  geom_point(aes(colour = mecounts, size = rate)) +
  xlab("tumour type") +
  spot.theme

# Add background grid
# All stripes go into a single layer; they do not overlap, so the result is
# the same as adding one geom_rect() per stripe.
stripes <- rbind(
  t1.rect1, t2.rect1, t3.rect1, t4.rect1,
  t5.rect1, t6.rect1, t7.rect1, t8.rect1,
  t9.rect1, t10.rect1, t11.rect1, t12.rect1,
  t13.rect1, t14.rect1, t15.rect1, t16.rect1
)
p <- p +
  geom_rect(
    data = stripes,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    fill = "black", alpha = 0.1, inherit.aes = FALSE
  ) +
  # White separators between the signature rows (1.5, 2.5, ..., 10.5).
  geom_hline(yintercept = seq(1.5, 10.5, by = 1), color = "white", size = 1) +
  # Black group boundaries and outer frame, then the grey dividers on top.
  geom_vline(xintercept = c(10.5, 31.5, 0.5, 33.5), color = "black", size = 1) +
  geom_vline(xintercept = c(1.5, 11.5), color = "grey", size = 1)

# `filename` is spelled out instead of relying on partial matching of `file`.
ggsave(filename = "Fig-1c.pdf", plot = p, device = cairo_pdf, width = 16, height = 7)
Comparison of mouse substitution signatures with human signatures.
*SBS17 = SBS17a and SBS17b. c, Contribution of mSBS signatures across lung, liver, kidney and forestomach tumors, grouped by chemical exposure. The size
of the dots corresponds to the percentage of samples in each category having a minimal contribution level of 10% from the signature. The color represents
the mean relative contribution for the samples where the signature contribution is ≥10%. Of note, mSBS_N3 was detected in a spontaneous liver tumor
just below this threshold.
Interpretation of results
You can see that some signatures are mutually exclusive, and mSBS_N1 is related to VDC exposure
References
Riva, L., Pandiri, A. R., Li, Y. R., Droop, A., Hewinson, J., Quail, M. A., … Adams, D. J. (2020). The mutational signature profile of known and suspected human carcinogens in mice. Nature Genetics. doi:10.1038/s41588-020-0692-4
Original code
https://github.com/team113sanger/mouse-mutatation-signatures