Codes in paper

Dot plot of the percentage of tumors with signature

Introduction

Using mouse models of carcinogenesis under different chemical exposures, the study found that tumors induced by different chemicals carry different mutational signatures.

Code explanation

Data pre-processing: for each tumor/exposure category and each signature, compute `rate`, the fraction of tumors in that category in which the signature contributes at least 10%, and `mecounts`, the mean contribution of the signature among those tumors.

# Category label of every sample: keep the sample name up to two characters
# before its final digit, trim surrounding whitespace, then reduce the labels
# to their sorted unique values.
namem <- sort(unique(as.character(lapply(
  samples,
  function(sample_id) {
    last_kept <- str_locate(sample_id, "\\d$")[1] - 2
    str_trim(str_sub(sample_id, start = 1L, end = last_kept))
  }
))))

# Display order: the 8th label is moved so that it follows the 17th; every
# other label keeps its alphabetical position.
# NOTE(review): assumes exactly 33 labels were found - confirm against the data.
namem <- namem[c(1:7, 9:17, 8, 18:33)]

# Multiply mexposure element-wise by nsnvs repeated into a 181 x 11 matrix
# (column-major fill, so each column is a copy of nsnvs when it has 181 values).
# NOTE(review): presumably one nsnvs value per sample (row) - verify.
mexposuretotal <- mexposure * matrix(rep(nsnvs, times = 11), nrow = 181, ncol = 11)

# Result tables, 11 rows x 33 columns, filled in by the loop that follows.
rate <- matrix(0, nrow = 11, ncol = 33)
mediana <- rate

# For every category label (column i) and every signature (row j):
#   rate[j, i]    - share of the category's samples whose value in column j of
#                   mexposure is at least 0.1
#   mediana[j, i] - mean of those values (despite the name, this is a mean,
#                   not a median); left at 0 when no sample reaches 0.1
for (i in seq_len(33)) {
  # Samples whose name matches the label.
  # NOTE(review): the label is used as a regular expression, so a label that
  # is a substring of another label would also match that one's samples.
  indi <- str_which(samples, namem[i])
  for (j in seq_len(11)) {
    # Row indices (within mexposure) of the samples reaching the threshold.
    above <- indi[which(mexposure[indi, j] >= 0.1)]
    rate[j, i] <- length(above) / length(indi)
    if (length(above) > 0) {
      mediana[j, i] <- mean(mexposure[above, j])
    }
  }
}

# Label both tables: signatures on the rows, category labels on the columns.
dimnames(rate) <- list(namesig, namem)
dimnames(mediana) <- list(namesig, namem)

# Reshape the two signature x category tables into one long data frame with a
# row per (tumour_type, signature) pair and the columns `rate` and `mecounts`.

# Transpose so that each category is a row, keeping the signature names as
# column names untouched (check.names = FALSE).
ratesi <- data.frame(t(rate), stringsAsFactors = FALSE, check.names = FALSE)
ratesi <- ratesi %>% mutate(tumour_type = rownames(ratesi))
# all_of() makes it explicit that `namesig` is an external character vector of
# column names rather than a column of `ratesi`.
ratesi.df <- gather(ratesi, all_of(namesig), key = "signature", value = "rate")

medianasi <- data.frame(t(mediana), stringsAsFactors = FALSE, check.names = FALSE)
medianasi <- medianasi %>% mutate(tumour_type = rownames(medianasi))
medianasi.df <- gather(
  medianasi, all_of(namesig),
  key = "signature", value = "median_counts"
)

# The two long tables are combined by row position, so make sure their keys
# really line up before copying the column across.
stopifnot(
  identical(ratesi.df$tumour_type, medianasi.df$tumour_type),
  identical(ratesi.df$signature, medianasi.df$signature)
)
ratesi.df <- ratesi.df %>% mutate(mecounts = medianasi.df$median_counts)

The main idea is to draw the dots with geom_point first, and then add the shaded background stripes with geom_rect.


# One-row data frame describing a shaded stripe that is one unit wide, centred
# on x position `column`, and spans y from 0.5 to 11.5.
stripe_rect <- function(column) {
  data.frame(xmin = column - 0.5, xmax = column + 0.5, ymin = 0.5, ymax = 11.5)
}

# Stripes behind every second x position (2, 4, ..., 32).
t1.rect1 <- stripe_rect(2)
t2.rect1 <- stripe_rect(4)
t3.rect1 <- stripe_rect(6)
t4.rect1 <- stripe_rect(8)
t5.rect1 <- stripe_rect(10)
t6.rect1 <- stripe_rect(12)
t7.rect1 <- stripe_rect(14)
t8.rect1 <- stripe_rect(16)
t9.rect1 <- stripe_rect(18)
t10.rect1 <- stripe_rect(20)
t11.rect1 <- stripe_rect(22)
t12.rect1 <- stripe_rect(24)
t13.rect1 <- stripe_rect(26)
t14.rect1 <- stripe_rect(28)
t15.rect1 <- stripe_rect(30)
t16.rect1 <- stripe_rect(32)

# Signature names in the level order used for the y axis of the plot.
namesig1 <- c(
  "mSBS_N3", "mSBS_N2", "mSBS_N1",
  "mSBS42", "mSBS40", "mSBS19", "mSBS18", "mSBS17", "mSBS12",
  "mSBS5", "mSBS1"
)

# Reusable list of ggplot2 components for the dot plot: a classic theme with
# axis lines and ticks removed, the x axis placed on top with slanted labels,
# and the size / colour scales used for the dots.
spot.theme <- list(
  theme_classic(),
  theme(
    # Axes: no ticks or axis lines; x labels rotated 45 degrees.
    axis.ticks.x = element_blank(),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 0),
    axis.ticks.y = element_blank(),
    axis.text.y = element_text(size = 12),
    axis.line = element_blank(),
    # General text size, white panel, 10 mm margin on every side.
    text = element_text(size = 22),
    panel.background = element_rect(fill = "white"),
    plot.margin = unit(c(10, 10, 10, 10), "mm"),
    legend.box.background = element_rect(color = "white")
  ),
  # Dot size range; the lower bound is negative.
  scale_size_continuous(range = c(-1, 8)),
  # Three-point colour gradient with its midpoint at 0.4, limited to 0.08-0.8.
  scale_colour_gradient2(
    low = "#F39B7FB2",
    high = "black",
    mid = "#3C5488B2",
    midpoint = 0.4,
    guide = "colourbar",
    aesthetics = "colour",
    limits = c(0.08, 0.8)
  ),
  scale_x_discrete(position = "top")
)

# Dot plot: one dot per (tumour type, signature); dot size encodes `rate` and
# dot colour encodes `mecounts`.
# NOTE(review): `namem_ord` is not defined in the code shown here - confirm it
# exists upstream (the reordered `namem` looks like the intended level order).
p <- ratesi.df %>%
  mutate(
    name = fct_relevel(tumour_type, namem_ord),
    mSBS = fct_relevel(signature, namesig1)
  ) %>%
  ggplot(aes(x = name, y = mSBS)) +
  geom_point(aes(colour = mecounts, size = rate)) +
  xlab("tumour type") +
  spot.theme

# Grid and stripes. These layers are added after the dots, so they are drawn
# on top of them, in the order listed.
# All sixteen stripe rectangles are stacked into one data frame so that a
# single geom_rect layer draws them.
stripes.rect1 <- rbind(
  t1.rect1, t2.rect1, t3.rect1, t4.rect1,
  t5.rect1, t6.rect1, t7.rect1, t8.rect1,
  t9.rect1, t10.rect1, t11.rect1, t12.rect1,
  t13.rect1, t14.rect1, t15.rect1, t16.rect1
)

p <- p +
  # Translucent stripes behind every second tumour-type column.
  geom_rect(
    data = stripes.rect1,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    fill = "black", alpha = 0.1, inherit.aes = FALSE
  ) +
  # White separators between the signature rows: 1.5, 2.5, ..., 10.5, each
  # drawn exactly once.
  geom_hline(yintercept = seq(1.5, 10.5, by = 1), color = "white", size = 1) +
  # Black vertical rules at the plot edges and at two interior boundaries.
  geom_vline(xintercept = c(10.5, 31.5, 0.5, 33.5), color = "black", size = 1) +
  # Grey vertical rules after the 1st and the 11th column.
  geom_vline(xintercept = c(1.5, 11.5), color = "grey", size = 1)

# Write the figure as a Cairo PDF; `filename` is spelled out in full instead of
# relying on partial argument matching of `file`.
ggsave(filename = "Fig-1c.pdf", plot = p, device = cairo_pdf, width = 16, height = 7)


  Comparison of mouse substitution signatures with human signatures.
*SBS17 = SBS17a and SBS17b. c, Contribution of mSBS signatures across lung, liver, kidney and forestomach tumors, grouped by chemical exposure. The size
of the dots corresponds to the percentage of samples in each category having a minimal contribution level of 10% from the signature. The color represents
the mean relative contribution for the samples where the signature contribution is ≥10%. Of note, mSBS_N3 was detected in a spontaneous liver tumor
just below this threshold.

Interpretation of results

The plot shows that some signatures are mutually exclusive, and that mSBS_N1 is associated with VDC exposure.

References

Riva, L., Pandiri, A. R., Li, Y. R., Droop, A., Hewinson, J., Quail, M. A., … Adams, D. J. (2020). The mutational signature profile of known and suspected human carcinogens in mice. Nature Genetics. doi:10.1038/s41588-020-0692-4

Original code

https://github.com/team113sanger/mouse-mutatation-signatures

Leave a Reply

Your email address will not be published. Required fields are marked *