Codes in paper

A bar graph comparing the number of hypomethylated regions of the sample with the number of intersections of CpG islands, mutation load, and number of SVs

August 15, 2020February 21, 2021 by Yuwei Liao

Legend

A summary of the HMR (hypomethylation region) frequency of 100 independent mCRPC samples and the sample level of somatic mutations. The bar graph shows the HMR count within the genome feature, the HMR count that overlaps with CpG islands, shores and shelves (CpG overlap), the percentage of genomes with DNA copy number changes (CNA), somatic cell mutations per megabase (mutations per Mb) Number) and structural variance number (SV count). CMP samples are marked in blue. Samples with fewer HMR tumors on CpG islands, shores and shelves and CpG openseas (regions other than islands, shores and shelves) were designated as CpG methylator phenotype (CMP).

Code explanation

First get the required HMR count, CpG ovderlap, mutation load, SV count values

#source('~/mnt/data1/projects/WCDT_WGBS_2019/WCDT_WGBS/scripts/2019_05_15_prepare_environment.r')  #Load enviroment

# Plots Figure 1A

colstf <- c('CRPC_ARE','LOC_ARE','chip_FOXA1','chip_HOXB13','chip_ERG')
pcttotal=FALSE

output <- data.frame()
for( i in 1 : length(hmr) ) {
  print(i)
  sample <- toupper(names(hmr)[i])
  hmri <- hmr[[i]]

  totalhmrs <- length(hmri)
  output[sample,'Sample'] <- sample
  output[sample,'hmr'] <- totalhmrs

  divideby <- 1
  if(pcttotal) {
    divideby <- totalhmrs
  }

   #Calculate the TFBS, hmr count of each sample, including genebody, promoter, H3K27ac, H3K27me3, and CpG_island, CpG_shelf, CpG_shore
  in_tf <- rep(F,totalhmrs)
  for(j in 1:length(colstf)) {
    coltf <- colstf[j]
    in_tf <- in_tf | countOverlaps(hmri,tracks[[coltf]],ignore.strand=T)
  }

  in_genebody = countOverlaps(hmri,tracks[['genebody']],ignore.strand=T)>0
  in_promoter = countOverlaps(hmri,tracks[['promoter']],ignore.strand=T)>0
  in_h3k27ac = countOverlaps(hmri,tracks[['pca100_H3K27ac']],ignore.strand=T)>0
  in_h3k27me3 = countOverlaps(hmri,tracks[['pca100_H3K27me3']],ignore.strand=T)>0

  output[sample,'Promoter'] <- sum(in_promoter) / divideby
  output[sample,'Genebody'] <- sum(in_genebody & !in_promoter) / divideby
  output[sample,'TFBS_not_promoter'] <- sum(in_tf & !in_genebody & !in_promoter) / divideby
  output[sample,'H3K27ac_not_promoter_tfbs'] <- sum(in_h3k27ac & !in_tf & !in_genebody & !in_promoter) / divideby
  output[sample,'H3K27me3_not_others'] <- sum(in_h3k27me3 & !in_h3k27ac & !in_tf & !in_genebody & !in_promoter) / divideby

  promoter_oncogenes = countOverlaps(hmri,gr_oncogenes, ignore.strand=T)
  output[sample,'oncogene_promoter'] = sum(promoter_oncogenes>0)/divideby
  promoter_tsg = countOverlaps(hmri, gr_tsg, ignore.strand=T)
  output[sample,'TS_promoter'] = sum(promoter_tsg>0)/divideby

  in_cpg_island = countOverlaps(hmri,tracks[['cpg_island']],ignore.strand=T)>0
  in_cpg_shore = countOverlaps(hmri,tracks[['cpg_shore']],ignore.strand=T)>0
  in_cpg_shelf = countOverlaps(hmri,tracks[['cpg_shelf']],ignore.strand=T)>0

  output[sample,'CpG_island'] = sum(in_cpg_island)/divideby
  output[sample,'CpG_shore'] = sum(in_cpg_shore & !in_cpg_island)/divideby
  output[sample,'CpG_shelf'] = sum(in_cpg_shelf & !in_cpg_shore & !in_cpg_island)/divideby
}

# Screen samples based on tumor purity
#output <- output[rownames(tumorpurity)[tumorpurity[samples_wgbs,'Tumor.Purity.Comp']>80],]
output <- output[rownames(output) %in% sample_ids_wgbs,]

lvl <- output[order(output$hmr),'Sample']
output$Sample <- factor(output$Sample,levels=lvl)

if(pcttotal) {
  countsother <- 1
} else {
  countsother <- output$hmr
}
countsother <- output$hmr-(output$Promoter+output$Genebody+output$TFBS_not_promoter+
                             output$H3K27me3_not_others+output$H3K27ac_not_promoter_tfbs)

# Integrate the sub-categories into one category, Other, Promoter, Genebody, TFBS, H3K27ac, H3K27me3 belong to df1, CpG_island, CpG_shelf, CpG_shore belong to df3, df5 is cn and mutation load, df6 is sv
df1 <- rbind.data.frame(
  cbind.data.frame(sample=output$Sample,count=countsother,group='Other'),
  cbind.data.frame(sample=output$Sample,count=output$Promoter,group='Promoter'),
  cbind.data.frame(sample=output$Sample,count=output$Genebody,group='Genebody'),
  cbind.data.frame(sample=output$Sample,count=output$TFBS_not_promoter,group='TFBS'),
  cbind.data.frame(sample=output$Sample,count=output$H3K27ac_not_promoter_tfbs,group='H3K27ac'),
  cbind.data.frame(sample=output$Sample,count=output$H3K27me3_not_others,group='H3K27me3'))
df1$group <- factor(df1$group,levels=c('Other','H3K27me3','H3K27ac','TFBS','Genebody','Promoter'))

df3 <- rbind.data.frame(
  cbind.data.frame(sample=output$Sample,count=output$CpG_island,group='CpG_island'),
  cbind.data.frame(sample=output$Sample,count=output$CpG_shelf,group='CpG_shelf'),
  cbind.data.frame(sample=output$Sample,count=output$CpG_shore,group='CpG_shore'))
df3$group <- factor(df3$group,levels=c('CpG_shelf','CpG_shore','CpG_island'))

df5 <- cellsummary[lvl,]
df5$mutation_count <- df5$mutation_count / (HG38_SIZE / 1000000)
df5$sample <- factor(rownames(df5),levels=lvl)
df5$namecn <- '%Altered'
df5$namemut <- 'Mut/Mb'

df6slice <- manta_counts[lvl,]
df6slice$sample <- factor(rownames(df6slice),levels=lvl)
df6 <- rbind.data.frame(
  cbind.data.frame(sample=df6slice$sample,count=df6slice$BND,group='BND'),
  cbind.data.frame(sample=df6slice$sample,count=df6slice$DEL,group='DEL'),
  cbind.data.frame(sample=df6slice$sample,count=df6slice$DUP,group='DUP'),
  cbind.data.frame(sample=df6slice$sample,count=df6slice$INS,group='INS'),
  cbind.data.frame(sample=df6slice$sample,count=df6slice$INV,group='INV'))

df6$color = rep("black", 100)
df6$color[ match.idx( df6$sample, samples_cmp )$idx.A ] = "cornflowerblue"

Then we use the obtained data to draw

cols <- brewer.pal(n=5, name='Set1')  #配置色板
# 用plist存放ggplot对象，再用ggarrange进行排版并画图
plist <- list()
plist[[1]] <- ggplot(df1,aes(x=sample,y=count,fill=group))+theme_classic()+geom_bar(stat='identity')+
  theme(legend.position = "none", 
        axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) + 
  ylab('HMR#')+
  scale_fill_manual(values=c('darkgray',cols))
plist[[2]] <- ggplot(df3,aes(x=sample,y=count,fill=group))+theme_classic()+geom_bar(stat='identity')+
  theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(),legend.position = "none")+
  scale_fill_brewer(palette='Dark2')+ylab('CpG#')
plist[[3]] <- ggplot(df5,aes(x=sample,y=percent.copy.altered,fill=namecn))+theme_classic()+geom_bar(stat='identity')+
  theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(),legend.position = "none")+
  scale_fill_manual(values='darkgrey')+ylab('% CN Altered')
plist[[4]] <- ggplot(df5,aes(x=sample,y=mutation_count,fill=namemut))+theme_classic()+geom_bar(stat='identity')+
  theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank(),legend.position = "none")+
  scale_fill_manual(values='darkgrey')+ylab('Mut/Mb')+scale_y_log10() 

plist[[5]] <- ggplot(df6,aes(x=sample,y=count,fill=group))+theme_classic()+geom_bar(stat='identity')+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_text(angle=90,hjust=1,vjust=0.5,colour=df6$color),
        axis.ticks.x=element_blank(),
        legend.position = "none")+ 
  scale_fill_brewer(palette='Set2')+ylab('# SV')

ggarrange(plotlist=plist,
          ncol=1,
          nrow=length(plist),
          heights=c(3,1,1,1,2) ,
          align="v")
ggsave(fn_fig1a, height=8, width=14)

Reference

Zhao S G , Chen W S , Li H , et al. The DNA methylation landscape of advanced prostate cancer[J]. Nature Genetics, 2020.

Original code

https://github.com/DavidQuigley/WCDT_WGBS/blob/master/scripts/2019_05_15_WGBS_figure_1A.R

Recent Posts

Most Used Categories

The Code Search

A bar graph comparing the number of hypomethylated regions of the sample with the number of intersections of CpG islands, mutation load, and number of SVs

Legend

Code explanation

First get the required HMR count, CpG ovderlap, mutation load, SV count values

Then we use the obtained data to draw

Reference

Original code

Leave a Reply Cancel reply

Recent Posts

Most Used Categories

A bar graph comparing the number of hypomethylated regions of the sample with the number of intersections of CpG islands, mutation load, and number of SVs

Legend

Code explanation

First get the required HMR count, CpG ovderlap, mutation load, SV count values

Then we use the obtained data to draw

Reference

Original code

Leave a Reply Cancel reply

You may like

All codes for scRNA analysis in R

Significance test of PPI(protein–protein interaction) interface mutations.