Codes in paper

The forest plot of hazard ratio shows that clonal hematopoietic mutations of some genes are related to treatment

Introduction

Show that different factors (age, smoking, treatment) have an impact on clonal hematopoiesis

Code explanation

Load the required tool function

library(tidyverse)
library(ggplot2)

signif.num <- function(x, ns = FALSE) {
    if (ns) {
        symbols = c("***", "**", "*", "ns")
    } else {
        symbols = c("***", "**", "*", "")
    }

    symnum(unlist(x), corr = FALSE, na = FALSE, legend = FALSE,
           cutpoints = c(0, 0.001, 0.01, 0.05, 1), 
           symbols = symbols)
}

plot_forest <- function(class_results, x = "class", y = "estimate", ymin = "conf.low", ymax = "conf.high",
                       label = "p.label", limits = NULL, breaks = waiver(), title = "", col = NULL, fill = NULL,
                        dodge_width = 0.8, outer_limit_arrows = FALSE, ps=3, eb_w=0.4, eb_s=0.4, or_s=4, OR=T, yinter = 1,
                        nudge = 0, base_plot = geom_blank(), bar_col = 'black') { 
    # suppressWarnings(ggthemr::ggthemr("fresh"))
  #' Forest plot of the coefficients in 'class_results'
  output_plot <- ggplot(class_results, aes_string(x = x, y = y, ymin = ymin, ymax = ymax, col = col, fill = fill)) + 
    base_plot +
    geom_hline(yintercept = yinter, color = "gray", linetype = "solid") +
    geom_errorbar(position = position_dodge(width = dodge_width), width = eb_w, size=eb_s) + 
    geom_point(position = position_dodge(width = dodge_width), size=ps) +
    geom_text(aes_string(label = label, vjust = nudge), position = position_dodge(width = dodge_width), color = 'black', size = or_s, alpha = .9) +
    coord_flip() +
    ggtitle(title)

  if (OR==T) {
  output_plot <-  output_plot +   
    scale_y_log10(limits = limits, breaks = breaks)
  }

  if (outer_limit_arrows) {
    stopifnot(length(limits) > 0)
    # Check for errorbar values outside 
    class_results[, "ymin_out"] <- ifelse(class_results[, ymin] < limits[1], limits[1], NA)
    class_results[, "linestyle_min"] <- ifelse(class_results[, ymin] < limits[1], "b", NA)
    class_results[, "ymax_out"] <- ifelse(class_results[, ymax] > limits[2], limits[2], NA)
    class_results[, "linestyle_max"] <- ifelse(class_results[, ymax] > limits[2], "b", NA)

    output_plot <- output_plot + geom_linerange(data = class_results, aes_string(x = x, ymin = "ymin_out", ymax = ymax), linetype = 3, position = position_dodge(width = dodge_width))
    output_plot <- output_plot + geom_linerange(data = class_results, aes_string(x = x, ymin = ymin, ymax = "ymax_out"), linetype = 3, position = position_dodge(width = dodge_width))
    output_plot <- output_plot + geom_linerange(data = class_results, aes_string(x = x, ymin = "ymin_out", ymax = "ymax_out"), linetype = 3, position = position_dodge(width = dodge_width))

    #output_plot <- output_plot + geom_segment(data = class_results, aes_string(x = x, xend = x, y = ymax, yend = paste0("ymin_out")), 
    #                                          arrow = arrow(length = unit(.2, "cm")), position = position_dodge(width = dodge_width), lineend = "round")
    #output_plot <- output_plot + geom_segment(data = class_results, aes_string(x = x, xend = x, y = ymin, yend = paste0("ymax_out")), 
    #                                          arrow = arrow(length = unit(.2, "cm")), position = position_dodge(width = dodge_width), lineend = "round")
  }
  return(output_plot)
}

Data reading

M_wide_all = suppressWarnings(data.table::fread('M_wide_all.txt', sep = '\t', header = T))  %>% as.data.frame()

M_wide_all = M_wide_all %>%
    mutate(race_b = as.integer(race == "White")
    ) %>%
    mutate(age_scaled = as.vector(scale(age)),
    age_d=age/10,
    ) %>%
    mutate(mutnum_all_r = case_when(
        mutnum_all ==0 ~ 0,
        mutnum_all == 1 ~ 1,
        mutnum_all >= 2 ~ 2)
    ) %>%
    mutate(smoke_bin=case_when(
        smoke==0 ~ 0,
        smoke==1 ~ 1,
        smoke==2 ~ 1)
    ) %>%
    mutate(
        Gender = relevel(factor(Gender), ref = 'Male'),
        race = relevel(factor(race), "White"),
        smoke = relevel(factor(smoke), "0"),
        smoke_bin = relevel(factor(smoke_bin), "0"),
        therapy_binary = relevel(factor(therapy_binary), 'untreated')
    )

#Define wide data frame with treatment known
M_wide <- M_wide_all %>% filter(therapy_known==1)

Calculate the coefficient and confidence interval of age_scaled(Age) + smoke_bin(Smoke) + race_b + Gender + therapy_binary(Therapy) according to glm generalized linear regression

## forest plot
DTA = c('DNMT3A', 'TET2', 'ASXL1')
DDR = c('PPM1D', 'TP53', 'CHEK2')
SPL = c('SF3B1', 'SRSF2')
OTH = c('JAK2', 'ATM')

gene_list = c(DDR, DTA, SPL, OTH)

#ALL adjusted for treatment
logit_gene_var = list()

for (gene in gene_list) {
    logit = glm(
        formula = get(gene) ~ age_scaled + smoke_bin + race_b + Gender + therapy_binary,
        data = M_wide,
        family = "binomial")
    logit_data = logit %>% sjPlot::get_model_data(type="est") %>% cbind(Gene = gene)
    logit_gene_var = rbind(logit_gene_var, logit_data)
}

# for each gene
D = logit_gene_var %>%
    filter(!term %in% c("GenderFemale", "race_b")) %>%
    mutate(
        term = c(
            'therapy_binarytreated' = 'Therapy',
            'smoke_bin1' = 'Smoking',
            'age_scaled' = 'Age'
        )[as.character(term)]
    ) %>%
    mutate(term = factor(term, c("Age", "Therapy", "Smoking"))) %>%
    mutate(p_fdr = p.adjust(p.value, method = "fdr")) %>%
    mutate(termGene = paste0(term, Gene)) %>%
    arrange(estimate, Gene) %>%
    mutate(termGene = factor(termGene, levels = termGene)) %>%
    mutate(gene_cat = case_when(
        Gene %in% DTA ~ 'DTA', 
        Gene %in% DDR ~ 'DDR', 
        Gene %in% SPL ~ 'Splicing', 
        T ~ 'Other'
      )
    ) %>% 
    mutate(gene_cat = factor(gene_cat, c('DDR', 'DTA', 'Splicing', 'Other'))) %>%
    mutate(
        q.value = p.adjust(p.value, n = nrow(.), method = 'fdr'),
        q.label = paste0(signif(estimate, 2), signif.num(q.value)),
        q.star = signif.num(q.value)
    )

According to the organized data structure, use custom function to plot

p_forest = plot_forest(
      D,
      x = "termGene",
      label = 'q.star',
      eb_w = 0,
      eb_s = 0.3,
      ps = 1.5,
      or_s = 2,
      nudge = -0.3,
      col = 'gene_cat'
  ) + 
  facet_wrap(~term, scale = 'free_y', ncol = 1) +
  scale_x_discrete(
      breaks = D$termGene,
      labels = D$Gene,
      expand = c(0.1,0)
  ) +
  xlab('') + ylab('Odds Ratio of CH-PD') +
  scale_color_nejm() +
  panel_theme +
  theme(
    axis.text = element_text(size = font_size),
    axis.title = element_text(size = font_size),
    strip.text = element_text(size = font_size),
    legend.position = 'top',
    legend.title = element_blank(),
    legend.text = element_text(size = font_size/1.2),
    legend.key.size = unit(3, "mm")
  ) 


OR with 95% confidence interval for CH-PD(clonal hematopoiesis mutations that we detected were classified as putative cancer-driver mutations) mutation in the ten most commonly mutated genes, with top, increasing age (n = 10,138); middle, for patients
previously exposed to cancer therapy (n = 5,978) compared with those with no exposure (n = 4,160); bottom, for current/former smokers (n = 4,989)
compared with nonsmokers (n = 5,145), in multivariable logistic regression models adjusted for therapy, smoking, ancestry, age, sex and time from
diagnosis to blood draw. Q value (FDR-corrected P value) < 0.05, Q < 0.01, Q < 0.001. Age is expressed as the mean centered values.

Interpretation of results

Age, smoking and cancer treatment have an impact on the clonal hematopoiesis of some genes.

References

Bolton, K.L., Ptashkin, R.N., Gao, T. et al. Cancer therapy shapes the fitness landscape of clonal hematopoiesis. Nat Genet 52, 1219–1226 (2020). https://doi.org/10.1038/s41588-020-00710-0

Original code

https://github.com/papaemmelab/bolton_NG_CH

4 thoughts on “The forest plot of hazard ratio shows that clonal hematopoietic mutations of some genes are related to treatment

  1. whoah this weblog is wonderful i love reading your articles.
    Keep up the great work! You realize, a lot of individuals are searching round for this information, you could help them greatly.

  2. Hi there, I discovered your web site by way of Google whilst searching
    for a similar topic, your web site came up, it appears to be like
    good. I have bookmarked it in my google bookmarks.

    Hi there, simply changed into alert to your weblog through
    Google, and located that it is truly informative.
    I am going to be careful for brussels. I will appreciate
    in the event you proceed this in future.
    A lot of folks will be benefited out of your writing.
    Cheers!

  3. Whats up this is somewhat of off topic but I was wanting to know if blogs use
    WYSIWYG editors or if you have to manually code with HTML.
    I’m starting a blog soon but have no coding knowledge so I wanted
    to get advice from someone with experience. Any help would be enormously appreciated!

Leave a Reply

Your email address will not be published. Required fields are marked *