Extrachromosomal DNA is associated with decreased immune cell infiltration and antigen presentation, represents a potential cancer immune evasion mechanism
library(dplyr)
library(ggplot2)
library(tidyr)
library(ggpubr)
library(NeoEnrichment)
library(ggprism)
library(cowplot)
library(patchwork)
library(reshape2)
library(stringr)
Sample Distribution
EcDNA status information was determined using AmpliconArchitect from WGS data as described previously (Kim et al., 2020). Gene expression data are available for the majority of TCGA but not PCAWG datasets. So for downstream immune infiltration and gene expression analysis, we only keep TCGA samples.
We first look at the ecDNA status distribution of samples:
##keep TCGA samples
<- readxl::read_xlsx("../data/Extrachromosomal DNA is associated with oncogene amplification and poor outcome across multiple cancers.xlsx",sheet = 3) %>%
dt filter(tumor_or_normal!="normal") %>%
filter(grepl("TCGA",sample_barcode)) %>%
group_by(sample_barcode) %>%
summarise(ecDNA=ifelse(any(sample_classification=="Circular"),"yes","no"))
$cancer <- get_cancer_type(dt$sample_barcode)
dt
%>% group_by(cancer) %>%
dt summarise(total_sample=n(),
ecDNA_postive=sum(ecDNA=="yes")) %>% arrange(desc(total_sample)) %>%
pivot_longer(cols = c("total_sample","ecDNA_postive"),
names_to = "type",
values_to = "counts") -> summ
$cancer <- factor(summ$cancer,levels = unique(summ$cancer))
summggplot(data=summ,aes(x=cancer,y=counts,fill=type))+
geom_bar(stat="identity",position = "dodge")+
theme_prism()+
theme(axis.text.x = element_text(angle = 45,vjust = 1, hjust = 1))+
theme(axis.title.x = element_blank())
And when combined with RNA expression data:
<- readRDS("../../tmp/tpm_gene_log2.rds")
tpm_gene_log2 <- dt %>% filter(sample_barcode %in% colnames(tpm_gene_log2))
dt_exp %>% group_by(cancer) %>%
dt_exp summarise(total_sample=n(),
ecDNA_postive=sum(ecDNA=="yes")) %>%
arrange(desc(total_sample)) %>%
pivot_longer(cols = c("total_sample","ecDNA_postive"),
names_to = "type",
values_to = "counts") -> summ
$cancer <- factor(summ$cancer,levels = unique(summ$cancer))
summggplot(data=summ,aes(x=cancer,y=counts,fill=type))+
geom_bar(stat="identity",position = "dodge")+
theme_prism()+
theme(axis.text.x = element_text(angle = 45,vjust = 1, hjust = 1))+
theme(axis.title.x = element_blank())
Immune cell infiltration status
We used various methods to compare immune infiltration status between samples with and without ecDNA. We first check the total immune infiltration levels quantified by ESTIMATE, Xcell immune score and Leukocyte fraction.
##ESTIMATE
<- readRDS("../data/pancancer_estimate_score.rds")
pancancer_estimate_score <- inner_join(
dt_immune1 %>% rename(sample=sample_barcode),
dt
pancancer_estimate_score)%>% group_by(cancer) %>% summarise(c=sum(ecDNA=="yes")) %>%
dt_immune1 filter(c>20)-> summ
<- dt_immune1 %>%
dt_immune1 filter(cancer %in% summ$cancer)
###MCPcounter cytotoxicity score and Xcell immune score
<- data.table::fread("../data/infiltration_estimation_for_tcga.csv") %>%
immune rename(sample=cell_type) %>%
select(sample,`cytotoxicity score_MCPCOUNTER`,`immune score_XCELL`)
<- inner_join(
dt_immune2 %>% rename(sample=sample_barcode),
dt
immune)%>% group_by(cancer) %>%
dt_immune2 summarise(c=sum(ecDNA=="yes"),a_c=n(),per=c/a_c) %>%
filter(c>20)-> summ
<- dt_immune2 %>%
dt_immune2 filter(cancer %in% summ$cancer)
##Leukocyte fraction
<- readRDS("../data/immune_landscape.rds")
immune_landscape <- inner_join(
dt_immune3 %>% rename(sample=sample_barcode) %>%
dt mutate(sample = substr(sample,1,12)),
%>%
immune_landscape select(`TCGA Participant Barcode`,`Immune Subtype`,`Leukocyte Fraction`,`Intratumor Heterogeneity`) %>%
rename(sample=`TCGA Participant Barcode`)
%>% filter(!is.na(`Leukocyte Fraction`))
) %>% group_by(cancer) %>%
dt_immune3 summarise(c=sum(ecDNA=="yes"),a_c=n(),per=c/a_c) %>%
filter(c>20)-> summ
<- dt_immune3 %>%
dt_immune3 filter(cancer %in% summ$cancer)
<- ggplot(data=dt_immune1,aes(x=ecDNA,y=ImmuneScore))+
p1 geom_boxplot(size=1)+
stat_compare_means(label.x=1.2,size=5)+
theme_prism()+
labs(y="ImmuneScore (Estimate)")+
scale_x_discrete(labels=c("Non-ecDNA","ecDNA"))+
theme(axis.title.x = element_blank())
<- ggplot(data=dt_immune2,aes(x=ecDNA,y=log(`cytotoxicity score_MCPCOUNTER`+1)))+
p2 geom_boxplot(size=1)+
stat_compare_means(label.x=1.2,size=5)+
theme_prism()+
labs(y="Cytotoxicity score (MCPCOUNTER)")+
scale_x_discrete(labels=c("Non-ecDNA","ecDNA"))+
theme(axis.title.x = element_blank())
<- ggplot(data=dt_immune2,aes(x=ecDNA,y=`immune score_XCELL`))+
p3 geom_boxplot(size=1)+
stat_compare_means(label.x=1.2,size=5)+
theme_prism()+
labs(y="Immune score (XCELL)")+
scale_x_discrete(labels=c("Non-ecDNA","ecDNA"))+
theme(axis.title.x = element_blank())
<- ggplot(data=dt_immune3,aes(x=ecDNA,y=`Leukocyte Fraction`))+
p4 geom_boxplot(size=1)+
stat_compare_means(label.x=1.2,size=5)+
theme_prism()+
scale_x_discrete(labels=c("Non-ecDNA","ecDNA"))+
theme(axis.title.x = element_blank())
+ p3 + p4 + p2+ plot_layout(ncol = 2,nrow=2) p1
With different methods, tumors with ecDNA consistently show significantly decreased immune scores.
We also calculated other infiltration scores including IPS Z-score, CYT and TIS:
<- readRDS("../data/IPS.rds")
IPS <- inner_join(
dt_IPS %>% rename(sample=sample_barcode),
dt
IPS)%>% group_by(cancer) %>% summarise(c=sum(ecDNA=="yes")) %>%
dt_IPS filter(c>20)-> summ
<- dt_IPS %>%
dt_IPS filter(cancer %in% summ$cancer)
<- ggplot(data=dt_IPS,aes(x=ecDNA,y=AZ))+
p5 geom_boxplot(size=1)+
stat_compare_means(label.x=1.2,size=5)+
theme_prism()+
scale_x_discrete(labels=c("Non-ecDNA","ecDNA"))+
theme(axis.title.x = element_blank())+
labs(y="IPS Z-score")
<- read.table("../data/tis_signature.txt")
tis $sample <- rownames(tis)
tis<- inner_join(
dt_tis %>% rename(sample=sample_barcode),
dt
tis)%>% group_by(cancer) %>% summarise(c=sum(ecDNA=="yes")) %>%
dt_tis filter(c>20)-> summ
<- dt_tis %>%
dt_tis filter(cancer %in% summ$cancer)
<- ggplot(data=dt_tis,aes(x=ecDNA,y=V1))+
p6 geom_boxplot(size=1)+
stat_compare_means(label.x=1.2,size=5)+
theme_prism()+
scale_x_discrete(labels=c("Non-ecDNA","ecDNA"))+
theme(axis.title.x = element_blank())+
labs(y="TIS")
<- readRDS("../data/cyts.rds")
cyts <- inner_join(
dt_cyts %>% rename(sample=sample_barcode),
dt
cyts)%>% group_by(cancer) %>% summarise(c=sum(ecDNA=="yes")) %>%
dt_cyts filter(c>20)-> summ
<- dt_cyts %>%
dt_cyts filter(cancer %in% summ$cancer)
<- ggplot(data=dt_cyts,aes(x=ecDNA,y=cyts))+
p7 geom_boxplot(size=1)+
stat_compare_means(label.x=1.2,size=5)+
theme_prism()+
scale_x_discrete(labels=c("Non-ecDNA","ecDNA"))+
theme(axis.title.x = element_blank())+
labs(y="CYT")
+ p6 + p7 p5
Then we look close into the difference of different immune cell types between samples with and without ecDNA using above methods:
load("../data/ecDNA.Rdata")
<- read.table("../data/infiltration_estimation_for_tcga.csv",header = T,sep = ",")
immune_data <- ecData3 %>% filter(sample_type %in% c("TM","TP")) %>%
ecDNA_tcga filter(grepl("^TCGA.",sample_barcode)) %>%
mutate(ecDNA=ifelse(sample_classification=="Circular","ecDNA","Non-ecDNA")) %>%
mutate(tcga_id=get_cancer_type(sample_barcode))
<- ecDNA_tcga %>% filter(sample_barcode %in% immune_data$cell_type)
ecDNA_imm_info %>% group_by(tcga_id) %>%
ecDNA_imm_info summarise(.,ecDNA=sum(ifelse(ecDNA=="ecDNA",1,0))) %>%
filter(ecDNA>20) -> ecDNA_20_info
<- ecDNA_tcga %>% filter(tcga_id %in% ecDNA_20_info$tcga_id)
heat_data ##Cibersort ABS
<- immune_data[,c(1,grep("*CIBERSORT.ABS$",colnames(immune_data)))]
CiberABS <- c("B.cell.memory_CIBERSORT.ABS","B.cell.plasma_CIBERSORT.ABS",
CiberABS_names "NK.cell.activated_CIBERSORT.ABS","T.cell.CD8._CIBERSORT.ABS")
<- CiberABS[,c("cell_type",CiberABS_names)]
CiberABS
<- CiberABS[which(CiberABS$cell_type %in% heat_data[which(heat_data$ecDNA == "ecDNA"),]$sample_barcode),]
imm_ecDNA <- CiberABS[which(CiberABS$cell_type %in% heat_data[which(heat_data$ecDNA == "Non-ecDNA"),]$sample_barcode),]
imm_NonecDNA
$group <- "ecDNA"
imm_ecDNA$group <- "Non-ecDNA"
imm_NonecDNA<- rbind(imm_ecDNA,imm_NonecDNA)
Ciber_Data
<- melt(Ciber_Data)
Ciber_Data colnames(Ciber_Data) <- c("Sample","Group","Celltype","Composition")
$Celltype <- str_sub(Ciber_Data$Celltype,1,-12)
Ciber_Data
##Xcell
<- immune_data[,c(1,grep("*_XCELL$",colnames(immune_data)))] ##ƥ??_XCELL????
Xcell <- c("B.cell.plasma_XCELL","NK.cell_XCELL","T.cell.CD4..central.memory_XCELL",
Xcell_names "T.cell.CD4..effector.memory_XCELL","T.cell.CD8._XCELL","T.cell.CD8..central.memory_XCELL",
"T.cell.CD8..effector.memory_XCELL","T.cell.NK_XCELL")
<- Xcell[,c("cell_type",Xcell_names)]
Xcell
<- Xcell[which(Xcell$cell_type %in% heat_data[which(heat_data$ecDNA == "ecDNA"),]$sample_barcode),]
imm_ecDNA <- Xcell[which(Xcell$cell_type %in% heat_data[which(heat_data$ecDNA == "Non-ecDNA"),]$sample_barcode),]
imm_NonecDNA
$group <- "ecDNA"
imm_ecDNA$group <- "Non-ecDNA"
imm_NonecDNA<- rbind(imm_ecDNA,imm_NonecDNA)
Xcell_Data
<- melt(Xcell_Data)
Xcell_Data colnames(Xcell_Data) <- c("Sample","Group","Celltype","Composition")
$Celltype <- str_sub(Xcell_Data$Celltype,1,-4)
Xcell_Data
##Timer
<- immune_data[,c(1,grep("*_TIMER$",colnames(immune_data)))] ##ƥ??_XCELL????
Timer <- c("B.cell_TIMER","T.cell.CD4._TIMER","T.cell.CD8._TIMER")
Timer_names <- Timer[,c("cell_type",Timer_names)]
Timer
<- Timer[which(Timer$cell_type %in% ecDNA_tcga[which(ecDNA_tcga$ecDNA == "ecDNA"),]$sample_barcode),]
imm_ecDNA <- Timer[which(Timer$cell_type %in% ecDNA_tcga[which(ecDNA_tcga$ecDNA == "Non-ecDNA"),]$sample_barcode),]
imm_NonecDNA
$group <- "ecDNA"
imm_ecDNA$group <- "Non-ecDNA"
imm_NonecDNA<- rbind(imm_ecDNA,imm_NonecDNA)
Timer_Data
<- melt(Timer_Data)
Timer_Data colnames(Timer_Data) <- c("Sample","Group","Celltype","Composition")
$Celltype <- str_sub(Timer_Data$Celltype,1,-4)
Timer_Data
##MCPCOUNTER
<- immune_data[,c(1,grep("*MCPCOUNTER$",colnames(immune_data)))]
MCPcounter <- c("B.cell_MCPCOUNTER","NK.cell_MCPCOUNTER","T.cell_MCPCOUNTER","T.cell.CD8._MCPCOUNTER")
MCP_names <- MCPcounter[,c("cell_type",MCP_names)]
MCPcounter
<- MCPcounter[which(MCPcounter$cell_type %in% heat_data[which(heat_data$ecDNA == "ecDNA"),]$sample_barcode),]
imm_ecDNA <- MCPcounter[which(MCPcounter$cell_type %in% heat_data[which(heat_data$ecDNA == "Non-ecDNA"),]$sample_barcode),]
imm_NonecDNA
$group <- "ecDNA"
imm_ecDNA$group <- "Non-ecDNA"
imm_NonecDNA<- rbind(imm_ecDNA,imm_NonecDNA)
MCPcounter_Data
<- melt(MCPcounter_Data)
MCPcounter_Data colnames(MCPcounter_Data) <- c("Sample","Group","Celltype","Composition")
$Celltype <- str_sub(MCPcounter_Data$Celltype,1,-9)
MCPcounter_Data$Composition <- log(MCPcounter_Data$Composition+1)/10
MCPcounter_Data
##QUANTISEQ
<- immune_data[,c(1,grep("*QUANTISEQ$",colnames(immune_data)))]
Quantiseq <- c("B.cell_QUANTISEQ","NK.cell_QUANTISEQ","T.cell.CD8._QUANTISEQ")
QUAN_names <- Quantiseq[,c("cell_type",QUAN_names)]
Quantiseq
<- Quantiseq[which(Quantiseq$cell_type %in% heat_data[which(heat_data$ecDNA == "ecDNA"),]$sample_barcode),]
imm_ecDNA <- Quantiseq[which(Quantiseq$cell_type %in% heat_data[which(heat_data$ecDNA == "Non-ecDNA"),]$sample_barcode),]
imm_NonecDNA
$group <- "ecDNA"
imm_ecDNA$group <- "Non-ecDNA"
imm_NonecDNA<- rbind(imm_ecDNA,imm_NonecDNA)
Quantiseq_Data
<- melt(Quantiseq_Data)
Quantiseq_Data colnames(Quantiseq_Data) <- c("Sample","Group","Celltype","Composition")
$Celltype <- str_sub(Quantiseq_Data$Celltype,1,-9)
Quantiseq_Data
<- rbind(Ciber_Data,Xcell_Data,Timer_Data,MCPcounter_Data,Quantiseq_Data)
draw_data $Celltype <- factor(draw_data$Celltype,levels = c("B.cell.memory_CI","B.cell.plasma_CI","NK.cell.activated_CI","T.cell.CD8._CI",
draw_data"B.cell.plasma_XC","NK.cell_XC","T.cell.CD4..central.memory_XC","T.cell.CD4..effector.memory_XC",
"T.cell.CD8..central.memory_XC","T.cell.CD8..effector.memory_XC","T.cell.CD8._XC","T.cell.NK_XC",
"B.cell_TI","T.cell.CD4._TI","T.cell.CD8._TI",
"B.cell_MC","NK.cell_MC","T.cell.CD8._MC","T.cell_MC",
"B.cell_Q","NK.cell_Q","T.cell.CD8._Q"))
$Group <- factor(draw_data$Group,levels = c("Non-ecDNA","ecDNA"))
draw_data
library(ggprism)
ggplot(data=draw_data,aes(x=Celltype,y=Composition,fill=Group))+
ylim(0,1)+
geom_boxplot()+
stat_compare_means(aes(label = ..p.signif..))+
theme_prism()+
theme(axis.text.x = element_text(angle = 45,vjust = 1, hjust = 1))+
labs(y="Composition",x= NULL)+
scale_x_discrete(breaks=c("B.cell.memory_CI","B.cell.plasma_CI","NK.cell.activated_CI","T.cell.CD8._CI",
"B.cell.plasma_XC","NK.cell_XC","T.cell.CD4..central.memory_XC","T.cell.CD4..effector.memory_XC",
"T.cell.CD8..central.memory_XC","T.cell.CD8..effector.memory_XC","T.cell.CD8._XC","T.cell.NK_XC",
"B.cell_TI","T.cell.CD4._TI","T.cell.CD8._TI",
"B.cell_MC","NK.cell_MC","T.cell.CD8._MC","T.cell_MC",
"B.cell_Q","NK.cell_Q","T.cell.CD8._Q"),#???ú?????????
labels=c("B cell memory","B cell plasma","NK cell activated","T cell CD8",
"B cell plasma","NK cell","T cell CD4 central memory","T cell CD4 effector memory",
"T cell CD8 central memory","T cell CD8 effector memory","T cell CD8","T cell NK",
"B cell","T cell CD4","T cell CD8",
"B cell","NK cell","T cell CD8","T cell",
"B cell","NK cell","T cell CD8"))
Following is the full set of immune cells in various methods:
##Boxplot
library(reshape2)
library(ggprism)
library(ggpubr)
library(stringr)
##XCELL
<- immune_data[,c(1,grep("*_XCELL$",colnames(immune_data)))]
CiberABS <- CiberABS[which(CiberABS$cell_type %in% heat_data[which(heat_data$ecDNA == "ecDNA"),]$sample_barcode),]
imm_ecDNA <- CiberABS[which(CiberABS$cell_type %in% heat_data[which(heat_data$ecDNA == "Non-ecDNA"),]$sample_barcode),]
imm_NonecDNA $group <- "ecDNA"
imm_ecDNA$group <- "Non-ecDNA"
imm_NonecDNA<- rbind(imm_ecDNA,imm_NonecDNA)
CiberDraw <- CiberDraw[,c(1:37,41)]
ScoreDraw <- c("B.cell_XCELL","T.cell.CD4..memory_XCELL","T.cell.CD4...non.regulatory._XCELL",
cell_names "T.cell.CD8..naive_XCELL","Common.lymphoid.progenitor_XCELL","Myeloid.dendritic.cell_XCELL",
"Macrophage_XCELL","Monocyte_XCELL","T.cell.CD4..Th1_XCELL","T.cell.CD4..Th2_XCELL",
"B.cell.memory_XCELL","B.cell.naive_XCELL","T.cell.gamma.delta_XCELL")
<- ScoreDraw[,-which(colnames(ScoreDraw) %in% cell_names)]
ScoreDraw <- melt(ScoreDraw)
ScoreDraw colnames(ScoreDraw) <- c("Sample","Group","Celltype","Composition")
$Celltype <- str_sub(ScoreDraw$Celltype,1,-7)
ScoreDraw<- ScoreDraw
draw_data <- c("B.cell.plasma","Neutrophil","NK.cell","Mast.cell","Eosinophil","Cancer.associated.fibroblast",
nams_id "Class.switched.memory.B.cell","Common.myeloid.progenitor","Endothelial.cell","Granulocyte.monocyte.progenitor",
"Hematopoietic.stem.cell","Macrophage.M1","Macrophage.M2","Myeloid.dendritic.cell.activated",
"Plasmacytoid.dendritic.cell","T.cell.CD4..central.memory","T.cell.CD4..effector.memory",
"T.cell.CD4..naive","T.cell.CD8.","T.cell.CD8..central.memory","T.cell.CD8..effector.memory",
"T.cell.NK","T.cell.regulatory..Tregs.")
$Celltype <- factor(draw_data$Celltype,levels = nams_id)
draw_data$Group <- factor(draw_data$Group,levels = c("Non-ecDNA","ecDNA"))
draw_data<- ggplot(data=draw_data,aes(x=Celltype,y=Composition,fill=Group))+
boxplot_xcell geom_boxplot()+
stat_compare_means(aes(label = ..p.signif..))+
theme_prism()+
theme(axis.text.x = element_text(angle = 45,vjust = 1, hjust = 1) )+
labs(y="Composition",x= NULL,title = "XCell")+
scale_x_discrete(breaks=nams_id,
labels=c("B cell plasma","Neutrophil","NK cell","Mast cell","Eosinophil","Cancer associated fibroblast",
"Class switched memory B cell","Common myeloid progenitor","Endothelial cell",
"Granulocyte monocyte progenitor",
"Hematopoietic stem cell","Macrophage M1","Macrophage M2","Myeloid dendritic cell activated",
"Plasmacytoid dendritic cell","T cell CD4 central memory","T cell CD4 effector memory",
"T cell CD4 naive","T cell CD8","T cell CD8 central memory","T cell CD8 effector memory",
"T cell NK","T cell regulatory Tregs"))
##CIBERSORT.ABS
<- immune_data[,c(1,grep("*CIBERSORT.ABS$",colnames(immune_data)))]
CiberABS
<- c("B.cell.naive_CIBERSORT.ABS","T.cell.CD4..memory.resting_CIBERSORT.ABS",
cell_names "T.cell.follicular.helper_CIBERSORT.ABS","NK.cell.resting_CIBERSORT.ABS",
"Macrophage.M0_CIBERSORT.ABS","Myeloid.dendritic.cell.resting_CIBERSORT.ABS",
"Myeloid.dendritic.cell.activated_CIBERSORT.ABS","Mast.cell.resting_CIBERSORT.ABS",
"Neutrophil_CIBERSORT.ABS","T.cell.CD4..naive_CIBERSORT.ABS",
"Eosinophil_CIBERSORT.ABS")
<- CiberABS[,-which(colnames(CiberABS) %in% cell_names)]
CiberABS <- CiberABS[which(CiberABS$cell_type %in% heat_data[which(heat_data$ecDNA == "ecDNA"),]$sample_barcode),]
imm_ecDNA <- CiberABS[which(CiberABS$cell_type %in% heat_data[which(heat_data$ecDNA == "Non-ecDNA"),]$sample_barcode),]
imm_NonecDNA $group <- "ecDNA"
imm_ecDNA$group <- "Non-ecDNA"
imm_NonecDNA<- rbind(imm_ecDNA,imm_NonecDNA)
CiberDraw <- melt(CiberDraw)
CiberDraw colnames(CiberDraw) <- c("Sample","Group","Celltype","Composition")
$Celltype <- str_sub(CiberDraw$Celltype,1,-15)
CiberDraw<- CiberDraw
draw_data <- c("B.cell.memory","B.cell.plasma","Macrophage.M1","Macrophage.M2","Mast.cell.activated",
nams_id "Monocyte","NK.cell.activated","T.cell.CD4..memory.activated","T.cell.CD8.",
"T.cell.gamma.delta","T.cell.regulatory..Tregs." )
$Celltype <- factor(draw_data$Celltype,levels = nams_id)
draw_data$Group <- factor(draw_data$Group,levels = c("Non-ecDNA","ecDNA"))
draw_data<- ggplot(data=draw_data,aes(x=Celltype,y=Composition,fill=Group))+
boxplot_cibersortABS geom_boxplot()+
stat_compare_means(aes(label = ..p.signif..))+
theme_prism()+
theme(axis.text.x = element_text(angle = 45,vjust = 1, hjust = 1) )+
labs(y="Composition",x= NULL,title = "CibersortABS")+
scale_x_discrete(breaks=nams_id,
labels=c("B cell memory","B cell plasma","Macrophage M1","Macrophage M2","Mast cell activated",
"Monocyte","NK cell activated","T cell CD4 memory activated","T cell CD8",
"T cell gamma delta","T cell regulatory Tregs" ))
##Timer
<- immune_data[,c(1,grep("*_TIMER$",colnames(immune_data)))]
CiberABS <- CiberABS[which(CiberABS$cell_type %in% ecDNA_tcga[which(ecDNA_tcga$ecDNA == "ecDNA"),]$sample_barcode),]
imm_ecDNA <- CiberABS[which(CiberABS$cell_type %in% ecDNA_tcga[which(ecDNA_tcga$ecDNA == "Non-ecDNA"),]$sample_barcode),]
imm_NonecDNA $group <- "ecDNA"
imm_ecDNA$group <- "Non-ecDNA"
imm_NonecDNA<- rbind(imm_ecDNA,imm_NonecDNA)
CiberDraw for (i in 2:7) {
<- (CiberDraw[,i] - min(CiberDraw[,i]))/ (max(CiberDraw[,i]) - min(CiberDraw[,i]))
CiberDraw[,i]
}<- melt(CiberDraw)
ScoreDraw colnames(ScoreDraw) <- c("Sample","Group","Celltype","Composition")
$Celltype <- str_sub(ScoreDraw$Celltype,1,-7)
ScoreDraw<- ScoreDraw[-which(ScoreDraw$Composition==1),]
ScoreDraw <- ScoreDraw
draw_data <- c("B.cell","Macrophage","Myeloid.dendritic.cell",
nams_id "Neutrophil","T.cell.CD4.","T.cell.CD8.")
$Celltype <- factor(draw_data$Celltype,levels = nams_id)
draw_data$Group <- factor(draw_data$Group,levels = c("Non-ecDNA","ecDNA"))
draw_data<- ggplot(data=draw_data,aes(x=Celltype,y=Composition,fill=Group))+
boxplot_timer geom_boxplot()+
stat_compare_means(aes(label = ..p.signif..))+
theme_prism()+
theme(axis.text.x = element_text(angle = 45,vjust = 1, hjust = 1) )+
labs(y="Composition",x= NULL,title = "Timer")+
scale_x_discrete(breaks=nams_id,
labels=c("B cell","Macrophage","Myeloid dendritic cell",
"Neutrophil","T cell CD4","T cell CD8"))
##MCPCOUNTER
<- immune_data[,c(1,grep("*MCPCOUNTER$",colnames(immune_data)))]
CiberABS <- CiberABS[which(CiberABS$cell_type %in% heat_data[which(heat_data$ecDNA == "ecDNA"),]$sample_barcode),]
imm_ecDNA <- CiberABS[which(CiberABS$cell_type %in% heat_data[which(heat_data$ecDNA == "Non-ecDNA"),]$sample_barcode),]
imm_NonecDNA $group <- "ecDNA"
imm_ecDNA$group <- "Non-ecDNA"
imm_NonecDNA<- rbind(imm_ecDNA,imm_NonecDNA)
CiberDraw <- c("Cancer.associated.fibroblast_MCPCOUNTER")
cell_names <- CiberDraw[,-which(colnames(CiberDraw) %in% cell_names)]
CiberDraw <- melt(CiberDraw)
CiberDraw colnames(CiberDraw) <- c("Sample","Group","Celltype","Composition")
$Celltype <- str_sub(CiberDraw$Celltype,1,-12)
CiberDraw$Composition <- log(CiberDraw$Composition+1)/10
CiberDraw<- CiberDraw
draw_data <- c("B.cell","cytotoxicity.score","Endothelial.cell","Macrophage.Monocyte",
nams_id "Monocyte","Myeloid.dendritic.cell","Neutrophil","NK.cell","T.cell","T.cell.CD8.")
$Celltype <- factor(draw_data$Celltype,levels = nams_id)
draw_data$Group <- factor(draw_data$Group,levels = c("Non-ecDNA","ecDNA"))
draw_data<- ggplot(data=draw_data,aes(x=Celltype,y=Composition,fill=Group))+
boxplot_MCPCOUNTER geom_boxplot()+
stat_compare_means(aes(label = ..p.signif..))+
theme_prism()+
theme(axis.text.x = element_text(angle = 45,vjust = 1, hjust = 1) )+
labs(y="Composition",x= NULL,title = "MCPCOUNTER")+
scale_x_discrete(breaks=nams_id,
labels= c("B cell","Cytotoxicity score","Endothelial cell","Macrophage monocyte",
"Monocyte","Myeloid dendritic cell","Neutrophil","NK cell","T cell","T cell CD8"))
##QUANTISEQ
<- immune_data[,c(1,grep("*QUANTISEQ$",colnames(immune_data)))]
CiberABS <- CiberABS[which(CiberABS$cell_type %in% heat_data[which(heat_data$ecDNA == "ecDNA"),]$sample_barcode),]
imm_ecDNA <- CiberABS[which(CiberABS$cell_type %in% heat_data[which(heat_data$ecDNA == "Non-ecDNA"),]$sample_barcode),]
imm_NonecDNA $group <- "ecDNA"
imm_ecDNA$group <- "Non-ecDNA"
imm_NonecDNA<- rbind(imm_ecDNA,imm_NonecDNA)
CiberDraw <- c("uncharacterized.cell_QUANTISEQ","T.cell.CD4...non.regulatory._QUANTISEQ","Monocyte_QUANTISEQ")
cell_names <- CiberDraw[,-which(colnames(CiberDraw) %in% cell_names)]
CiberDraw <- melt(CiberDraw)
CiberDraw colnames(CiberDraw) <- c("Sample","Group","Celltype","Composition")
$Celltype <- str_sub(CiberDraw$Celltype,1,-11)
CiberDraw<- CiberDraw
draw_data <- c("B.cell","Macrophage.M1","Macrophage.M2","Myeloid.dendritic.cell","Neutrophil",
nams_id "NK.cell","T.cell.CD8.","T.cell.regulatory..Tregs.")
$Celltype <- factor(draw_data$Celltype,levels = nams_id)
draw_data$Group <- factor(draw_data$Group,levels = c("Non-ecDNA","ecDNA"))
draw_data<- ggplot(data=draw_data,aes(x=Celltype,y=Composition,fill=Group))+
boxplot_QUANTISEQ ylim(0,0.3)+
geom_boxplot()+
stat_compare_means(aes(label = ..p.signif..))+
theme_prism()+
theme(axis.text.x = element_text(angle = 45,vjust = 1, hjust = 1) )+
labs(y="Composition",x= NULL,title = "QUANTISEQ")+
scale_x_discrete(breaks=nams_id,
labels= c("B cell","Macrophage M1","Macrophage M2","Myeloid dendritic cell","Neutrophil",
"NK cell","T cell CD8","T cell regulatory Tregs"))
##joint figure
library(customLayout)
<- lay_new(matrix(1:4, nc = 2),widths = c(1, 1),heights = c(1, 1))
lay1 <- lay_new(matrix(1, nc = 1))
lay2 <- lay_bind_row(lay1, lay2, heights = c(2, 1))
cl <- list(boxplot_cibersortABS,boxplot_MCPCOUNTER,boxplot_timer,boxplot_QUANTISEQ,boxplot_xcell)
plots2 lay_grid(plots2, cl)
Then we checked the cancer specific difference, and the heatmap color indicates median ratio of infiltration level for specific immune cell and specific cancer type between ecDNA and non-ecDNA samples
##Heatmap
<- ecDNA_20_info$tcga_id
tumor_type <- function(heat_data2,cell_names){
heatmap_imm
if (length(cell_names>0)) {
<- heat_data2[,-which(colnames(heat_data2) %in% cell_names)]
heat_data2
}
<- heat_data2[which(heat_data2$ecDNA=="ecDNA"),]
imm_ecDNA <- heat_data2[which(heat_data2$ecDNA=="Non-ecDNA"),]
imm_NonecDNA
<- matrix(NA, ncol = length(9:length(heat_data2)), nrow = 8)
heat_fig rownames(heat_fig) <- c("Pan-cancer",tumor_type)
colnames(heat_fig) <- colnames(heat_data2)[9:length(heat_data2)]
<- colnames(heat_data2)[9:length(heat_data2)]
cell_type
for (i in tumor_type) {
for (ii in cell_type) {
<- median(imm_ecDNA[which(imm_ecDNA$tcga_id==i),][,ii])
data1 <- median(imm_NonecDNA[which(imm_NonecDNA$tcga_id==i),][,ii])
data2
<- log(data1+1.001) / log(data2+1.001)
heat_fig[i,ii]
}
}
for (ii in cell_type) {
<- median(imm_ecDNA[,ii])
data1 <- median(imm_NonecDNA[,ii])
data2 "Pan-cancer",ii] <- log(data1+1.001) / log(data2+1.001)
heat_fig[
}
<- heat_fig
heat_fig2 <- t(heat_fig2)
heat_fig2 return(heat_fig2)
}library(grid)
library(circlize)
= colorRamp2(c(0.34, 1, 3), c("blue", "white", "red"))
col_fun
##CibersortABS
<- immune_data[,c(1,grep("*CIBERSORT.ABS$",colnames(immune_data)))]
CiberABS colnames(CiberABS)[1] <- "sample_barcode"
<- merge(heat_data,CiberABS)
heat_data2 $CD8_Treg._CIBERSORT.ABS <- log(heat_data2[,"T.cell.CD8._CIBERSORT.ABS"]+1.001)/log(heat_data2[,"T.cell.regulatory..Tregs._CIBERSORT.ABS"]+1.001)
heat_data2$M1_M2._CIBERSORT.ABS <- log(heat_data2[,"Macrophage.M1_CIBERSORT.ABS"]+1.001)/log(heat_data2[,"Macrophage.M2_CIBERSORT.ABS"]+1.001)
heat_data2
<- c("B.cell.naive_CIBERSORT.ABS","T.cell.CD4..memory.resting_CIBERSORT.ABS",
cell_names "T.cell.follicular.helper_CIBERSORT.ABS","NK.cell.resting_CIBERSORT.ABS",
"Macrophage.M0_CIBERSORT.ABS","Myeloid.dendritic.cell.resting_CIBERSORT.ABS",
"Myeloid.dendritic.cell.activated_CIBERSORT.ABS","Mast.cell.resting_CIBERSORT.ABS",
"Neutrophil_CIBERSORT.ABS","T.cell.CD4..naive_CIBERSORT.ABS",
"Eosinophil_CIBERSORT.ABS")
<- heatmap_imm(heat_data2,cell_names)
heat_fig2 rownames(heat_fig2) <- c("B cell memory","B cell plasma","T cell CD8","T cell CD4 memory activated",
"T cell regulatory Tregs","T cell gamma delta","NK cell activated","Monocyte",
"Macrophage M1","Macrophage M2","Mast cell activated","CD8/Tregs","M1/M2")
<- ComplexHeatmap::Heatmap(heat_fig2,cluster_rows = F,cluster_columns = F,name = " ",
heatmap_cibersortABS row_title = "Cibersort",col = col_fun,
row_names_side = "right",column_names_rot = 45,column_names_gp = gpar(fontsize = 12),
row_names_gp = gpar(fontsize = 12))
##Timer
<- immune_data[,c(1,grep("*_TIMER$",colnames(immune_data)))]
CiberABS colnames(CiberABS)[1] <- "sample_barcode"
<- merge(heat_data,CiberABS)
heat_data2 <- NULL
cell_names <- heatmap_imm(heat_data2,cell_names)
heat_fig2 rownames(heat_fig2) <- c("B cell","T cell CD4","T cell CD8","Neutrophil","Macrophage","Myeloid dendritic cell")
<- ComplexHeatmap::Heatmap(heat_fig2,cluster_rows = F,cluster_columns = F,name = " ",
heatmap_timer row_title = "Timer",col = col_fun,
row_names_side = "right",column_names_rot = 45)
##Xcell
<- immune_data[,c(1,grep("*_XCELL$",colnames(immune_data)))]
CiberABS colnames(CiberABS)[1] <- "sample_barcode"
<- merge(heat_data,CiberABS)
heat_data2 <- c("B.cell_XCELL", "T.cell.CD4..memory_XCELL","T.cell.CD4...non.regulatory._XCELL","T.cell.CD8..naive_XCELL",
cell_names "Common.lymphoid.progenitor_XCELL","Myeloid.dendritic.cell_XCELL","Macrophage_XCELL","Monocyte_XCELL",
"T.cell.CD4..Th1_XCELL","T.cell.CD4..Th2_XCELL", "B.cell.memory_XCELL","B.cell.naive_XCELL","T.cell.gamma.delta_XCELL")
<- heatmap_imm(heat_data2,cell_names)
heat_fig2 rownames(heat_fig2) <- c("Myeloid dendritic cell activated","T cell CD4 naive","T cell CD4 central memory",
"T cell CD4 effector memory","T cell CD8","T cell CD8 central memory",
"T cell CD8 effector memory","Class switched memory B cell","Common myeloid progenitor",
"Endothelial cell","Eosinophil","Cancer associated fibroblast","Granulocyte monocyte progenitor",
"Hematopoietic stem cell","Macrophage M1","Macrophage M2","Mast cell","Neutrophil",
"NK cell","T cell NK","Plasmacytoid dendritic cell","B cell plasma","T cell regulatory Tregs",
"immune score","stroma score","microenvironment score")
<- ComplexHeatmap::Heatmap(heat_fig2,cluster_rows = F,cluster_columns = F,
heatmap_xcell row_title = "Xcell",col = col_fun,
name = " ",row_names_side ="right",column_names_rot = 45)
##MCPCOUNTER
<- immune_data[,c(1,grep("*MCPCOUNTER$",colnames(immune_data)))]
CiberABS colnames(CiberABS)[1] <- "sample_barcode"
<- merge(heat_data,CiberABS)
heat_data2 <- c("Cancer.associated.fibroblast_MCPCOUNTER")
cell_names <- heatmap_imm(heat_data2,cell_names)
heat_fig2 rownames(heat_fig2) <- c("T cell","T cell CD8","Cytotoxicity score","NK cell","B cell",
"Monocyte","Macrophage monocyte","Myeloid dendritic cell","Neutrophil","Endothelial cell")
<- ComplexHeatmap::Heatmap(heat_fig2, cluster_rows = F, cluster_columns = F, row_title = "MCPcounter",col = col_fun,
heatmap_MCPCOUNTER name = " ",row_names_side ="right",column_names_rot = 45)
##QUANTISEQ
<- immune_data[,c(1,grep("*QUANTISEQ$",colnames(immune_data)))]
CiberABS colnames(CiberABS)[1] <- "sample_barcode"
<- merge(heat_data,CiberABS)
heat_data2 <- c("uncharacterized.cell_QUANTISEQ","T.cell.CD4...non.regulatory._QUANTISEQ","Monocyte_QUANTISEQ")
cell_names <- heatmap_imm(heat_data2,cell_names)
heat_fig2 rownames(heat_fig2) <- c("B cell","Macrophage M1","Macrophage M2","Neutrophil",
"NK cell","T cell CD8","T cell regulatory Tregs","Myeloid dendritic cell")
<- ComplexHeatmap::Heatmap(heat_fig2,cluster_rows = F,cluster_columns = F,
heatmap_QUANTISEQrow_title = "Quantiseq",col = col_fun,
name = " ",row_names_side ="right",column_names_rot = 45)
##joint heatmap
library(ComplexHeatmap)
= heatmap_cibersortABS %v% heatmap_MCPCOUNTER %v% heatmap_timer %v% heatmap_QUANTISEQ %v% heatmap_xcell
ht_list draw(ht_list)
EcDNA and tumor immune typing
Recently, several studies have classified cancers into different immune subtypes and found each subtype related to specific microenvironment. Here we used Chisq-square test to check the association between ecDNA status and immune subtypes. We used two classification methods: 1) Pan-cancer immunogenomic analysis has classified cancer into six immune subtypes—wound healing, IFN-γ dominant, inflammatory, lymphocyte depleted, immunologically quiet, and TGF-β dominant (Thorsson et al., 2018), 2) The TME has been classified into 4 types according to the expression of immune and stromal related genes (Bagaev et al., 2021).
<- dt %>% group_by(cancer) %>% summarise(c=sum(ecDNA=="yes")) %>%
summ filter(c>20)
<- dt %>% filter(cancer %in% summ$cancer)
dt_filter
<- read.table("../data/annotation-tcga.tsv",sep = "\t",header = T)
anno <- inner_join(
dt_anno %>% rename(sample=sample_barcode) %>%
dt_filter mutate(sample = substr(sample,1,12)),
%>%
anno select(X,MFP) %>%
rename(sample=X)
)
chisq.test(table(dt_anno$ecDNA,dt_anno$MFP))
#>
#> Pearson's Chi-squared test
#>
#> data: table(dt_anno$ecDNA, dt_anno$MFP)
#> X-squared = 14.142, df = 3, p-value = 0.002718
<- dt_anno %>%
dt_anno group_by(ecDNA,MFP) %>% summarise(counts=n())
<- ggplot(data=dt_anno,aes(x=ecDNA,y=counts,fill=MFP))+
p8 geom_bar(stat = "identity",position="fill")+
theme_prism()+
labs(y="Percent of cases (%)",title = "Chi-squared test, P = 0.002718")+
scale_x_discrete(labels=c("Non-ecDNA","ecDNA"))+
theme(axis.title.x = element_blank())
<- readRDS("../data/immune_landscape.rds")
immune_landscape <- inner_join(
dt_immune %>% rename(sample=sample_barcode) %>%
dt_filter mutate(sample = substr(sample,1,12)),
%>%
immune_landscape select(`TCGA Participant Barcode`,`Immune Subtype`,`Leukocyte Fraction`,`Intratumor Heterogeneity`) %>%
rename(sample=`TCGA Participant Barcode`)
)
<- dt_immune %>% select(sample,ecDNA,`Immune Subtype`,cancer) %>%
dt_subtype filter(!is.na(`Immune Subtype`))
$`Immune Subtype` <- as.character(dt_subtype$`Immune Subtype`)
dt_subtypechisq.test(table(dt_subtype$ecDNA,dt_subtype$`Immune Subtype`))
#>
#> Pearson's Chi-squared test
#>
#> data: table(dt_subtype$ecDNA, dt_subtype$`Immune Subtype`)
#> X-squared = 25.383, df = 4, p-value = 4.212e-05
<- dt_subtype %>%
dt_subtype group_by(ecDNA,`Immune Subtype`) %>% summarise(counts=n())
<- ggplot(data=dt_subtype,aes(x=ecDNA,y=counts,fill=`Immune Subtype`))+
p9 geom_bar(stat = "identity",position="fill")+
theme_prism()+
labs(y="Percent of cases (%)",title = "Chi-squared test, P = 4.212e-05")+
scale_fill_manual(values=c("#FE0000","#FEFB04","#13FC00","#00FDFE","#FC2AFD"))+
scale_x_discrete(labels=c("Non-ecDNA","ecDNA"))+
theme(axis.title.x = element_blank())
+ p9 p8
In tumors with ecDNA, C4 (lymphocyte depleted) type TME is up-regulated, while C3 (inflammatory) and C6 (TGF-β dominant) types are down-regulated and immune-enriched, fibrotic (IE/F) type of TME is dramatically decreased, while immune desert type TME is significantly up-regulated.
EcDNA and tumor immune escape
We next checked the expression difference of immune inhibitory immune checkpoint genes, such as PD-L1, CTLA4 between samples with and without ecDNA:
<- c("ADORA2A", "CD276", "VTCN1", "BTLA", "CTLA4", "IDO1","LAG3","CYBB","PDCD1",
checkpoint "HAVCR2","C10orf54","SIGLEC7")
<- readRDS("../../tmp/tpm_gene_log2.rds")
tpm_gene_log2 <- tpm_gene_log2[!duplicated(tpm_gene_log2$gene),]
tpm_gene_log2 <- tpm_gene_log2 %>%
tpm_gene select(-id) %>%
filter(gene %in% checkpoint) %>%
select(gene,any_of(dt$sample_barcode))
rownames(tpm_gene) <- tpm_gene$gene
<- tpm_gene %>% select(-gene)
tpm_gene <- t(tpm_gene) %>% as.data.frame()
tpm_gene
$sample <- rownames(tpm_gene)
tpm_gene$cancer <- get_cancer_type(tpm_gene$sample)
tpm_gene
<- inner_join(tpm_gene,
tpm_gene %>% rename(sample=sample_barcode))
dt %>% group_by(cancer) %>%
tpm_gene summarise(ecdna_c=sum(ecDNA=="yes")) -> tpm_summ
<- tpm_summ %>% filter(ecdna_c>20)
tpm_summ <- tpm_gene %>% filter(cancer %in% tpm_summ$cancer)
tpm_gene
<- tpm_gene %>%
tpm_dt ::pivot_longer(cols = "LAG3":"PDCD1",names_to = "gene",values_to = "exp")
tidyr$exp <- as.numeric(tpm_dt$exp)
tpm_dt<- tpm_dt %>%
tpm_dt mutate(ecDNA = ifelse(ecDNA == "yes","ecDNA","Non-ecDNA"))
$ecDNA <- factor(tpm_dt$ecDNA,levels = c("Non-ecDNA","ecDNA"))
tpm_dt
ggplot(data=tpm_dt,aes(x=gene,y=exp,fill=ecDNA))+
geom_boxplot()+
stat_compare_means(aes(label = ..p.signif..))+
theme_prism()+
labs(y="log2(TPM + 0.001)")+
theme(axis.text.x = element_text(angle = 45,vjust = 1, hjust = 1))+
theme(axis.title.x = element_blank())
Then we checked the cancer type specific checkpoint expression difference, heatmap color indicates median difference of expression for specific gene in specific cancer type between ecDNA and non-ecDNA samples:
<- matrix(runif(12*7),nrow = 12,ncol = 7)
mat colnames(mat) <- unique(tpm_gene$cancer)
<- as.data.frame(mat)
mat $gene <- unique(tpm_dt$gene)
mat
for (i in colnames(mat)[1:(ncol(mat)-1)]){
for (j in mat$gene){
<- tpm_gene %>%
df select(j,cancer,ecDNA) %>%
filter(cancer == i)
colnames(df)[1] <- "tt"
$tt <- as.numeric(df$tt)
df$gene == j,i] <- median(df[df$ecDNA=="yes",1]) - median(df[df$ecDNA=="no",1])
mat[mat
}
}
rownames(mat) <- mat$gene
<- mat %>% select(-gene)
mat
library(grid)
library(circlize)
= colorRamp2(c(-3, 0, 1), c("blue", "white", "red"))
col_fun ::Heatmap(mat,cluster_rows = F,cluster_columns = F,name = " ",col = col_fun) ComplexHeatmap
These checkpoints are generally down-regulated, and this could also implicates that immune checkpoint inhibitor therapy alone may not work in tumors with ecDNA.
Potential mechanistic study
The immunogenicity of tumor cells determine the tumor associated immune response, and the antigenicity encoded by neoantigenic mutations is an important determinant of tumor immunogenicity. Therefore we checked the tumor mutation burden (TMB) and neo-antigens burden between samples with and without ecDNA.
###TMB
# mut <- read.table("~/useful_data/GDC-PANCAN.mutect2_snv.tsv",sep = "\t",header = T)
# mut <- mut %>% filter(effect == "missense_variant")
# mut_summ <- mut %>% group_by(Sample_ID) %>%
# summarise(tmb=n()/38) %>% rename(sample=Sample_ID)
# saveRDS(mut_summ,file = "data/mut_summ.rds")
<- readRDS("../data/mut_summ.rds")
mut_summ
<- inner_join(
dt_mut %>% rename(sample=sample_barcode),
dt %>% mutate(sample=substr(sample,1,15))
mut_summ
)%>% group_by(cancer) %>%
dt_mut summarise(c=sum(ecDNA=="yes")) %>% filter(c>20) -> summ
<- dt_mut %>% filter(cancer %in% summ$cancer)
dt_mut $ecDNA <- ifelse(dt_mut$ecDNA=="yes","ecDNA","Non-ecDNA")
dt_mut$ecDNA <- factor(dt_mut$ecDNA,levels = c("Non-ecDNA","ecDNA"))
dt_mutggplot(data=dt_mut,aes(x=cancer,y=log(tmb+1),fill=ecDNA))+
geom_boxplot()+
stat_compare_means(aes(label = ..p.signif..))+
theme_prism()+
labs(y="log(TMB + 1)")+
theme(axis.text.x = element_text(angle = 45,vjust = 1, hjust = 1) )+
theme(axis.title.x = element_blank())
##neoantigen
# all_mut <- readRDS("~/test/data/2021_04_01/all_mut.rds")
# all_mut <- all_mut %>%
# mutate(neo=ifelse(neo=="neo","yes","no"))
# all_mut %>% group_by(sample) %>% summarise(neo_c=sum(neo=="yes"),all_c=n()) -> neo_summ
# saveRDS(neo_summ,file = "data/neo_summ.rds")
<- readRDS("../data/neo_summ.rds")
neo_summ <- inner_join(
dt_neo %>% rename(sample=sample_barcode),
dt %>% mutate(sample = substr(sample,1,15))
neo_summ %>% mutate(neo_burden=neo_c/38)
) %>% group_by(cancer) %>%
dt_neo summarise(c=sum(ecDNA=="yes")) %>% filter(c>20) -> summ
<- dt_neo %>% filter(cancer %in% summ$cancer)
dt_neo $ecDNA <- ifelse(dt_neo$ecDNA=="yes","ecDNA","Non-ecDNA")
dt_neo$ecDNA <- factor(dt_neo$ecDNA,levels = c("Non-ecDNA","ecDNA"))
dt_neoggplot(data=dt_neo,aes(x=cancer,y=log(neo_burden+1),fill=ecDNA))+
geom_boxplot()+
stat_compare_means(aes(label = ..p.signif..))+
theme_prism()+
labs(y="log(Neoantigen burden + 1)")+
theme(axis.text.x = element_text(angle = 45,vjust = 1, hjust = 1) )+
theme(axis.title.x = element_blank())
Tumors with ecDNA show comparable TMB and neoantigen counts, suggesting a comparable antigenicity. This implicates that the decreased immunogenicity of ecDNA-containing tumors was not caused by impaired antigenicity.
Antigen presentation efficiency is another important determinant of tumor immunogenicity. Therefore, we first directly compared expression of MHC-I/MHC-II genes and related biosynthetic genes between samples with and without ecDNA:
<- readRDS("../../tmp/tpm_gene_log2.rds")
tpm_gene_log2 <- tpm_gene_log2 %>%
tpm_gene select(-id) %>%
filter(gene %in% c("HLA-DQB1","HLA-DRB1","HLA-DQA1",
"HLA-DRB5","HLA-DRA","HLA-C","HLA-A","HLA-DPB1",
"HLA-DPA1","HLA-DQB2","HLA-B","HLA-DQA2","HLA-DOA",
"HLA-DMA","HLA-DPB2","HLA-DRB6","HLA-DOB",
"HLA-DMB","HLA-E","HLA-F","HLA-G","HLA-H","CIITA","HSPH1","IFNL1",
"IL33","NLRC5","NLRP12","AZU1","CIITA","IFNG","IL10","IL33",
"IL4","JAK2","SIRT1","TLR4","TMEM106A","XBP1")) %>%
select(gene,any_of(dt$sample_barcode))
rownames(tpm_gene) <- tpm_gene$gene
<- tpm_gene %>% select(-gene)
tpm_gene <- t(tpm_gene) %>% as.data.frame()
tpm_gene
$sample <- rownames(tpm_gene)
tpm_gene$CIITA_1 <- tpm_gene$CIITA
tpm_gene$IL33_1 <- tpm_gene$IL33
tpm_gene
<- left_join(tpm_gene,
tpm_dt %>% rename(sample=sample_barcode))
dt
%>% group_by(cancer) %>%
tpm_dt summarise(total_sample=n(),
ecDNA_postive=sum(ecDNA=="yes")) -> tpm_summ
<- tpm_summ %>% arrange(desc(total_sample))
tpm_summ <- tpm_summ %>% filter(ecDNA_postive > 20)
need_cancer <- need_cancer$cancer
need_cancer
<- tpm_dt %>% filter(cancer %in% need_cancer)
tpm_dt <- tpm_dt %>% select(c("CIITA_1","IL33_1"),everything())
tpm_dt
<- tpm_dt %>%
tpm_dt ::pivot_longer(cols = "CIITA_1":"HLA-DMB",names_to = "gene",values_to = "exp")
tidyr$exp <- as.numeric(tpm_dt$exp)
tpm_dt
$cancer <- get_cancer_type(tpm_dt$sample)
tpm_dt$gene <- factor(tpm_dt$gene,levels = c("HLA-A","HLA-B","HLA-C","HLA-E","HLA-F","HLA-G","HLA-H",
tpm_dt"HLA-DQB1","HLA-DRB1","HLA-DQA1",
"HLA-DRB5","HLA-DRA","HLA-DPB1",
"HLA-DPA1","HLA-DQB2","HLA-DQA2","HLA-DOA",
"HLA-DMA","HLA-DPB2","HLA-DRB6","HLA-DOB",
"HLA-DMB","CIITA","HSPH1","IFNL1",
"IL33","NLRC5","NLRP12","AZU1","IFNG","IL10",
"IL4","JAK2","SIRT1","TLR4","TMEM106A","XBP1","CIITA_1","IL33_1"))
ggplot(data=tpm_dt,aes(x=gene,y=exp,fill=ecDNA))+
geom_boxplot()+
stat_compare_means(aes(label = ..p.signif..))+
theme_prism()+
labs(y="log2(TPM + 0.001)")+
theme(axis.text.x = element_text(angle = 45,vjust = 1, hjust = 1))+
theme(axis.title.x = element_blank())+
guides(fill=F)
We also checked the cancer type specific MHC-I/II and related genes expression difference, heatmap color indicates median difference of expression for specific gene in specific cancer type between ecDNA and non-ecDNA samples:
<- matrix(runif(37*7),nrow = 37,ncol = 7)
mat colnames(mat) <- need_cancer
<- as.data.frame(mat)
mat $gene <- levels(tpm_dt$gene)[1:37]
mat
<- tpm_gene %>% select(-CIITA_1,-IL33_1)
tpm_gene $cancer <- get_cancer_type(tpm_gene$sample)
tpm_gene<- left_join(tpm_gene,
tpm_gene %>% rename(sample=sample_barcode))
dt
for (i in colnames(mat)[1:(ncol(mat)-1)]){
for (j in mat$gene){
<- tpm_gene %>%
df select(j,cancer,ecDNA) %>%
filter(cancer == i)
colnames(df)[1] <- "tt"
$tt <- as.numeric(df$tt)
df$gene == j,i] <- median(df[df$ecDNA=="yes",1]) - median(df[df$ecDNA=="no",1])
mat[mat
}
}
rownames(mat) <- mat$gene
<- mat %>% select(-gene)
mat
library(grid)
library(circlize)
= colorRamp2(c(-2, 0, 2), c("blue", "white", "red"))
col_fun ::Heatmap(mat,cluster_rows = F,cluster_columns = F,name = " ",col = col_fun,
ComplexHeatmaprow_split = c(rep(c("MHC-I"),7), rep(c("MHC-II"),15),
rep(c("MHC-I_BIOSYNTHETIC"),6),rep(c("MHC-II_BIOSYNTHETIC"),9))
)
In tumors with ecDNA significantly decreased MHC I genes’ expression is observed, the expression of MHC II genes are even more significantly down-regulated in tumors with ecDNA.
We further quantified the MHC-I/II antigen processing and presentation pathway activities by GSVA, and compared the GSVA values between samples with and without ecDNA:
###gsva score caculate code can be found in code/gsva.R
<- readRDS("../data/GSVA_pathway.rds")
GSVA <- t(GSVA) %>% as.data.frame()
gsva colnames(gsva) <- c("GO MHC-I","GO MHC-II","REACTOME MHC-I","REACTOME MHC-II")
$sample <- rownames(gsva)
gsva<- left_join(gsva,dt %>% rename(sample=sample_barcode))
gsva
<- gsva %>%
gsva_l ::pivot_longer(cols = "GO MHC-I":"REACTOME MHC-II",names_to = "type",values_to = "score")
tidyr<- gsva_l %>%
gsva_l mutate(ecDNA = ifelse(ecDNA == "yes","ecDNA","Non-ecDNA"))
$ecDNA <- factor(gsva_l$ecDNA,levels = c("Non-ecDNA","ecDNA"))
gsva_lggplot(data=gsva_l,aes(x=type,y=score,fill=ecDNA))+
geom_boxplot()+
stat_compare_means(aes(label = ..p.signif..))+
theme_prism()+
theme(axis.title.x = element_blank())+
labs(y="GSVA Score")
Cytoplasmic DNA is known to stimulate immune response through cGAS-STING pathway, and in tumors with ecDNA, this pathway is not over-activated:
##cGAS-STING
<- readRDS("../../tmp/tpm_gene_log2.rds")
tpm_gene_log2 <- tpm_gene_log2 %>%
tpm_gene select(-id) %>%
filter(gene %in% c("XRCC5", "IRF3", "TRIM21", "IFI16", "STAT6", "NLRC3", "DDX41",
"TBK1", "XRCC6", "TREX1", "PRKDC", "MB21D1", "TMEM173")) %>%
select(gene,any_of(dt$sample_barcode))
rownames(tpm_gene) <- tpm_gene$gene
<- tpm_gene %>% select(-gene)
tpm_gene <- t(tpm_gene) %>% as.data.frame()
tpm_gene
$sample <- rownames(tpm_gene)
tpm_gene
<- left_join(tpm_gene,
tpm_dt %>% rename(sample=sample_barcode))
dt
%>% group_by(cancer) %>%
tpm_dt summarise(total_sample=n(),
ecDNA_postive=sum(ecDNA=="yes")) -> tpm_summ
<- tpm_summ %>% arrange(desc(total_sample))
tpm_summ <- tpm_summ %>% filter(ecDNA_postive > 20)
need_cancer <- need_cancer$cancer
need_cancer <- tpm_dt %>% filter(cancer %in% need_cancer)
tpm_dt
<- tpm_dt %>%
tpm_dt ::pivot_longer(cols = "XRCC5":"PRKDC",names_to = "gene",values_to = "exp")
tidyr$exp <- as.numeric(tpm_dt$exp)
tpm_dt$ecDNA <- ifelse(tpm_dt$ecDNA=="yes","ecDNA","Non-ecDNA")
tpm_dt$ecDNA <- factor(tpm_dt$ecDNA,levels = c("Non-ecDNA","ecDNA"))
tpm_dtggplot(data=tpm_dt,aes(x=gene,y=exp,fill=ecDNA))+
geom_boxplot()+
stat_compare_means(aes(label = ..p.signif..))+
theme_prism()+
labs(y="log2(TPM + 0.001)")+
theme(axis.text.x = element_text(angle = 45,vjust = 1, hjust = 1))+
theme(axis.title.x = element_blank())