In this section, we use epigenomic annotations (ChIP-seq) from CNS cell-types (Nott et al. 2019) to pinpoint which fine-mapped SNPs overlap. We also use microglia ATAC-seq annotations (Kosoy et al. 2022) to pinpoint overlapping SNPs.
NOTE: the fine-mapped SNPs used in this analysis are from the SuSiE method + LD matrices calculated from the EADB GWAS (Bellenguez et al. 2022) data.
We will filter for only fine-mapped variants with PIP >= 0.1 for this analysis to prioritize variants that overlap with epigenomic annotations.
Here, we show the number of fine-mapped SNPs prioritized by SuSiE + GWAS LD with PIP > 0.1 that fall within a credible set.
Figure 2c: In the first plot, we show the number of credible sets per locus. Some loci have multiple credible sets, such as PLCG2 and TREM2, where we show that fine-mapping prioritizes known signals/missense variants that have previously been identified, as well as noncoding variants that fall within a different credible set/signal.
Figure 2d: We determine the number of fine-mapped SNPs that fall in different epigenomic annotations. These include enhancer and promoter marks from microglia, astrocytes, oligodendrocytes, and neurons from the Nott et al. 2019 paper.
We provide a table of variants that fall in a credible set with PIP > 0.1 that fall in an CNS cell-type epigenomic annotation. (Supplementary Table 3).
Figure 2c-d: Here, we subset 12 loci for AD that show overlaps in epigenomic peaks: PLCG2, TREM2, PICALM, BIN1, ECHDC3, EPHA1, ABI3, RASGEF1C, APH1B, SLC24A4, INPP5D, SPI1.
library(readr)
library(dplyr)
library(regioneR)
library(ggplot2)
library(GenomicRanges)
set.seed(252)
thousand_genome_panel_hg38 <- data.frame()
setwd('/Users/ashvinravi/Desktop/finemapping_MPRA_project/thousand_genome_panel_hg38_by_chr/')
chr_panel = list.files('/Users/ashvinravi/Desktop/finemapping_MPRA_project/thousand_genome_panel_hg38_by_chr/')
for (f in chr_panel) {
df = read_tsv(f)
thousand_genome_panel_hg38 <- rbind(df, thousand_genome_panel_hg38)
}
# thousand_genome_panel_hg38 = read_tsv("/Users/ashvinravi/Desktop/finemapping_MPRA_project/")
susie_ld_gwas_0.001 <- susie_ld_gwas_rsids %>% filter(PIP >= 0.001) %>%
mutate(end=start + 1, chrom=paste0('chr', CHR)) %>% select(chrom, start, end, RSID)
susie_ld_gwas_0.01 <- susie_ld_gwas_rsids %>% filter(PIP >= 0.01) %>%
mutate(end=start + 1, chrom=paste0('chr', CHR)) %>% select(chrom, start, end, RSID)
susie_ld_gwas_0.1 <- susie_ld_gwas_rsids %>% filter(PIP >= 0.1) %>%
mutate(end=start + 1, chrom=paste0('chr', CHR)) %>% select(chrom, start, end, RSID)
susie_ld_gwas_0.5 <- susie_ld_gwas_rsids %>% filter(PIP >= 0.5) %>%
mutate(end=start + 1, chrom=paste0('chr', CHR)) %>% select(chrom, start, end, RSID)
microglia_enhancers_GR = toGRanges(data.frame(chr=microglia_enhancer$V1,
start=microglia_enhancer$V2,
end=microglia_enhancer$V3))
microglia_promoters_GR = toGRanges(data.frame(chr=microglia_promoter$V1,
start=microglia_promoter$V2,
end=microglia_promoter$V3))
astrocyte_enhancers_GR = toGRanges(data.frame(chr=astrocyte_enhancer$V1,
start=astrocyte_enhancer$V2,
end=astrocyte_enhancer$V3))
astrocyte_promoters_GR = toGRanges(data.frame(chr=astrocyte_promoter$V1,
start=astrocyte_promoter$V2,
end=astrocyte_promoter$V3))
neuronal_enhancers_GR = toGRanges(data.frame(chr=neuronal_enhancer$V1,
start=neuronal_enhancer$V2,
end=neuronal_enhancer$V3))
neuronal_promoters_GR = toGRanges(data.frame(chr=neuronal_promoter$V1,
start=neuronal_promoter$V2,
end=neuronal_promoter$V3))
oligo_enhancers_GR = toGRanges(data.frame(chr=oligodendrocyte_enhancer$V1,
start=oligodendrocyte_enhancer$V2,
end=oligodendrocyte_enhancer$V3))
oligo_promoters_GR = toGRanges(data.frame(chr=oligodendrocyte_promoter$V1,
start=oligodendrocyte_promoter$V2,
end=oligodendrocyte_promoter$V3))
susie_ld_gwas_0.001_GR = toGRanges(data.frame(chr=susie_ld_gwas_0.001$chrom,
start=susie_ld_gwas_0.001$start,
end=susie_ld_gwas_0.001$end))
susie_ld_gwas_0.01_GR = toGRanges(data.frame(chr=susie_ld_gwas_0.01$chrom,
start=susie_ld_gwas_0.01$start,
end=susie_ld_gwas_0.01$end))
susie_ld_gwas_0.1_GR = toGRanges(data.frame(chr=susie_ld_gwas_0.1$chrom,
start=susie_ld_gwas_0.1$start,
end=susie_ld_gwas_0.1$end))
susie_ld_gwas_0.5_GR = toGRanges(data.frame(chr=susie_ld_gwas_0.5$chrom,
start=susie_ld_gwas_0.5$start,
end=susie_ld_gwas_0.5$end))
ME_overlap_0.001 = numOverlaps(susie_ld_gwas_0.001_GR, microglia_enhancers_GR)
ME_overlap_0.01 = numOverlaps(susie_ld_gwas_0.01_GR, microglia_enhancers_GR)
ME_overlap_0.1 = numOverlaps(susie_ld_gwas_0.1_GR, microglia_enhancers_GR)
ME_overlap_0.5 = numOverlaps(susie_ld_gwas_0.5_GR, microglia_enhancers_GR)
calculate_random_overlap <- function(input_dataset) {
ME_randomized_overlap = c()
MP_randomized_overlap = c()
AE_randomized_overlap = c()
AP_randomized_overlap = c()
NE_randomized_overlap = c()
NP_randomized_overlap = c()
OE_randomized_overlap = c()
OP_randomized_overlap = c()
print("Generating random regions...")
for (x in 1:5000) {
# Sample Random gwas_snps from panel from 1000G panel.
randomized_snps <- thousand_genome_panel_hg38 %>%
slice_sample(n = length(input_dataset), replace=T) %>%
ungroup()
if (x %% 100 == 0) {
print(x)
}
randomized_snps_GR = toGRanges(data.frame(chr=randomized_snps$chrom,
start=randomized_snps$start,
end=as.numeric(randomized_snps$end)))
ME_randomized_overlap = c(ME_randomized_overlap, numOverlaps(randomized_snps_GR, microglia_enhancers_GR))
MP_randomized_overlap = c(MP_randomized_overlap, numOverlaps(randomized_snps_GR, microglia_promoters_GR))
AE_randomized_overlap = c(AE_randomized_overlap, numOverlaps(randomized_snps_GR, astrocyte_enhancers_GR))
AP_randomized_overlap = c(AP_randomized_overlap, numOverlaps(randomized_snps_GR, astrocyte_promoters_GR))
NE_randomized_overlap = c(NE_randomized_overlap, numOverlaps(randomized_snps_GR, neuronal_enhancers_GR))
NP_randomized_overlap = c(NP_randomized_overlap, numOverlaps(randomized_snps_GR, neuronal_promoters_GR))
OE_randomized_overlap = c(OE_randomized_overlap, numOverlaps(randomized_snps_GR, oligo_enhancers_GR))
OP_randomized_overlap = c(OP_randomized_overlap, numOverlaps(randomized_snps_GR, oligo_promoters_GR))
}
overlap_table = cbind(ME_randomized_overlap, MP_randomized_overlap,
AE_randomized_overlap, AP_randomized_overlap,
NE_randomized_overlap, NP_randomized_overlap,
OE_randomized_overlap, OP_randomized_overlap)
return(overlap_table)
}
randomized_overlap_0.001 = calculate_random_overlap(susie_ld_gwas_0.001_GR)
randomized_overlap_0.01 = calculate_random_overlap(susie_ld_gwas_0.01_GR)
randomized_overlap_0.1 = calculate_random_overlap(susie_ld_gwas_0.1_GR)
randomized_overlap_0.5 = calculate_random_overlap(susie_ld_gwas_0.5_GR)
ME_enrichment = c(numOverlaps(susie_ld_gwas_0.001_GR, microglia_enhancers_GR) / mean(randomized_overlap_0.001[,1]),
numOverlaps(susie_ld_gwas_0.01_GR, microglia_enhancers_GR) / mean(randomized_overlap_0.01[,1]),
numOverlaps(susie_ld_gwas_0.1_GR, microglia_enhancers_GR) / mean(randomized_overlap_0.1[,1]),
numOverlaps(susie_ld_gwas_0.5_GR, microglia_enhancers_GR) / mean(randomized_overlap_0.5[,1]) )
MP_enrichment = c(numOverlaps(susie_ld_gwas_0.001_GR, microglia_promoters_GR) / mean(randomized_overlap_0.001[,2]),
numOverlaps(susie_ld_gwas_0.01_GR, microglia_promoters_GR) / mean(randomized_overlap_0.01[,2]),
numOverlaps(susie_ld_gwas_0.1_GR, microglia_promoters_GR) / mean(randomized_overlap_0.1[,2]),
numOverlaps(susie_ld_gwas_0.5_GR, microglia_promoters_GR) / mean(randomized_overlap_0.5[,2]) )
AE_enrichment = c(numOverlaps(susie_ld_gwas_0.001_GR, astrocyte_enhancers_GR) / mean(randomized_overlap_0.001[,3]),
numOverlaps(susie_ld_gwas_0.01_GR, astrocyte_enhancers_GR) / mean(randomized_overlap_0.01[,3]),
numOverlaps(susie_ld_gwas_0.1_GR, astrocyte_enhancers_GR) / mean(randomized_overlap_0.1[,3]),
numOverlaps(susie_ld_gwas_0.5_GR, astrocyte_enhancers_GR) / mean(randomized_overlap_0.5[,3]) )
AP_enrichment = c(numOverlaps(susie_ld_gwas_0.001_GR, astrocyte_promoters_GR) / mean(randomized_overlap_0.001[,4]),
numOverlaps(susie_ld_gwas_0.01_GR, astrocyte_promoters_GR) / mean(randomized_overlap_0.01[,4]),
numOverlaps(susie_ld_gwas_0.1_GR, astrocyte_promoters_GR) / mean(randomized_overlap_0.1[,4]),
numOverlaps(susie_ld_gwas_0.5_GR, astrocyte_promoters_GR) / mean(randomized_overlap_0.5[,4]) )
NE_enrichment = c(numOverlaps(susie_ld_gwas_0.001_GR, neuronal_enhancers_GR) / mean(randomized_overlap_0.001[,5]),
numOverlaps(susie_ld_gwas_0.01_GR, neuronal_enhancers_GR) / mean(randomized_overlap_0.01[,5]),
numOverlaps(susie_ld_gwas_0.1_GR, neuronal_enhancers_GR) / mean(randomized_overlap_0.1[,5]),
numOverlaps(susie_ld_gwas_0.5_GR, neuronal_enhancers_GR) / mean(randomized_overlap_0.5[,5]) )
NP_enrichment = c(numOverlaps(susie_ld_gwas_0.001_GR, neuronal_promoters_GR) / mean(randomized_overlap_0.001[,6]),
numOverlaps(susie_ld_gwas_0.01_GR, neuronal_promoters_GR) / mean(randomized_overlap_0.01[,6]),
numOverlaps(susie_ld_gwas_0.1_GR, neuronal_promoters_GR) / mean(randomized_overlap_0.1[,6]),
numOverlaps(susie_ld_gwas_0.5_GR, neuronal_promoters_GR) / mean(randomized_overlap_0.5[,6]) )
OE_enrichment = c(numOverlaps(susie_ld_gwas_0.001_GR, neuronal_promoters_GR) / mean(randomized_overlap_0.001[,7]),
numOverlaps(susie_ld_gwas_0.01_GR, neuronal_promoters_GR) / mean(randomized_overlap_0.01[,7]),
numOverlaps(susie_ld_gwas_0.1_GR, neuronal_promoters_GR) / mean(randomized_overlap_0.1[,7]),
numOverlaps(susie_ld_gwas_0.5_GR, neuronal_promoters_GR) / mean(randomized_overlap_0.5[,7]) )
OP_enrichment = c(numOverlaps(susie_ld_gwas_0.001_GR, oligo_promoters_GR) / mean(randomized_overlap_0.001[,8]),
numOverlaps(susie_ld_gwas_0.01_GR, oligo_promoters_GR) / mean(randomized_overlap_0.01[,8]),
OP_enrichment_0.1 = numOverlaps(susie_ld_gwas_0.1_GR, oligo_promoters_GR) / mean(randomized_overlap_0.1[,8]),
OP_enrichment_0.5 = numOverlaps(susie_ld_gwas_0.5_GR, oligo_promoters_GR) / mean(randomized_overlap_0.5[,8]) )
enrichment_table = cbind(ME_enrichment, MP_enrichment, AE_enrichment, AP_enrichment, NE_enrichment, NP_enrichment, OE_enrichment, OP_enrichment)
rownames(enrichment_table) = c('0.001', '0.01', '0.1', '0.5')
enrichment_plot_table = melt(enrichment_table)
enrichment_plot_table$cell_type <- 'Microglia'
enrichment_plot_table$cell_type[grepl('A', enrichment_plot_table$X2)] <- 'Astrocyte'
enrichment_plot_table$cell_type[grepl('O', enrichment_plot_table$X2)] <- 'Oligodendrocyte'
enrichment_plot_table$cell_type[grepl('N', enrichment_plot_table$X2)] <- 'Neuron'
# Create the line plot
enrichment_plot <- ggplot(enrichment_plot_table, aes(x = X1, y = value, color = X2, group = X2)) +
geom_line() + # Optional: to add points at each data value
labs(title = "Enrichment Levels", x = "Enrichment Level", y = "Value") +
scale_x_log10() +
theme_bw()Figure 2e: Finally, let’s see how enriched fine-mapped SNPs are in CNS cell types as we increase the posterior probability threshold. As we increase the PIP threshold, we should see that the enrichment for microglia increases, as opposed to the other cell types.
For this analysis, we calculate the proportion of fine-mapped variants that overlap promoters and enhancers in microglia, astrocytes, oligodendrocytes, and neurons over 100 thresholds from 0.0001 to 0.99. We report the fraction of variants overlapping each annotation over these 100 thresholds.
library(readr)
library(dplyr)
library(regioneR)
library(ggplot2)
library(GenomicRanges)
PIP_thresholds <- exp(seq(log(0.0001), log(0.99), length.out = 100))
ME_percent_overlap = c()
MP_percent_overlap = c()
AE_percent_overlap = c()
AP_percent_overlap = c()
NE_percent_overlap = c()
NP_percent_overlap = c()
OE_percent_overlap = c()
OP_percent_overlap = c()
microglia_enhancers_GR = toGRanges(data.frame(chr=microglia_enhancer$V1,
start=microglia_enhancer$V2,
end=microglia_enhancer$V3))
microglia_promoters_GR = toGRanges(data.frame(chr=microglia_promoter$V1,
start=microglia_promoter$V2,
end=microglia_promoter$V3))
astrocyte_enhancers_GR = toGRanges(data.frame(chr=astrocyte_enhancer$V1,
start=astrocyte_enhancer$V2,
end=astrocyte_enhancer$V3))
astrocyte_promoters_GR = toGRanges(data.frame(chr=astrocyte_promoter$V1,
start=astrocyte_promoter$V2,
end=astrocyte_promoter$V3))
neuronal_enhancers_GR = toGRanges(data.frame(chr=neuronal_enhancer$V1,
start=neuronal_enhancer$V2,
end=neuronal_enhancer$V3))
neuronal_promoters_GR = toGRanges(data.frame(chr=neuronal_promoter$V1,
start=neuronal_promoter$V2,
end=neuronal_promoter$V3))
oligo_enhancers_GR = toGRanges(data.frame(chr=oligodendrocyte_enhancer$V1,
start=oligodendrocyte_enhancer$V2,
end=oligodendrocyte_enhancer$V3))
oligo_promoters_GR = toGRanges(data.frame(chr=oligodendrocyte_promoter$V1,
start=oligodendrocyte_promoter$V2,
end=oligodendrocyte_promoter$V3))
for (threshold in PIP_thresholds) {
susie_ld_gwas_filtered <- susie_ld_gwas_rsids %>% filter(PIP >= threshold) %>%
mutate(end=start + 1, chrom=paste0('chr', CHR)) %>% select(chrom, start, end, RSID)
susie_ld_gwas_filtered_GR = toGRanges(data.frame(chr=susie_ld_gwas_filtered$chrom,
start=susie_ld_gwas_filtered$start,
end=susie_ld_gwas_filtered$end))
ME_percent_overlap = c(ME_percent_overlap, numOverlaps(susie_ld_gwas_filtered_GR, microglia_enhancers_GR) / length(susie_ld_gwas_filtered_GR))
MP_percent_overlap = c(MP_percent_overlap, numOverlaps(susie_ld_gwas_filtered_GR, microglia_promoters_GR) / length(susie_ld_gwas_filtered_GR))
AE_percent_overlap = c(AE_percent_overlap, numOverlaps(susie_ld_gwas_filtered_GR, astrocyte_enhancers_GR) / length(susie_ld_gwas_filtered_GR))
AP_percent_overlap = c(AP_percent_overlap, numOverlaps(susie_ld_gwas_filtered_GR, astrocyte_promoters_GR) / length(susie_ld_gwas_filtered_GR))
NE_percent_overlap = c(NE_percent_overlap, numOverlaps(susie_ld_gwas_filtered_GR, neuronal_enhancers_GR) / length(susie_ld_gwas_filtered_GR))
NP_percent_overlap = c(NP_percent_overlap, numOverlaps(susie_ld_gwas_filtered_GR, neuronal_promoters_GR) / length(susie_ld_gwas_filtered_GR))
OE_percent_overlap = c(OE_percent_overlap, numOverlaps(susie_ld_gwas_filtered_GR, oligo_enhancers_GR) / length(susie_ld_gwas_filtered_GR))
OP_percent_overlap = c(OP_percent_overlap, numOverlaps(susie_ld_gwas_filtered_GR, oligo_promoters_GR) / length(susie_ld_gwas_filtered_GR))
}
percent_overlap = cbind(ME_percent_overlap, MP_percent_overlap,
AE_percent_overlap, AP_percent_overlap,
NE_percent_overlap, NP_percent_overlap,
OE_percent_overlap, OP_percent_overlap)
percent_overlap_table = melt(percent_overlap)
duplicate_PIP_thresholds = rep(PIP_thresholds, times = 8)
percent_overlap_table = cbind(percent_overlap_table, duplicate_PIP_thresholds)
library(scales)
# Create the line plot
percent_overlap_plot <- ggplot(percent_overlap_table, aes(x = duplicate_PIP_thresholds, y = value * 100, color = X2, group = X2)) +
geom_line() + # Optional: to add points at each data value
labs(title = "Enrichment Levels", x = "\nEnrichment Level", y = "Value\n") +
scale_color_manual(values = c("MP_percent_overlap" = "#419d5d", "ME_percent_overlap" = "#74C476",
"AP_percent_overlap" = "#B30000", "AE_percent_overlap" = "#ff2626",
"NP_percent_overlap" = "#084594", "NE_percent_overlap" = "#6BAED6",
"OP_percent_overlap" = "#FF8A28", "OE_percent_overlap" = "#FDAE6B")) +
scale_x_log10(labels = label_number()) +
labs(x = "\nPosterior Probability Threshold", y = "Fine-mapped Variants overlapping \nCNS Promoter/Enhancer Marks (%)\n") +
ylim(NA, 40) +
geom_hline(yintercept = 0, color = "black", linetype = "dashed") +
theme_bw() +
theme(
axis.title = element_text(size = 20, color = "black"),
axis.text = element_text(size = 16, color = "black"),
)
print(percent_overlap_plot)