For your first question, can you be a bit more specific on what is wrong with the normalized matrix? One thing to check - the input to the cicero pipeline (i.e. the input for make_cicero_cds) should be unnormalized counts. If you're converting from a Seurat object, I would definitely double check that it's not being pre-normalized.
For the second, I've dug up the original code for that figure (pasted below). You'll need to do a bit of rearranging etc to get it work, but the main inputs are the gencode GTF that you can get on their website and the phastCons that I'm sure I downloaded from UCSC genome browser.
#bedtools map -f 0.5 -a Projects/HSMM_sci_atac/peak_compare/HSMM_final_peaks.bed -b phastCons46wayPlacental.bed -c 4,4 -o mean,count > phastCons46wayPlacental_by_peak.txt
#awk 'BEGIN {OFS="\t"}; {print $1, $4, $5, $7, $18, $3}' genomes/human/hg19/transcriptome/GENCODE/version17/gencode_fixed_annotation/gencode.v17.annotation.gtf | grep 'CDS' - | bedtools sort -i - > gencodev17_CDS.bed
#bedtools map -f 0.5 -a gencodev17_CDS.bed -b phastCons46wayPlacental.bed -c 4,4 -o mean,count > phastCons46wayPlacental_by_gene.txt
gene_cons <- read.table("data/conservation/phastCons46wayPlacental_by_gene.txt")
gene_cons <- subset(gene_cons, V7 == "CDS")
gene_cons$V8 <- as.numeric(as.character(gene_cons$V8))
gene_cons <- ddply(gene_cons, .(V5), function(x) data.frame(mean_phastCons = mean(x$V8, na.rm=TRUE)))
cons <- read.table("data/conservation/phastCons46wayPlacental_by_peak.txt")
cons$peak <- paste(cons$V1, cons$V2, cons$V3, sep="_")
row.names(cons) <- cons$peak
cons$V1 <- NULL
cons$V2 <- NULL
cons$V3 <- NULL
cons$peak <- NULL
names(cons) <- c("phastCons", "phastCons_num")
cons$phastCons[cons$phastCons == "."] <- NA
cons$phastCons <- as.numeric(as.character(cons$phastCons))
load("pro_gene_list")
pro_gene_list <- subset(pro_gene_list, !
is.na( promoter_gene))
pro_gene_list <- merge(pro_gene_list, gene_cons, by.x="promoter_gene", by.y="V5", all.x=TRUE)
FIGURE 3B
enh_pro <- subset(cons_info, !
is.na(corr) & corr >= 0 & !
is.na(promoter_gene_1) &
is.na(promoter_gene_2))
enh_pro <- merge(enh_pro, cons, by.x="Peak2", by.y=0)
enh_pro$corr_cut <- cut(enh_pro$corr, breaks = c(-1, 0, .1, .2, .3, 1), labels = c(0, .1, .2, .3, ">0.3"))
enh_pro$dist_cut <- cut(x = enh_pro$dist, right=FALSE, breaks = seq(from=0, to=500000, by=50000), labels = seq(from=10000, to=500000, by=50000))
for_plot <- Rmisc::summarySE(subset(enh_pro, !
is.na(phastCons)), measurevar = "phastCons", groupvars=c("corr_cut","dist_cut") )
pdf("figs/Fig3_conservation_by_corr.pdf", width=1.5, height=1.2)
ggplot(aes(as.numeric(as.character(dist_cut))/1000,(phastCons),color=corr_cut, group = corr_cut), data=for_plot) +
geom_point(size=0.1, position = position_dodge(10)) +
geom_line(size = .3, position = position_dodge(10)) +
geom_errorbar(aes(ymin=phastCons-ci, ymax=phastCons+ci), size=.2, width=50, position = position_dodge(10)) + #, width=1, size=.2) +
#scale_x_log10() + #annotation_logticks() +
#scale_color_manual(name="", values=c("#29B270", "#1A6AAF", "#9147E0", "#240115", "#C33C54")) +
scale_color_manual(name="Coaccessibility\nscore", values=RColorBrewer::brewer.pal(name="Spectral", 11)[c(7:11, 1)]) +
#theme(legend.position = "top") +
ylim(c(100,140)) +
monocle:::monocle_theme_opts() +
labs(x="Distance between pairs (Kb)", y="Mean conservation\nscore") +
#theme(legend.position="none") +
guides(color = guide_legend(keywidth = .6, keyheight = .4)) +
theme(text = element_text(size=6)) +
theme(legend.position="none")
#scale_color_manual(values=c("#40476D", "#AD5D4E"))
dev.off()
pdf("figs/Fig3_conservation_by_corr_wl.pdf", width=1.5, height=1.5)
ggplot(aes(as.numeric(as.character(dist_cut))/1000,(phastCons),color=corr_cut, group = corr_cut), data=for_plot) +
geom_point(size=.2) +
geom_line(size=.5)+
geom_errorbar(aes(ymin=phastCons-ci, ymax=phastCons+ci), width=.2, size=.2) +
#scale_x_log10() + #annotation_logticks() +
#scale_color_manual(name="", values=c("#29B270", "#1A6AAF", "#9147E0", "#240115", "#C33C54")) +
scale_color_manual(name="Coaccessibility\nscore", values=RColorBrewer::brewer.pal(name="Spectral", 11)[c(7:11, 1)]) +
#theme(legend.position = "top") +
ylim(c(100,140)) +
monocle:::monocle_theme_opts() +
labs(x="Distance between pairs (Kb)", y="Mean phastCons score") +
#theme(legend.position="none") +
guides(color = guide_legend(keywidth = .6, keyheight = .4)) +
theme(text = element_text(size=6))
#scale_color_manual(values=c("#40476D", "#AD5D4E"))
dev.off()
FIGURE 3C
enh_pro <- merge(enh_pro, pro_gene_list, by.x="Peak1", by.y="site_name", all.x=TRUE)
#enh_pro$dist_quart <- ntile(enh_pro$dist, 4)
enh_pro$gene_phast_cut <- cut(x = enh_pro$mean_phastCons, right=FALSE, breaks = c(0, 100, 200, 300, 400, 500, 1500) , labels = c(100, 200, 300, 400, 500, ">500"))
enh_pro$enh_phast_cut <- cut(x = enh_pro$phastCons, right=FALSE, breaks = c(0, 100, 200, 300, 400, 500, 1500) , labels = c(100, 200, 300, 400, 500, ">500"))
for_plot <- Rmisc::summarySE(subset(enh_pro, !
is.na(phastCons)), measurevar = "phastCons", groupvars=c("corr_cut","gene_phast_cut") )
pdf("figs/Fig3_cons_by_cor_gene_phast_dist.pdf", width=1.5, height=1.2)
ggplot(aes(gene_phast_cut,(phastCons),color=corr_cut, group = corr_cut), data=subset(for_plot, !
is.na(gene_phast_cut))) +
geom_point(size=0.1, position = position_dodge(.1)) +
geom_line(size = .3, position = position_dodge(.1)) +
geom_errorbar(aes(ymin=phastCons-ci, ymax=phastCons+ci), size=.2, width=1, position = position_dodge(.1)) +
scale_color_manual(name="Coaccessibility\nscore", values=RColorBrewer::brewer.pal(name="Spectral", 11)[c(7:11, 1)]) +
monocle:::monocle_theme_opts() +
labs(x="Gene conservation", y="Mean enhancer\nconservation") +
guides(color = guide_legend(keywidth = .6, keyheight = .4)) +
theme(text = element_text(size=6))+
theme(legend.position="none")
dev.off()