library(tidyverse)
# Load the tab-separated measurements used by every plot below.
df1 <- read_tsv("~/Downloads/dat.txt")

# 1) A single panel: ECDF points, one colour per var1/var2 combination.
ggplot(data = df1, mapping = aes(x = val)) +
  stat_ecdf(
    mapping = aes(colour = interaction(var1, var2)),
    geom = "point"
  )

# 2) One column of panels per var1 level, points coloured by var2.
ggplot(data = df1, mapping = aes(x = val)) +
  stat_ecdf(mapping = aes(colour = var2), geom = "point") +
  facet_grid(. ~ var1)

# 3) One column of panels per var2 level, points coloured by var1.
ggplot(data = df1, mapping = aes(x = val)) +
  stat_ecdf(mapping = aes(colour = var1), geom = "point") +
  facet_grid(. ~ var2)

# 4) Full grid: var1 levels as rows, var2 levels as columns, no colouring.
ggplot(data = df1, mapping = aes(x = val)) +
  stat_ecdf(geom = "point") +
  facet_grid(var1 ~ var2)
--
--
You received this message because you are subscribed to the ggplot2 mailing list.
Please provide a reproducible example: https://github.com/hadley/devtools/wiki/Reproducibility
To post: email ggp...@googlegroups.com
To unsubscribe: email ggplot2+u...@googlegroups.com
More options: http://groups.google.com/group/ggplot2
---
You received this message because you are subscribed to the Google Groups "ggplot2" group.
To unsubscribe from this group and stop receiving emails from it, send an email to ggplot2+u...@googlegroups.com.
For more options, visit https://groups.google.com/d/optout.
A more robust solution, using order() instead of sort() inside the ECDF function (so that every column of the data frame is reordered consistently by value, keeping each row intact). Apologies for the mistake 😝
# Session ! Simple Analysis Script
library(tidyverse)
library(scales)
library(stringr)
library(extrafont)
# Read the source file
# NOTE: the path below is a placeholder; replace it with the real location
# of dat.txt before running.
source_file_path <- "….path…. /dat.txt"
source_data <- read.csv(file = source_file_path, sep = "\t")

# Strip leading/trailing whitespace from every character column.
source_data <- source_data %>% mutate_if(is.character, trimws)

# Combined var1/var2 label, e.g. "a_b".
source_data$unique_group <- interaction(
  source_data$var1, source_data$var2,
  sep = "_"
)

# Keep only the var1 groups that have more than 10 observations. The helper
# column `count` is kept in the result. Ungroup afterwards so later steps do
# not silently operate per group.
source_data <- source_data %>%
  group_by(var1) %>%
  mutate(count = n()) %>%
  filter(count > 10) %>%
  ungroup()

# Manual ECDF per var1 group: order the whole rows by val (so every other
# column stays attached to its value), then ecdf = rank / n.
data_and_ecdf <- lapply(
  split(source_data, source_data$var1, drop = TRUE),
  function(x) {
    x <- x[order(x$val, decreasing = FALSE), ]
    n_obs <- nrow(x)
    x$ecdf <- seq_len(n_obs) / n_obs
    x
  }
)
data_and_ecdf <- bind_rows(data_and_ecdf)

# One ECDF line per var1 group; points coloured by var2 and shaped by var1.
# scale_shape_manual() supplies two shapes, i.e. two var1 groups are expected
# to survive the count filter above.
ecdf_overlap <- ggplot(
  data = data_and_ecdf,
  mapping = aes(x = val, y = ecdf, group = var1, colour = var2, shape = var1)
) +
  geom_line(size = 1) +
  scale_shape_manual(values = c(5, 15)) +
  geom_point(size = 2.5) +
  scale_color_viridis_d() +
  theme_bw()

# Open a separate 15 x 15 graphics window and draw the plot. dev.new() picks
# the platform's screen device, whereas windows() exists only on Windows
# builds of R; noRStudioGD = TRUE keeps the plot out of the RStudio pane.
dev.new(height = 15, width = 15, noRStudioGD = TRUE)
print(ecdf_overlap)
From: marios...@gmail.com <marios...@gmail.com>
Sent: Saturday, January 5, 2019 3:07 PM
To: 'Maria Lathouri' <mlat...@yahoo.gr>; 'Brian Shine' <brians...@gmail.com>
Subject: RE: ecdf different colour points and fit line ggplot
Kalimera Maria, all,
From your email, what I understand is that you want to calculate the ECDF of your populations separately for each var1 group.
Then you want to identify where these quantiles lie in your ECDF. If there is more to it than that, you need to be careful, because you have too few observations in var2 to think about computing a statistic.
Now then, under these assumptions, just calculate the ECDF manually; by definition it is the cumulative fraction of the ordered observations (the rank of each observation divided by the number of observations). Here's some code that solves your problem in the way I understood it.
There’s an even neater solution that calculates the function using dplyr's group_by and do commands; I’ll let you rack your brain over it.
A picture of how it looks is below.
Best,
Marios
P.S. Don’t try to use the high-level functions before you understand the math behind them; it just makes solving the problem that much more complicated.
#-------------------------------------------------------------------------------------------------------------------------------------
# Remember to paste the right path into the line below (the value here is
# only a placeholder).
source_file_path <- "put your path here /dat.txt"
# -------------------------------------------------------------------------------------------------------------------------------------------------
library(tidyverse)
library(scales)
library(stringr)
library(extrafont)
# Read the source file
# NOTE: the path below is a placeholder; replace it with the real location
# of dat.txt before running.
source_file_path <- "put your path here /dat.txt"
source_data <- read.csv(file = source_file_path, sep = "\t")

# Strip leading/trailing whitespace from every character column.
source_data <- source_data %>% mutate_if(is.character, trimws)

# Combined var1/var2 label, e.g. "a_b".
source_data$unique_group <- interaction(
  source_data$var1, source_data$var2,
  sep = "_"
)

# Keep only the var1 groups that have more than 10 observations. The helper
# column `count` is kept in the result. Ungroup afterwards so later steps do
# not silently operate per group.
source_data <- source_data %>%
  group_by(var1) %>%
  mutate(count = n()) %>%
  filter(count > 10) %>%
  ungroup()

# Manual ECDF per var1 group: ecdf = rank / n of the ordered observations.
# The rows are reordered as a whole with order(). Sorting only the val column
# (x$val <- sort(x$val)) would detach each value from its var2 and the other
# columns of its row, so points would be coloured by the wrong var2.
data_and_ecdf <- lapply(
  split(source_data, source_data$var1, drop = TRUE),
  function(x) {
    x <- x[order(x$val), ]
    n_obs <- nrow(x)
    x$ecdf <- seq_len(n_obs) / n_obs
    x
  }
)
data_and_ecdf <- bind_rows(data_and_ecdf)

# One ECDF line per var1 group; points coloured by var2 and shaped by var1.
# scale_shape_manual() supplies two shapes, i.e. two var1 groups are expected
# to survive the count filter above.
ecdf_overlap <- ggplot(
  data = data_and_ecdf,
  mapping = aes(x = val, y = ecdf, group = var1, colour = var2, shape = var1)
) +
  geom_line(size = 1) +
  scale_shape_manual(values = c(5, 15)) +
  geom_point(size = 2.5) +
  scale_color_viridis_d() +
  theme_bw()
{windows(height = 15, width = 15)
ecdf_overlap