The help for coord_trans()
explains that scale transformations (e.g., scale_y_log10()
) are performed before statistics are calculated, while coordinate transformations (e.g., coord_trans(y="log10")
) are performed after statistics are calculated.
In your case, this means that with scale_y_log10
the mean and se are being calculated on the log-transformed data, rather than on the original untransformed data. To calculate the statistics on the untransformed data, remove scale_y_log10()
and use coord_trans(y="log10")
.
The example below shows the values that ggplot is calculating internally and then reproduces those values by direct calculation:
library(tidyverse)
pde=1.1 #position dodge for error bars
pdp=0.35 #position dodge for points
p1 = ggplot(data=mtcars, aes(x=vs, y=mpg, colour=factor(am))) +
geom_point(position=position_dodge(width=pdp), size=3) +
stat_summary(fun = "mean", geom="point", size=2, stroke=1.1,
position=position_dodge(width=pde)) +
stat_summary( fun.data = "mean_se", geom = "errorbar",
width=0.15, position=position_dodge(width=pde)) +
theme_bw()
p2 = p1 + scale_y_log10()
# Get data frames for each set of mean/errorbar layers
# that ggplot calculates internally
p1dat = ggplot_build(p1)$data[[3]]
p2dat = ggplot_build(p2)$data[[3]]
p1dat %>% select(y, ymin, ymax)
#> y ymin ymax
#> 1 15.05000 14.24910 15.85090
#> 2 20.74286 19.80888 21.67683
#> 3 19.75000 18.11339 21.38661
#> 4 28.37143 26.57319 30.16967
p2dat %>% select(y, ymin, ymax) %>%
mutate(y.trans = 10^y,
ymax.trans = 10^ymax)
#> y ymin ymax y.trans ymax.trans
#> 1 1.170219 1.145648 1.194790 14.79853 15.65992
#> 2 1.314225 1.294657 1.333793 20.61699 21.56718
#> 3 1.288104 1.252044 1.324165 19.41353 21.09431
#> 4 1.447286 1.418346 1.476226 28.00826 29.93823
Now reproduce those same values by direct calculation:
mtcars %>%
group_by(am, vs) %>%
summarise(mean = mean(mpg),
mean.log = mean(log10(mpg)),
mean.log.trans = 10^mean.log,
mean.plus.se = mean + sqrt(var(mpg)/length(mpg)),
se.log = sqrt(var(log10(mpg))/length(mpg)),
mean.log.plus.se = mean.log + se.log,
mean.log.plus.se.trans = 10^mean.log.plus.se)
#> am vs mean mean.log mean.log.trans mean.plus.se se.log
#> 1 0 0 15.05000 1.170219 14.79853 15.85090 0.02457101
#> 2 0 1 20.74286 1.314225 20.61699 21.67683 0.01956814
#> 3 1 0 19.75000 1.288104 19.41353 21.38661 0.03606088
#> 4 1 1 28.37143 1.447286 28.00826 30.16967 0.02893993
#> mean.log.plus.se mean.log.plus.se.trans
#> 1 1.194790 15.65992
#> 2 1.333793 21.56718
#> 3 1.324165 21.09431
#> 4 1.476226 29.93823
And we can also see that coord_trans(y="log10")
calculates means and error bars before the log transformation:
p3 = p1 + coord_trans(y="log10")
p3dat = ggplot_build(p3)$data[[3]]
p3dat %>% select(y, ymin, ymax)
#> y ymin ymax
#> 1 15.05000 14.24910 15.85090
#> 2 20.74286 19.80888 21.67683
#> 3 19.75000 18.11339 21.38661
#> 4 28.37143 26.57319 30.16967