Here’s what we want to accomplish in this class:
- Finish discussion of data wrangling/factors [slides].
- Discuss the intuition and formulas for the average (avg) and standard deviation (SD).
- `group_by()`/`summarize()` [slides].
- Discuss normal approximation [slides].
- Discuss homework.
# load packages
library(tidyverse)
library(kableExtra)
# Bring the gapminder data frame into the workspace from the gapminder package.
data("gapminder", package = "gapminder")
# Print a compact, column-by-column overview of the example data.
gapminder %>%
  glimpse()
## Rows: 1,704
## Columns: 6
## $ country <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
## $ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
## $ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …
# Average life expectancy per continent, restricted to the 2007 rows.
smry <- gapminder %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(avg_life_exp = mean(lifeExp))
# Print a compact overview of the summary that was just stored in smry.
glimpse(smry)
## Rows: 5
## Columns: 2
## $ continent <fct> Africa, Americas, Asia, Europe, Oceania
## $ avg_life_exp <dbl> 54.80604, 73.60812, 70.72848, 77.64860, 80.71950
# Turn the continent summary into a table: give the columns reader-friendly
# headings, round the averages to whole years, and hand the result to kable().
smry %>%
  rename(
    Continent = continent,
    `Average Life Expectancy` = avg_life_exp
  ) %>%
  mutate(`Average Life Expectancy` = round(`Average Life Expectancy`)) %>%
  kable()
| Continent | Average Life Expectancy |
|:----------|------------------------:|
| Africa    |                      55 |
| Americas  |                      74 |
| Asia      |                      71 |
| Europe    |                      78 |
| Oceania   |                      81 |
# Average life expectancy for every continent-year combination.
# summarise() is called without a `.groups` argument here, so it reports that
# the output is still grouped by continent; glimpse() then prints the summary
# before it is stored in smry (overwriting the earlier 2007-only summary).
smry <- gapminder %>%
  group_by(continent, year) %>%
  summarise(avg_life_exp = mean(lifeExp)) %>%
  glimpse()
## `summarise()` has grouped output by 'continent'. You can override using the
## `.groups` argument.
## Rows: 60
## Columns: 3
## Groups: continent [5]
## $ continent <fct> Africa, Africa, Africa, Africa, Africa, Africa, Africa, A…
## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 199…
## $ avg_life_exp <dbl> 39.13550, 41.26635, 43.31944, 45.33454, 47.45094, 49.5804…
# Build a wide table with one row per continent and one column per year:
# relabel the continent column, round the averages to whole years, spread the
# years across columns, and render with kable().
smry %>%
  rename(Continent = continent) %>%
  mutate(avg_life_exp = round(avg_life_exp)) %>%
  pivot_wider(names_from = year, values_from = avg_life_exp) %>%
  kable()
| Continent | 1952 | 1957 | 1962 | 1967 | 1972 | 1977 | 1982 | 1987 | 1992 | 1997 | 2002 | 2007 |
|:----------|-----:|-----:|-----:|-----:|-----:|-----:|-----:|-----:|-----:|-----:|-----:|-----:|
| Africa    |   39 |   41 |   43 |   45 |   47 |   50 |   52 |   53 |   54 |   54 |   53 |   55 |
| Americas  |   53 |   56 |   58 |   60 |   62 |   64 |   66 |   68 |   70 |   71 |   72 |   74 |
| Asia      |   46 |   49 |   52 |   55 |   57 |   60 |   63 |   65 |   67 |   68 |   69 |   71 |
| Europe    |   64 |   67 |   69 |   70 |   71 |   72 |   73 |   74 |   74 |   76 |   77 |   78 |
| Oceania   |   69 |   70 |   71 |   71 |   72 |   73 |   74 |   75 |   77 |   78 |   80 |   81 |
Carlisle Rainey