library(tidyverse)
<- read_csv("scores.csv")
games <- games |> filter(home_team_state == 'WV', visiting_team_state == 'WV') wv_games
Calculate the average differential by year for WV games
<- wv_games |>
wv_by_year group_by(year) |>
summarise(
min_diff = min(differential),
max_diff = max(differential),
avg_diff = mean(differential),
median_diff = median(differential)
)
Make a bar chart of average differential
ggplot() +
geom_bar(data=wv_by_year, aes(x=year, weight=avg_diff)) +
labs(
title="Pulling away",
subtitle = "Average margin of victory is more than 30 points this season",
x="Year",
y="Average Difference"
+ theme_minimal() )
Are there more 35+ margin games?
<- wv_games |>
all_games group_by(year) |>
summarise(all_games = n())
<- wv_games |>
thirtyfive_plus filter(differential >= 35) |>
group_by(year) |>
summarise(thirtyfive_plus_games = n())
<- all_games |>
thirtyfive_with_pct inner_join(thirtyfive_plus) |>
mutate(pct = thirtyfive_plus_games/all_games *100) |>
arrange(desc(pct))
Joining with `by = join_by(year)`
|>
all_games inner_join(thirtyfive_plus) |>
mutate(pct = thirtyfive_plus_games/all_games *100) |>
arrange(desc(pct))
Joining with `by = join_by(year)`
# A tibble: 14 × 4
year all_games thirtyfive_plus_games pct
<dbl> <int> <int> <dbl>
1 2023 499 227 45.5
2 2022 540 217 40.2
3 2016 518 169 32.6
4 2019 545 176 32.3
5 2018 558 171 30.6
6 2015 516 155 30.0
7 2021 545 163 29.9
8 2020 444 125 28.2
9 2014 516 144 27.9
10 2013 524 146 27.9
11 2012 511 131 25.6
12 2010 529 129 24.4
13 2011 512 123 24.0
14 2009 539 127 23.6
ggplot() +
geom_bar(data=thirtyfive_with_pct, aes(x=year, weight=pct)) +
labs(
title="West Virginia High School Football's Lopsided Games",
subtitle = "More than 45 percent of games this season have a margin of 35 or more points, a modern high.",
x="Year",
y="Percent"
+ theme_minimal() )
Teams with largest average margins in 2023
<- wv_games |>
home_margins mutate(home_team_margin = home_team_score - visiting_team_score) |>
group_by(year, home_team) |>
summarize(home_games = n(), home_margin = sum(home_team_margin))
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
<- wv_games |>
visiting_margins mutate(visiting_team_margin = visiting_team_score - home_team_score) |>
group_by(year, visiting_team) |>
summarize(visiting_games = n(), visiting_margin = sum(visiting_team_margin))
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
<- home_margins |> left_join(visiting_margins, join_by(home_team == visiting_team, year == year)) |>
team_margins mutate(home_games = replace_na(home_games, 0)) |>
mutate(visiting_games = replace_na(visiting_games, 0)) |>
mutate(home_margin = replace_na(home_margin, 0)) |>
mutate(visiting_margin = replace_na(visiting_margin, 0)) |>
mutate(total_games = home_games + visiting_games, total_margin = home_margin + visiting_margin) |>
rename(team = home_team) |>
group_by(team, year) |>
summarize(avg_margin = total_margin/total_games) |>
arrange(desc(avg_margin))
`summarise()` has grouped output by 'team'. You can override using the
`.groups` argument.
There are more teams with average margins of 35 points or more this season than any recent year
|>
team_margins filter(avg_margin >= 35) |>
group_by(year) |>
summarise(teams = n()) |>
arrange(desc(teams))
# A tibble: 14 × 2
year teams
<dbl> <int>
1 2023 7
2 2010 5
3 2015 5
4 2009 4
5 2016 4
6 2022 4
7 2013 3
8 2014 3
9 2019 3
10 2021 3
11 2011 2
12 2012 2
13 2018 2
14 2020 2
Teams with the largest average margin of victory in 2023
|>
team_margins filter(avg_margin >= 35, year == 2023)
# A tibble: 7 × 3
# Groups: team [7]
team year avg_margin
<chr> <dbl> <dbl>
1 Huntington 2023 46.3
2 Williamstown 2023 44.3
3 Greenbrier West 2023 42.6
4 Cameron 2023 42
5 Martinsburg 2023 41.2
6 Wahama 2023 38.3
7 Hurricane 2023 35.4
Runaway Games - 49+ point margins
|>
wv_games filter(differential >= 49) |>
group_by(year) |>
summarise(games = n()) |>
arrange(desc(games))
# A tibble: 14 × 2
year games
<dbl> <int>
1 2023 85
2 2022 73
3 2015 54
4 2021 53
5 2016 50
6 2013 49
7 2018 48
8 2019 48
9 2014 44
10 2020 40
11 2010 38
12 2009 37
13 2012 35
14 2011 34
Runaway Games - 70+ point margins
|>
wv_games filter(differential >= 70) |>
group_by(year) |>
summarise(games = n()) |>
arrange(desc(games))
# A tibble: 13 × 2
year games
<dbl> <int>
1 2023 13
2 2014 5
3 2018 5
4 2013 4
5 2019 4
6 2020 4
7 2021 4
8 2015 3
9 2009 2
10 2012 2
11 2010 1
12 2011 1
13 2022 1