ggplot(data = surveys,
mapping = aes(x = species, y = weight)) +
geom_boxplot(outliers = FALSE) +
geom_jitter(color = "steel blue", alpha = 0.08) +
labs(title = "Distribution of Weight by Species Boxplot",
subtitle = "Vertical Orientation",
x = "Species Name", y = "Weight (g)") +
theme_bw() +
theme(axis.text.x = element_text(angle = 45),
axis.ticks.length.x = unit(8, "mm"),
axis.ticks.x = element_line(color = "white"),
panel.border = element_rect(color = "black", linewidth = 1),
plot.title = element_text(face = "bold"),
plot.subtitle = element_text(color = "steelblue"),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank())
Gallary of Exemplary Student Work
Challenge 2
Here are some of the exemplary assignments from your peers!
Themes
This code is brought to us by Jonathan Flores! I really appreciated the extensive themes they included and how these themes make the plot clearer to the reader.
Unique Colors
This code is brought to us by Fisher Fraley! I really appreciated how they found a unique color theme from a context they liked!
# Color Palette: https://color.adobe.com/TRON-color-theme-6970180/
<- c("#DF7212", "#6FC3DF", "#E6FFFF", "#FFE64D", "#0C141F")
tron_colors
ggplot(data = surveys, aes(x = weight, y = species, color = sex)) +
geom_boxplot(outlier.shape = NA, width = 0.6) +
geom_jitter(width = 0.15, height = 0, alpha = 0.3, size = 1) +
scale_color_manual(values = tron_colors) +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(
title = "Rodent Weight by Species",
subtitle = "Y-Axis = Rodent Species",
x = "Rodent Weight (g)",
y = NULL,
caption = "Source: Portal Project Database"
)
Removing the Legend (and More)
This code is brought to us by Lucy Thackray! I thought it was special that Lucy stacked all three challenged to make a clean, easy to read plot.
<- c("#f07857", "#4fb06d" )
colors
ggplot(data = surveys,
mapping = aes(x = weight, y = species)) +
geom_jitter( alpha = 0.5) +
geom_boxplot(outliers = FALSE, mapping = aes(color = sex)) +
labs(x = "Weight (g)", y = "Species", color = "Sex",
subtitle = "<span style = 'color:#f07857;'>Female</span> vs <span style = 'color:#4fb06d;'>Male</span> rodent species weights") +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
theme(
legend.position = "none",
plot.subtitle = element_markdown()) +
theme(axis.ticks.y = element_blank()) +
scale_color_manual(values = colors)
Lab 3
across()
+ mutate()
This code is brought to us by a student who wishes to remain anonymous! What I appreciated about this code was the use of across()
when converting multiple columns to numeric. I also loved their use of the most current syntax for the across()
function!
<- teacher_evals |>
teacher_evals_clean rename(sex = gender) |>
mutate(teacher_id = as.character(teacher_id),
question_no = as.character(question_no),
across(
.cols = c(no_participants,
resp_share,
SET_score_avg,
percent_failed_cur,
seniority),.fns = ~ as.numeric(.x)
)|>
)filter(no_participants > 10) |>
select(course_id,
teacher_id,
question_no,
no_participants,
resp_share,
SET_score_avg,
percent_failed_cur,
academic_degree,
seniority, sex)
Confirming Unique Row Identification
This code is brought to us by Akshat Khandelwal! I thought it was very clever how Akshat played with different combinations of variables to confirm which combination uniquely identified a row.
<- teacher_evals |>
teacher_question distinct(teacher_id, question_no)
print(nrow(teacher_question) == nrow(teacher_evals)) # False
[1] FALSE
<- teacher_evals |>
teacher_question_course distinct(teacher_id, question_no, course_id)
print(nrow(teacher_question_course) == nrow(teacher_evals)) # True
[1] TRUE
<- teacher_evals |>
course_question distinct(question_no, course_id)
print(nrow(course_question) == nrow(teacher_evals)) # False
[1] FALSE
Proportions in Demographic Table
This code is brought to us by Arturo Ordaz-Gutierrez! I really appreciated how Art added the proportions to this demographic table so it is easier to see the representation of each group.
|>
teacher_evals_clean distinct(teacher_id, .keep_all = TRUE) |>
count(academic_degree, sex) |>
mutate(percent = round(100 * n / sum(n), 1)) |>
arrange(desc(percent))
# A tibble: 8 × 4
academic_degree sex n percent
<chr> <chr> <int> <dbl>
1 dr male 95 32.3
2 dr female 73 24.8
3 ma male 39 13.3
4 ma female 36 12.2
5 no_dgr female 24 8.2
6 no_dgr male 19 6.5
7 prof male 7 2.4
8 prof female 1 0.3
across()
+ summarize()
This code is brought to us by Rachel Survilas! I loved how Rachel noticed they were using the same n_distinct()
function across multiple columns and decided to use across()
! Also, the syntax they are using is on point!
|>
teacher_evals_clean summarize(
across(
.cols = c(teacher_id, course_id),
.fns = ~ n_distinct(.x))
)
# A tibble: 1 × 2
teacher_id course_id
<int> <int>
1 294 921
Slicing the Max & Min Together
This code is brought to us by a student who wishes to remain anonymous! I thought this code was great because they got both the maximum values and the minimum values in one pipeline. Very efficient!
|>
teacher_evals_clean filter(seniority == 1) |>
group_by(teacher_id) |>
summarize(
avg_rating = mean(percent_failed_cur, na.rm = TRUE),
.groups = "drop"
|>
) filter(
== min(avg_rating)|
avg_rating == max(avg_rating)
avg_rating |>
) arrange(avg_rating)
# A tibble: 7 × 2
teacher_id avg_rating
<chr> <dbl>
1 102379 0
2 103092 0
3 106126 0
4 86222 0
5 98650 0
6 98651 0
7 106692 0.68
Using kable()
for Nicer Looking Tables
This code is brought to us by a student who wishes to remain anonymous! I appreciated that they read the instructions to use the kable()
function to get nicer looking tables. The code is very simple, but the tables look much nicer!
<- teacher_evals_clean |>
percent_failed group_by(teacher_id) |>
filter(seniority == 1) |>
summarize(avg_percent_failed = mean(percent_failed_cur))
|>
percent_failed slice_min(avg_percent_failed) |>
kable()
teacher_id | avg_percent_failed |
---|---|
102379 | 0 |
103092 | 0 |
106126 | 0 |
86222 | 0 |
98650 | 0 |
98651 | 0 |
Challenge 3
Hypothesis Test Conclusion
These statements are brought to us by Abel Alcala and Sebastien Montgrain! I really appreciated the context they infused into their hypothesis test conclusion!
Based on the results of the Chi-Squared test of Independence, we have a p-value of 0.006075 and a X^2 value of 10.207. Since the p-value is less than α = 0.05, we reject the null hypotheses indicating a statistically significant association between seniority level and SET evaluation results. This suggests that instructor experience across junior, senior, and very senior appears to influence how students perceive the use of engaging activities in class.
Because the p-value of our Chi-squared test of independence is less than 0.05 (0.006075), we know that there is an association between a teachers seniority and their SET scores from question 3 on if the professor used activities to make the class more engaging. Combining this Chi-squared test with the graph, we can see that more experienced teachers do a better job of implementing engaging activities is their classes.