# SGTA Week 10 ----

## Question 1 ----

# Load the turtle data; the models below use its `eggs` and `carapace` columns.
turtles <- read.table(
  file = "data/turtles.csv",
  header = TRUE,
  sep = ","
)

# (a) Straight-line fit of eggs on carapace, followed by its diagnostics.
turtles_lm <- lm(formula = eggs ~ carapace, data = turtles)

# Residuals vs fitted: variance ok, clear nonlinearity.
plot(turtles_lm, which = 1)

# Normal Q-Q: lots of points far away from the line, doesn't look normal.
plot(turtles_lm, which = 2)

# (b) Polynomial fits: add a squared term, then a cubed term as well.
# I() protects `^` so it is treated as arithmetic inside the formula.
turtles_quad <- lm(
  formula = eggs ~ carapace + I(carapace^2),
  data = turtles
)
turtles_cube <- lm(
  formula = eggs ~ carapace + I(carapace^2) + I(carapace^3),
  data = turtles
)

# (c) Sequential ANOVA table for each polynomial fit.
anova(turtles_quad)
anova(turtles_cube)
# cubic effect is not a significant improvement to quadratic model

# (d) Summary and diagnostics for the quadratic fit.
summary(turtles_quad)
plot(turtles_quad, which = 1)
plot(turtles_quad, which = 2)
# diagnostics are better, because:
#
#

# (e) Scatterplot of the data with both fitted models overlaid.
plot(eggs ~ carapace, data = turtles)

# Straight-line fit, drawn for comparison only.
# THIS IS NOT THE CORRECT MODEL
abline(turtles_lm, col = "firebrick")

# Evaluate the quadratic fit on 100 evenly spaced carapace values spanning
# the observed range, then draw the resulting curve.
x <- seq(
  from = min(turtles$carapace),
  to = max(turtles$carapace),
  length.out = 100
)
y <- predict(turtles_quad, newdata = data.frame(carapace = x))
lines(x, y, col = "cornflowerblue")

# yhat = -899.9 + 5.871 * carapace - 0.0094 * carapace^2