diff --git a/.RData b/.RData
index 1477de8..cf0ebc4 100644
Binary files a/.RData and b/.RData differ
diff --git a/.Rhistory b/.Rhistory
index fda2fd8..affce0f 100644
--- a/.Rhistory
+++ b/.Rhistory
@@ -1,400 +1,512 @@
-12/89
-2/46
-10/43
-66/85
-65/81
-View(loan50)
-loan50 <- read.csv("https://www.openintro.org/stat/data/csv/loan50.csv")
-View(loan50)
-?loan50
-??loan50
-install.packages("shiny")
-install github("StatsWithR/statsr")
-install_github("StatsWithR/statsr", force =TRUE)
-install github("StatsWithR/statsr", force =TRUE)
-library(devtools)
-install_github("StatsWithR/statsr")
-library("tidyverse")
-library("shiny")
-data(rbuthnot)
-library(dplyr)
-library(ggplot2)
-library(statsr)
-data(rbuthnot)
-data(arbuthnot)
-arbuthnot
-tail(arbuthnot)
-View(arbuthnot)
-arbuthnot <- arbuthnot %>%
-mutate(total = boys + girls)
-arbuthnot$total
-ggplot(arbuthnot)+
-mapping= aes(x = total, y = years)
-ggplot(data = arbuthnot, aes(x = year, y = ottal))+
+geom_box()
+library(tidyverse)
+ggplot(data = age, aes(x = X_ageg5yr)) +
+geom_bar()
+brfss_sl_chl <- brfss_clean %>%
+select(sleptim1, children) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+summary(brfss_sl_chl)
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point() + geom_smooth()
+brfss_sl_chl <- brfss_clean %>%
+select(sleptim1, children) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+summary(brfss_sl_chl)
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point() + geom_smooth()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_histogram(binwidth = 1)
+brfss_sl_chl <- brfss_clean %>%
+select(sleptim1, children) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+summary(brfss_sl_chl)
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point() + geom_smooth()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_histogram()
+?geom_point()
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+summary(brfss_sl_chl)
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point() + geom_smooth()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_point(color = factor(X_ageg5yr))
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+summary(brfss_sl_chl)
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point() + geom_smooth()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_point(color = X_ageg5yr)
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+View(brfss_sl_chl)
+summary(brfss_sl_chl)
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point() + geom_smooth()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
geom_point()
-ggplot(data = arbuthnot, aes(x = year, y = total))+
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point() + geom_smooth()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
geom_point()
-ggplot(data = arbuthnot, aes(x = year, y = total)) +
-geom_line() +
+ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children)) +
+geom_boxplot() + coord_flip()
+?geom_boxplot()
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summerise(mean = mean(children), median = median(children))
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point() + geom_smooth()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
geom_point()
-arbuthnot <- arbuthnot %>%
-mutate(boyprop = boys / total)
-ggplot(data = arbuthot, aes(x = year, y = boyprop))+
-geom_line()
-ggplot(data = arbuthnot, aes(x = year, y = boyprop))+
-geom_line()
-arbuthnot <- arbuthnot %>%
-mutate(more_boys = boys > girls)
-data(present)
-data(present)
-force(present)
-View(present)
-devtools::install_github('rstudio/rmarkdown')
-# type your code for Question 6 here, and Knit
-present <- present %>%
-mutate(more_boys = boys > girls)
-ggplot(data = present, aes(x = year, y = more_boys))+
-geom_line()
-View(arbuthnot)
-# type your code for Question 7 here, and Knit
-present <- present %>%
-mutate(prop_boy_girl = boys / girls) %>%
-ggplot(data = present, aes(x = year, y = more_boys))+
-geom_line()
-# type your code for Question 7 here, and Knit
-present <- present %>%
-mutate(prop_boy_girl = boys / girls) %>%
-ggplot(data = present, aes(x = year, y = more_boys))+
-geom_line()
-# type your code for Question 7 here, and Knit
-present <- present %>%
-mutate(prop_boy_girl = boys / girls)
-ggplot(data = present, aes(x = year, y = more_boys))+
-geom_line()
-# type your code for Question 7 here, and Knit
-present <- present %>%
-mutate(prop_boy_girl = boys / girls)
-ggplot(data = present, aes(x = year, y = prop_boy_girl))+
-geom_line()
-# type your code for Question 8 here
-# sample code is provided below, edit as necessary, uncomment, and then Knit
-#present %>%
-# mutate(total = ?) %>%
-# arrange(desc(total))
-present %>%
-arrange(desc(total))
-# type your code for Question 8 here
-# sample code is provided below, edit as necessary, uncomment, and then Knit
-#present %>%
-# mutate(total = ?) %>%
-# arrange(desc(total))
-present %>%
-mutate(total = boys + girls) %>%
-arrange(desc(total))
-library(statsr)
-library(dplyr)
-library(ggplot2)
-data(nycflights)
-force(nycflights)
-names(nycflights)
-str(nycflights)
-ggplot(data = nycflights, aes(x = dep_delay)) +
-geom_histogram()
-ggplot(data = mycflights, aes(x = dep_delay))+
-geom_histogram(binwidth = 15)
-ggplot(data = nycflights, aes(x = dep_delay)) +
-geom_histogram(binwidth = 15)
-ggplot(data = nycflights, aes(x = dep_delay))+
-geom_histogram(binwidth = 150)
-rdu_flights <- nycflights %>%
-filter(dest == "RDU")
-ggplot(data = rdu_flights, aes(x = dest_delay))+
-geom_histogram()
-ggplot(data = rdu_flights, aes(x = dep_delay))+
-geom_histogram()
-rdu_flights %>%
-summarise(mean_dd = mean(rdu_flights), sd_dd = sd(rdu_flights), n = n())
-str(rdu_flights)
-rdu_flights %>%
-summarise(mean_dd = mean(dep_delay), sd_dd = sd(dep_delay), n = n())
-sfo_feb_flights <- nyc_flights %>%
-filter(dest == "SFO", month == 2)
-sfo_feb_flights <- nycflights %>%
-filter(dest == "SFO", month == 2)
-ggplot(data = sfo_feb_flights, aes(x = dep_delay))+
-geom_histogram(())
-ggplot(data = sfo_feb_flights, aes(x = dep_delay))+
-geom_histogram()
-str(sfo_feb_flights)
-?summarise
-# type your code for Question 4 here, and Knit
-"July"
-# type your code for Question 5 here, and Knit
-nycflights %>%
-group_by(month) %>%
-summarise(median(dep_delay))
-# type your code for Question 5 here, and Knit
-nycflights %>%
-group_by(month) %>%
-summarise(median_dd = median(dep_delay)) %>%
-arrange(desc(median_dd))
-nycflights <- nycflights %>%
-mutate(dep_type = ifelse(dep_delay < 5, "on time", "delayed"))
-View(nycflights)
-nycflights %>%
-group_by(origin) %>%
-summarise(ot_dep_rate = sum(dep_type == "on time") / n()) %>%
-arrange(desc(ot_dep_rate))
-ggplot(data = nycflights, aes(x = origin, fill = dep_type)) +
-geom_bar()
-?nycflights
-# type your code for Question 9 here, and Knit
-ggplot(data = nycflights, aes(x = avg_speed, y = distance))+
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children)) +
+ag_ch + geom_boxplot() + coord_flip()
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point() + geom_smooth()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
geom_point()
-library(statsr)
-library(dplyr)
-library(ggplot2)
-data(nycflights)
-names(nycflights)
-str(nycflights)
-ggplot(data = nycflights, aes(x = dep_delay)) +
-geom_histogram()
-ggplot(data = nycflights, aes(x = dep_delay)) +
-geom_histogram(binwidth = 15)
-ggplot(data = nycflights, aes(x = dep_delay)) +
-geom_histogram(binwidth = 150)
-rdu_flights <- nycflights %>%
-filter(dest == "RDU")
-ggplot(data = rdu_flights, aes(x = dep_delay)) +
-geom_histogram()
-rdu_flights %>%
-summarise(mean_dd = mean(dep_delay), sd_dd = sd(dep_delay), n = n())
-sfo_feb_flights <- nycflights %>%
-filter(dest == "SFO", month == 2)
-# type your code for Question 1 here, and Knit
-68
-# type your code for Question 2 here, and Knit
-ggplot(data = sfo_feb_flights, aes(x = arr_delay))+
-geom_histogram()
-sfo_feb_flights %>%
-summarise(median(arr_delay))
-rdu_flights %>%
-group_by(origin) %>%
-summarise(mean_dd = mean(dep_delay), sd_dd = sd(dep_delay), n = n())
-# type your code for Question 3 here, and Knit
-sfo_feb_flights %>%
-group_by(carrier) %>%
-summarise(median(arr_delay), sd(arr_delay))
-nycflights %>%
-group_by(month) %>%
-summarise(mean_dd = mean(dep_delay)) %>%
-arrange(desc(mean_dd))
-# type your code for Question 4 here, and Knit
-"July"
-# type your code for Question 5 here, and Knit
-nycflights %>%
-group_by(month) %>%
-summarise(median_dd = median(dep_delay)) %>%
-arrange(desc(median_dd))
-ggplot(nycflights, aes(x = factor(month), y = dep_delay)) +
-geom_boxplot()
-nycflights <- nycflights %>%
-mutate(dep_type = ifelse(dep_delay < 5, "on time", "delayed"))
-nycflights %>%
-group_by(origin) %>%
-summarise(ot_dep_rate = sum(dep_type == "on time") / n()) %>%
-arrange(desc(ot_dep_rate))
-# type your code for Question 7 here, and Knit
-"LGA"
-ggplot(data = nycflights, aes(x = origin, fill = dep_type)) +
-geom_bar()
-# type your code for Question 8 here, and Knit
-nycflights %>%
-nycflights <- mutate(avg_speed = (distance / (air_time / 60))) %>%
-arrange(desc(avg_speed)) %>%
-select(avg_speed, tailnum)
-library(statsr)
-library(dplyr)
-library(ggplot2)
-data(nycflights)
-names(nycflights)
-str(nycflights)
-ggplot(data = nycflights, aes(x = dep_delay)) +
-geom_histogram()
-ggplot(data = nycflights, aes(x = dep_delay)) +
-geom_histogram(binwidth = 15)
-ggplot(data = nycflights, aes(x = dep_delay)) +
-geom_histogram(binwidth = 150)
-rdu_flights <- nycflights %>%
-filter(dest == "RDU")
-ggplot(data = rdu_flights, aes(x = dep_delay)) +
-geom_histogram()
-rdu_flights %>%
-summarise(mean_dd = mean(dep_delay), sd_dd = sd(dep_delay), n = n())
-sfo_feb_flights <- nycflights %>%
-filter(dest == "SFO", month == 2)
-# type your code for Question 1 here, and Knit
-68
-# type your code for Question 2 here, and Knit
-ggplot(data = sfo_feb_flights, aes(x = arr_delay))+
-geom_histogram()
-sfo_feb_flights %>%
-summarise(median(arr_delay))
-rdu_flights %>%
-group_by(origin) %>%
-summarise(mean_dd = mean(dep_delay), sd_dd = sd(dep_delay), n = n())
-# type your code for Question 3 here, and Knit
-sfo_feb_flights %>%
-group_by(carrier) %>%
-summarise(median(arr_delay), sd(arr_delay))
-nycflights %>%
-group_by(month) %>%
-summarise(mean_dd = mean(dep_delay)) %>%
-arrange(desc(mean_dd))
-# type your code for Question 4 here, and Knit
-"July"
-# type your code for Question 5 here, and Knit
-nycflights %>%
-group_by(month) %>%
-summarise(median_dd = median(dep_delay)) %>%
-arrange(desc(median_dd))
-ggplot(nycflights, aes(x = factor(month), y = dep_delay)) +
-geom_boxplot()
-nycflights <- nycflights %>%
-mutate(dep_type = ifelse(dep_delay < 5, "on time", "delayed"))
-nycflights %>%
-group_by(origin) %>%
-summarise(ot_dep_rate = sum(dep_type == "on time") / n()) %>%
-arrange(desc(ot_dep_rate))
-# type your code for Question 7 here, and Knit
-"LGA"
-ggplot(data = nycflights, aes(x = origin, fill = dep_type)) +
-geom_bar()
-# type your code for Question 8 here, and Knit
-nycflights <- nycflights %>%
-mutate(avg_speed = (distance / (air_time / 60))) %>%
-arrange(desc(avg_speed)) %>%
-select(avg_speed, tailnum)
-# type your code for Question 9 here, and Knit
-nycflights <- nycflights %>%
-mutate(avg_speed = (distance / (air_time / 60)))
-# type your code for Question 8 here, and Knit
-nycflights %>%
-mutate(avg_speed = (distance / (air_time / 60))) %>%
-arrange(desc(avg_speed)) %>%
-select(avg_speed, tailnum)
-?nycflights
-nycflights$distance
-names(nycflights)
-View(nycflights)
-data(nycflights)
-force(nycflights)
-library(statsr)
-library(dplyr)
-library(ggplot2)
-data(nycflights)
-names(nycflights)
-str(nycflights)
-ggplot(data = nycflights, aes(x = dep_delay)) +
-geom_histogram()
-ggplot(data = nycflights, aes(x = dep_delay)) +
-geom_histogram(binwidth = 15)
-ggplot(data = nycflights, aes(x = dep_delay)) +
-geom_histogram(binwidth = 150)
-rdu_flights <- nycflights %>%
-filter(dest == "RDU")
-ggplot(data = rdu_flights, aes(x = dep_delay)) +
-geom_histogram()
-rdu_flights %>%
-summarise(mean_dd = mean(dep_delay), sd_dd = sd(dep_delay), n = n())
-sfo_feb_flights <- nycflights %>%
-filter(dest == "SFO", month == 2)
-# type your code for Question 1 here, and Knit
-68
-# type your code for Question 2 here, and Knit
-ggplot(data = sfo_feb_flights, aes(x = arr_delay))+
-geom_histogram()
-sfo_feb_flights %>%
-summarise(median(arr_delay))
-rdu_flights %>%
-group_by(origin) %>%
-summarise(mean_dd = mean(dep_delay), sd_dd = sd(dep_delay), n = n())
-# type your code for Question 3 here, and Knit
-sfo_feb_flights %>%
-group_by(carrier) %>%
-summarise(median(arr_delay), sd(arr_delay))
-nycflights %>%
-group_by(month) %>%
-summarise(mean_dd = mean(dep_delay)) %>%
-arrange(desc(mean_dd))
-# type your code for Question 4 here, and Knit
-"July"
-# type your code for Question 5 here, and Knit
-nycflights %>%
-group_by(month) %>%
-summarise(median_dd = median(dep_delay)) %>%
-arrange(desc(median_dd))
-ggplot(nycflights, aes(x = factor(month), y = dep_delay)) +
-geom_boxplot()
-nycflights <- nycflights %>%
-mutate(dep_type = ifelse(dep_delay < 5, "on time", "delayed"))
-nycflights %>%
-group_by(origin) %>%
-summarise(ot_dep_rate = sum(dep_type == "on time") / n()) %>%
-arrange(desc(ot_dep_rate))
-# type your code for Question 7 here, and Knit
-"LGA"
-ggplot(data = nycflights, aes(x = origin, fill = dep_type)) +
-geom_bar()
-# type your code for Question 8 here, and Knit
-nycflights %>%
-mutate(avg_speed = (distance / (air_time / 60))) %>%
-arrange(desc(avg_speed)) %>%
-select(avg_speed, tailnum)
-# type your code for Question 9 here, and Knit
-ggplot(data = nycflights, aes(x = avg_speed, y = distance))+
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+ag_ch + geom_boxplot() + coord_flip()
+ag_ch + geom_boxplot(outlier.shape = NA) + coord_flip() + geom_jitter(width = 0.2)
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point() + geom_smooth()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
geom_point()
-View(nycflights)
-# type your code for Question 10 here, and Knit
-nycflights <- nycflights %>%
-mutate(arr_type = ifelse(arr_delay <= 0, "on time", "delayed"))
-View(nycflights)
-library(statsr)
-library(dplyr)
-library(ggplot2)
-data(kobe_basket)
-View(kobe_basket)
-?calc_streak()
-kobe_streak <- calc_streak(kobe_basket$shot)
-ggplot(data = kobe_streak, aes(x = length)) +
-geom_histogram(binwidth = 1)
-summarise(kobe_streak, IQR(), median(), max(), min())
-?summarise
-kobe_streak %>%
-summarise(median(length), IQR(length), min(length), max(length()))
-summary(length())
-kobe_streak %>%
-summarise(median(length), IQR(length), min(length), max(length))
-coin_outcomes <- c("heads", "tails")
-sample(coin_outcomes, size = 1, replace = TRUE)
-sample(coin_outcomes, size = 1, replace = TRUE)
-sample(coin_outcomes, size = 1, replace = TRUE)
-sample(coin_outcomes, size = 1, replace = TRUE)
-sample(coin_outcomes, size = 1, replace = TRUE)
-sample(coin_outcomes, size = 1, replace = TRUE)
-sample(coin_outcomes, size = 1, replace = TRUE)
-sample(coin_outcomes, size = 1, replace = TRUE)
-sample(coin_outcomes, size = 1, replace = TRUE)
-sample(coin_outcomes, size = 1, replace = TRUE)
-sim_fair_coin <- sample(coin_outcomes, size = 100, replace = TRUE)
-View(sim_fair_coin)
-sim_fair_coin
-table(sim_fair_coin)
-sim_unfair_coin <- sample(coin_outcomes, size = 100, replace = TRUE, prob = c(.2, .8))
-sim_unfair_coin
-table(sim_unfair_coin)
-?sample
-shot_outcomes <- c("H", "M")
-sim_basket <- sample(shot_outcomes, size = 1, replace = TRUE)
-# type your code for the Exercise here, and Knit
-sim_basket <- sample(shot_outcomes, size = 133, replace = TRUE, prob = c(.45))
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+ag_ch + geom_boxplot() + coord_flip()
+a_c <- brfss_sl_chl %>%
+mutate(outlier.high = children > quantile(children, .75) + 1.50*IQR(children),
+outlier.low = children < quantile(children, .25) - 1.50*IQR(children))
+a_c <- a_c %>%
+mutate(outlier.color = case_when(outlier.high ~ "red",
+outlier.low ~ "steelblue"))
+a_c + geom_boxplot(outlier.shape = NA) + coord_flip() + geom_jitter(color = a_c$outlier.color, width = .2)
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_point()
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+ag_ch + geom_boxplot() + coord_flip()
+a_c <- brfss_sl_chl %>%
+mutate(outlier.high = children > quantile(children, .75) + 1.50*IQR(children),
+outlier.low = children < quantile(children, .25) - 1.50*IQR(children))
+a_c <- a_c %>%
+mutate(outlier.color = case_when(outlier.high ~ "red",
+outlier.low ~ "steelblue"))
+head(a_c)
+a_c + geom_boxplot(outlier.shape = NA) + coord_flip() + geom_jitter(color = outlier.color, width = .2)
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_point()
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+ag_ch + geom_boxplot() + coord_flip()
+a_c <- brfss_sl_chl %>%
+mutate(outlier_high = children > quantile(children, .75) + 1.50*IQR(children), +
+outlier_low = children < quantile(children, .25) - 1.50*IQR(children))
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_point()
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+ag_ch + geom_boxplot() + coord_flip()
+a_c <- brfss_sl_chl %>%
+mutate(outlier_high = children > quantile(children, .75) + 1.50*IQR(children), outlier_low = children < quantile(children, .25) - 1.50*IQR(children))
+a_c <- a_c %>%
+mutate(outlier.color = case_when(outlier.high ~ "red", outlier.low ~ "steelblue"))
+head(a_c)
+?quantile()
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_point()
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+ag_ch + geom_boxplot() + coord_flip()
+a_c <- brfss_sl_chl %>%
+mutate(outlier_high = children > quantile(children, .75) + 1.50*IQR(children), outlier_low = children < quantile(children, .25) - 1.50*IQR(children))
+a_c <- a_c %>%
+mutate(outlier_color = case_when(outlier_high ~ "red", outlier_low ~ "steelblue"))
+head(a_c)
+a_c + geom_boxplot(outlier.shape = NA) + coord_flip() + geom_jitter(color = outlier.color, width = .2)
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_point()
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+ag_ch + geom_boxplot() + coord_flip()
+a_c <- brfss_sl_chl %>%
+mutate(outlier_high = children > quantile(children, .75) + 1.50*IQR(children), outlier_low = children < quantile(children, .25) - 1.50*IQR(children))
+a_c <- a_c %>%
+mutate(outlier_color = case_when(outlier_high ~ "red", outlier_low ~ "steelblue"))
+head(a_c)
+a_c + geom_boxplot(outlier.shape = NA) + coord_flip() + geom_jitter(color = outlier_color, width = .2)
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point()
+p_chl + geom_jitter() + coord_flip()
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_point()
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+ag_ch + geom_boxplot() + coord_flip()
+a_c <- brfss_sl_chl %>%
+mutate(outlier_high = children > quantile(children, .75) + 1.50*IQR(children), outlier_low = children < quantile(children, .25) - 1.50*IQR(children))
+a_c <- a_c %>%
+mutate(outlier_color = case_when(outlier_high ~ "red", outlier_low ~ "steelblue"))
+head(a_c)
+ggplot(data = a_c, aes(x = X_ageg5yr, y = children)) +
+geom_boxplot(outlier.shape = NA) + coord_flip() + geom_jitter(color = outlier_color, width = .2)
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_point()
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+ag_ch + geom_boxplot() + coord_flip()
+a_c <- brfss_sl_chl %>%
+mutate(outlier_high = children > quantile(children, .75) + 1.50*IQR(children), outlier_low = children < quantile(children, .25) - 1.50*IQR(children))
+a_c <- a_c %>%
+mutate(outlier_color = case_when(outlier_high ~ "red", outlier_low ~ "steelblue"))
+head(a_c)
+ggplot(data = a_c, aes(x = X_ageg5yr, y = children)) +
+geom_boxplot(outlier.shape = NA) + coord_flip() + geom_jitter(color = a_c$outlier_color, width = .2)
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_point()
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+ag_ch + geom_boxplot() + coord_flip()
+a_c <- brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+mutate(outlier_high = children > quantile(children, .75) + 1.50*IQR(children), outlier_low = children < quantile(children, .25) - 1.50*IQR(children)) %>%
+ungroup()
+a_c <- a_c %>%
+mutate(outlier_color = case_when(outlier_high ~ "red", outlier_low ~ "steelblue"))
+head(a_c)
+ggplot(data = a_c, aes(x = X_ageg5yr, y = children)) +
+geom_boxplot(outlier.shape = NA) + coord_flip() + geom_jitter(color = a_c$outlier_color, width = .2)
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point()
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_point()
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+ag_ch + geom_boxplot() + coord_flip()
+a_c <- brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+mutate(outlier_high = children > quantile(children, .75) + 1.50*IQR(children), outlier_low = children < quantile(children, .25) - 1.50*IQR(children)) %>%
+ungroup()
+a_c %>%
+group_by(X_ageg5yr) %>%
+mutate(outlier = children > median(children) + IQR(children) * 1.5) %>%
+ungroup()
+a_c <- a_c %>%
+mutate(outlier_color = case_when(outlier_high ~ "red", outlier_low ~ "steelblue"))
+head(a_c)
+ggplot(data = a_c, aes(x = X_ageg5yr, y = children)) +
+geom_boxplot(outlier.shape = NA) + coord_flip() + geom_jitter(color = a_c$outlier_color, width = .2)
+ggplot(data = a_c, aes(x = X_ageg5yr, y = children)) +
+geom_boxplot(outlier.shape = NA) + coord_flip() + geom_jitter(outlier, width = .2)
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_point()
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+ag_ch + geom_boxplot() + coord_flip()
+a_c <- brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+mutate(outlier_high = children > quantile(children, .75) + 1.50*IQR(children), outlier_low = children < quantile(children, .25) - 1.50*IQR(children)) %>%
+ungroup()
+a_c %>%
+group_by(X_ageg5yr) %>%
+mutate(outlier = children > median(children) + IQR(children) * 1.5) %>%
+ungroup()
+a_c <- a_c %>%
+mutate(outlier_color = case_when(outlier_high ~ "red", outlier_low ~ "steelblue"))
+head(a_c)
+ggplot(data = a_c, aes(x = X_ageg5yr, y = children)) +
+geom_boxplot(outlier.shape = NA) + coord_flip() + geom_jitter(color = a_c$outlier_color, width = .2)
+ggplot(data = a_c, aes(x = X_ageg5yr, y = children)) +
+geom_boxplot(outlier.shape = NA) + coord_flip() +
+geom_jitter(data = filter(a_c, outlier ==T), width = .2)
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+select(sleptim1, children, X_ageg5yr) %>%
+drop_na() %>%
+filter(children < 20 & sleptim1 < 24)
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
+summary(brfss_sl_chl)
+brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+summarise(mean = mean(children), median = median(children))
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+p_chl + geom_point()
+p_chl + geom_jitter() + coord_flip()
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+geom_point()
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+ag_ch + geom_boxplot() + coord_flip()
+a_c <- brfss_sl_chl %>%
+group_by(X_ageg5yr) %>%
+mutate(outlier_high = children > quantile(children, .75) + 1.50*IQR(children), outlier_low = children < quantile(children, .25) - 1.50*IQR(children)) %>%
+ungroup()
+a_c <- a_c %>%
+group_by(X_ageg5yr) %>%
+mutate(outlier = children > median(children) + IQR(children) * 1.5) %>%
+ungroup()
+a_c <- a_c %>%
+mutate(outlier_color = case_when(outlier_high ~ "red", outlier_low ~ "steelblue"))
+head(a_c)
+ggplot(data = a_c, aes(x = X_ageg5yr, y = children)) +
+geom_boxplot(outlier.shape = NA) + coord_flip() + geom_jitter(color = a_c$outlier_color, width = .2)
+ggplot(data = a_c, aes(x = X_ageg5yr, y = children)) +
+geom_boxplot(outlier.shape = NA) + coord_flip() +
+geom_jitter(data = filter(a_c, outlier ==T), width = .2)
+source('~/.active-rstudio-document', echo=TRUE)
+?quantile
+url <- https://d18ky98rnyall9.cloudfront.net/_384b2d9eda4b29131fb681b243a7767d_brfss2013.RData?Expires=1569456000&Signature=d33tSiWG1HTbziLf5jLwq~7TgwKdv8JOinx5DL0XR927wK1gP7rh5uAltlxt4ZEBqVUQVnPVtn7LcdsxDK1mo7joxQkO0SqgpHbfexLWU1AOz~KY0S-PHzrcjDKE5fAffUdHQg6mVAah9ujDurpbREp6~q~dWLe4xSYEEXxQ1yY_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A
+url <- "https://d18ky98rnyall9.cloudfront.net/_384b2d9eda4b29131fb681b243a7767d_brfss2013.RData"
+destfile <- "./brfss2013.RData"
+if (!file.exists(destfile)) {
+setInternet2(TRUE)
+download.file(url ,destfile,method="auto")
+}
+load(destfile)
+source('~/.active-rstudio-document', echo=TRUE)
diff --git a/.Rproj.user/F65EB65E/pcs/workbench-pane.pper b/.Rproj.user/F65EB65E/pcs/workbench-pane.pper
index 92c5223..0e24b84 100644
--- a/.Rproj.user/F65EB65E/pcs/workbench-pane.pper
+++ b/.Rproj.user/F65EB65E/pcs/workbench-pane.pper
@@ -1,6 +1,6 @@
{
"TabSet1" : 0,
- "TabSet2" : 3,
+ "TabSet2" : 0,
"TabZoom" : {
}
}
\ No newline at end of file
diff --git a/.Rproj.user/F65EB65E/rmd-outputs b/.Rproj.user/F65EB65E/rmd-outputs
index 635a08a..58bbb46 100644
--- a/.Rproj.user/F65EB65E/rmd-outputs
+++ b/.Rproj.user/F65EB65E/rmd-outputs
@@ -1,8 +1,8 @@
-~/Data/OpenIntro Stats/week-3-lab.html
+~/Data/OpenIntro Stats/intro_data_prob_project.html
+~/Data/OpenIntro Stats/intro_data_prob_project.html
+~/Data/OpenIntro Stats/intro_data_prob_project.html
+~/Data/OpenIntro Stats/intro_data_prob_project.html
-~/Data/OpenIntro Stats/week-3-lab.html
-~/Data/OpenIntro Stats/week-3-lab.html
-~/Data/OpenIntro Stats/week-3-lab.html
diff --git a/.Rproj.user/F65EB65E/sources/prop/A8E8C529 b/.Rproj.user/F65EB65E/sources/prop/A8E8C529
index f52c169..043b0c6 100644
--- a/.Rproj.user/F65EB65E/sources/prop/A8E8C529
+++ b/.Rproj.user/F65EB65E/sources/prop/A8E8C529
@@ -1,4 +1,4 @@
{
- "cursorPosition" : "99,21",
- "scrollLine" : "71"
+ "cursorPosition" : "56,134",
+ "scrollLine" : "41"
}
\ No newline at end of file
diff --git a/.Rproj.user/F65EB65E/sources/s-6CF1D576/1A524EAD-contents b/.Rproj.user/F65EB65E/sources/s-6CF1D576/1A524EAD-contents
deleted file mode 100644
index 855653b..0000000
--- a/.Rproj.user/F65EB65E/sources/s-6CF1D576/1A524EAD-contents
+++ /dev/null
@@ -1,317 +0,0 @@
----
-title: "Probability"
-output: statsr:::statswithr_lab
----
-
-
-Complete all **Exercises**, and submit answers to **Questions** on the Coursera
-platform.
-
-
-## Hot Hands
-
-Basketball players who make several baskets in succession are described as
-having a *hot hand*. Fans and players have long believed in the hot hand
-phenomenon, which refutes the assumption that each shot is independent of the
-next. However, [a 1985 paper](http://www.sciencedirect.com/science/article/pii/0010028585900106) by Gilovich, Vallone, and Tversky collected evidence
-that contradicted this belief and showed that successive shots are independent
-events. This paper started a great controversy that continues to this day, as you can
-see by Googling *hot hand basketball*.
-
-We do not expect to resolve this controversy today. However, in this lab we'll
-apply one approach to answering questions like this. The goals for this lab are
-to (1) think about the effects of independent and dependent events, (2) learn
-how to simulate shooting streaks in R, and (3) to compare a simulation to actual
-data in order to determine if the hot hand phenomenon appears to be real.
-
-## Getting Started
-
-### Load packages
-
-In this lab we will explore the data using the `dplyr` package and visualize it
-using the `ggplot2` package for data visualization. The data can be found in the
-companion package for this course, `statsr`.
-
-Let's load the packages.
-
-```{r load-packages, message=FALSE}
-library(statsr)
-library(dplyr)
-library(ggplot2)
-```
-
-### Data
-
-Our investigation will focus on the performance of one player: Kobe Bryant of
-the Los Angeles Lakers. His performance against the Orlando Magic in the 2009
-NBA finals earned him the title *Most Valuable Player* and many spectators
-commented on how he appeared to show a hot hand. Let's load some necessary files
-that we will need for this lab.
-
-```{r load-data}
-data(kobe_basket)
-```
-
-This data frame contains 133 observations and 6 variables, where every
-row records a shot taken by Kobe Bryant. The `shot` variable in this dataset
-indicates whether the shot was a hit (`H`) or a miss (`M`).
-
-Just looking at the string of hits and misses, it can be difficult to gauge
-whether or not it seems like Kobe was shooting with a hot hand. One way we can
-approach this is by considering the belief that hot hand shooters tend to go on
-shooting streaks. For this lab, we define the length of a shooting streak to be
-the *number of consecutive baskets made until a miss occurs*.
-
-For example, in Game 1 Kobe had the following sequence of hits and misses from
-his nine shot attempts in the first quarter:
-
-\[ \textrm{H M | M | H H M | M | M | M} \]
-
-You can verify this by viewing the first 8 rows of the data in the data viewer.
-
-Within the nine shot attempts, there are six streaks, which are separated by a
-"|" above. Their lengths are one, zero, two, zero, zero, zero (in order of
-occurrence).
-
-1. Fill in the blank: A streak length of 1 means one \_\_\_ followed by one miss.
-
-- hit
-- miss
-
-
-
-2. Fill in the blank: A streak length of 0 means one \_\_\_ which must occur after a
-miss that ended the preceeding streak.
-
-- hit
-- miss
-
-
-Counting streak lengths manually for all 133 shots would get tedious, so we'll
-use the custom function `calc_streak` to calculate them, and store the results
-in a data frame called `kobe_streak` as the `length` variable.
-
-```{r calc-streak-kobe}
-kobe_streak <- calc_streak(kobe_basket$shot)
-```
-
-We can then take a look at the distribution of these streak lengths.
-
-```{r plot-streak-kobe}
-ggplot(data = kobe_streak, aes(x = length)) +
- geom_histogram(binwidth = 1)
-```
-
-3. Which of the following is false about the distribution of Kobe's streak lengths
-from the 2009 NBA finals.
-
-- The distribution of Kobe's streaks is unimodal and right skewed.
-- The typical length of a streak is 0 since the median of the distribution is at 0.
-- The IQR of the distribution is 1.
-
- The longest streak of baskets is of length 4.
-- The shortest streak is of length 1.
-
-
-## Compared to What?
-
-We've shown that Kobe had some long shooting streaks, but are they long enough
-to support the belief that he had hot hands? What can we compare them to?
-
-To answer these questions, let's return to the idea of *independence*. Two
-processes are independent if the outcome of one process doesn't effect the outcome
-of the second. If each shot that a player takes is an independent process,
-having made or missed your first shot will not affect the probability that you
-will make or miss your second shot.
-
-A shooter with a hot hand will have shots that are *not* independent of one
-another. Specifically, if the shooter makes his first shot, the hot hand model
-says he will have a *higher* probability of making his second shot.
-
-Let's suppose for a moment that the hot hand model is valid for Kobe. During his
-career, the percentage of time Kobe makes a basket (i.e. his shooting
-percentage) is about 45%, or in probability notation,
-
-\[ P(\textrm{shot 1 = H}) = 0.45 \]
-
-If he makes the first shot and has a hot hand (*not* independent shots), then
-the probability that he makes his second shot would go up to, let's say, 60%,
-
-\[ P(\textrm{shot 2 = H} \, | \, \textrm{shot 1 = H}) = 0.60 \]
-
-As a result of these increased probabilites, you'd expect Kobe to have longer
-streaks. Compare this to the skeptical perspective where Kobe does *not* have a
-hot hand, where each shot is independent of the next. If he hit his first shot,
-the probability that he makes the second is still 0.45.
-
-\[ P(\textrm{shot 2 = H} \, | \, \textrm{shot 1 = H}) = 0.45 \]
-
-In other words, making the first shot did nothing to effect the probability that
-he'd make his second shot. If Kobe's shots are independent, then he'd have the
-same probability of hitting every shot regardless of his past shots: 45%.
-
-Now that we've phrased the situation in terms of independent shots, let's return
-to the question: how do we tell if Kobe's shooting streaks are long enough to
-indicate that he has hot hands? We can compare his streak lengths to someone
-without hot hands: an independent shooter.
-
-## Simulations in R
-
-While we don't have any data from a shooter we know to have independent shots,
-that sort of data is very easy to simulate in R. In a simulation, you set the
-ground rules of a random process and then the computer uses random numbers to
-generate an outcome that adheres to those rules. As a simple example, you can
-simulate flipping a fair coin with the following.
-
-```{r head-tail}
-coin_outcomes <- c("heads", "tails")
-sample(coin_outcomes, size = 1, replace = TRUE)
-```
-
-The vector `outcomes` can be thought of as a hat with two slips of paper in it:
-one slip says `heads` and the other says `tails`. The function `sample` draws
-one slip from the hat and tells us if it was a head or a tail.
-
-Run the second command listed above several times. Just like when flipping a
-coin, sometimes you'll get a heads, sometimes you'll get a tails, but in the
-long run, you'd expect to get roughly equal numbers of each.
-
-If you wanted to simulate flipping a fair coin 100 times, you could either run
-the function 100 times or, more simply, adjust the `size` argument, which
-governs how many samples to draw (the `replace = TRUE` argument indicates we put
-the slip of paper back in the hat before drawing again). Save the resulting
-vector of heads and tails in a new object called `sim_fair_coin`.
-
-```{r sim-fair-coin}
-sim_fair_coin <- sample(coin_outcomes, size = 100, replace = TRUE)
-```
-
-To view the results of this simulation, type the name of the object and then use
-`table` to count up the number of heads and tails.
-
-```{r table-sim-fair-coin}
-sim_fair_coin
-table(sim_fair_coin)
-```
-
-Since there are only two elements in `outcomes`, the probability that we "flip"
-a coin and it lands heads is 0.5. Say we're trying to simulate an unfair coin
-that we know only lands heads 20% of the time. We can adjust for this by adding
-an argument called `prob`, which provides a vector of two probability weights.
-
-```{r sim-unfair-coin}
-sim_unfair_coin <- sample(coin_outcomes, size = 100, replace = TRUE,
- prob = c(0.2, 0.8))
-```
-
-`prob = c(0.2, 0.8)` indicates that for the two elements in the `outcomes` vector,
-we want to select the first one, `heads`, with probability 0.2 and the second
-one, `tails` with probability 0.8. Another way of thinking about this is to
-think of the outcome space as a bag of 10 chips, where 2 chips are labeled
-"head" and 8 chips "tail". Therefore at each draw, the probability of drawing a
-chip that says "head"" is 20%, and "tail" is 80%.
-
-
-**Exercise**: In your simulation of flipping the unfair coin 100 times, how many flips came up heads?
-
-
-In a sense, we've shrunken the size of the slip of paper that says "heads",
-making it less likely to be drawn and we've increased the size of the slip of
-paper saying "tails", making it more likely to be drawn. When we simulated the
-fair coin, both slips of paper were the same size. This happens by default if
-you don't provide a `prob` argument; all elements in the `outcomes` vector have
-an equal probability of being drawn.
-
-If you want to learn more about `sample` or any other function, recall that you
-can always check out its help file with `?sample`.
-
-
-## Simulating the Independent Shooter
-
-Simulating a basketball player who has independent shots uses the same mechanism
-that we use to simulate a coin flip. To simulate a single shot from an
-independent shooter with a shooting percentage of 50% we type,
-
-```{r sim-basket}
-shot_outcomes <- c("H", "M")
-sim_basket <- sample(shot_outcomes, size = 1, replace = TRUE)
-```
-
-To make a valid comparison between Kobe and our simulated independent shooter,
-we need to align both their shooting percentage and the number of attempted shots.
-
-
-
-**Exercise**: What change needs to be made to the `sample` function so that it reflects a shooting percentage of 45%? Make this adjustment, then run a simulation to sample 133 shots. Assign the output of this simulation to a new object called `sim_basket`.
-
-```{r}
-# type your code for the Exercise here, and Knit
-sim_basket <- sample(shot_outcomes, size = 133, replace = TRUE, prob = c(.45, 1-.45))
-
-table(sim_basket)
-```
-
-
-Note that we've named the new vector `sim_basket`, the same name that we gave to
-the previous vector reflecting a shooting percentage of 50%. In this situation,
-R overwrites the old object with the new one, so always make sure that you don't
-need the information in an old vector before reassigning its name.
-
-With the results of the simulation saved as `sim_basket`, we have the data
-necessary to compare Kobe to our independent shooter.
-
-Both data sets represent the results of 133 shot attempts, each with the same
-shooting percentage of 45%. We know that our simulated data is from a shooter
-that has independent shots. That is, we know the simulated shooter does not have
-a hot hand.
-
-### Comparing Kobe Bryant to the Independent Shooter
-
-
-**Exercise**: Using `calc_streak`, compute the streak lengths of `sim_basket`, and save the results in a data frame called `sim_streak`. Note that since the `sim_streak` object is just a vector and not a variable in a data frame, we don't need to first select it from a data frame like we did earlier when we calculated the streak lengths for Kobe's shots.
-
-```{r sim-streak-lengths}
-# type your code for the Exercise here, and Knit
-sim_streak <- calc_streak(sim_basket)
-```
-
-
-**Exercise**: Make a plot of the distribution of simulated streak lengths of the independent shooter. What is the typical streak length for this simulated independent shooter with a 45% shooting percentage? How long is the player's longest streak of baskets in 133 shots?
-
-```{r plot-sim-streaks}
-# type your code for the Exercise here, and Knit
-ggplot(sim_streak, aes(x = length)) +
- geom_histogram(binwidth = 1)
-
-sim_streak %>%
- summarise(max(length))
-```
-
-4. If you were to run the simulation of the independent shooter a second time, how
-would you expect its streak distribution to compare to the distribution from the
-exercise above?
-
-- Exactly the same
-- Somewhat similar
-- Totally different
-
-
-
-5. How does Kobe Bryant's distribution of streak lengths compare to the distribution
-of streak lengths for the simulated shooter? Using this comparison, do you have
-evidence that the hot hand model fits Kobe's shooting patterns?
-
-- The distributions look very similar. Therefore, there doesn't appear to be evidence for Kobe Bryant's hot hand.
-- The distributions look very similar. Therefore, there appears to be evidence for Kobe Bryant's hot hand.
-- The distributions look very different. Therefore, there doesn't appear to be evidence for Kobe Bryant's hot hand.
-- The distributions look very different. Therefore, there appears to be evidence for Kobe Bryant's hot hand.
-
-
-
-**Exercise**: What concepts from the course videos are covered in this lab? What
-concepts, if any, are not covered in the videos? Have you seen these concepts
-elsewhere, e.g. textbook, previous labs, or practice problems?
-
-
-
-This is a derivative of an [OpenIntro](https://www.openintro.org/stat/labs.php) lab, and is released under a [Attribution-NonCommercial-ShareAlike 3.0 United States](https://creativecommons.org/licenses/by-nc-sa/3.0/us/) license.
-
\ No newline at end of file
diff --git a/.Rproj.user/F65EB65E/sources/s-6CF1D576/370D4C4C-contents b/.Rproj.user/F65EB65E/sources/s-6CF1D576/370D4C4C-contents
deleted file mode 100644
index e69de29..0000000
diff --git a/.Rproj.user/F65EB65E/sources/s-6CF1D576/5F26E372-contents b/.Rproj.user/F65EB65E/sources/s-6CF1D576/5F26E372-contents
deleted file mode 100644
index e69de29..0000000
diff --git a/.Rproj.user/F65EB65E/sources/s-6CF1D576/86D16AD0 b/.Rproj.user/F65EB65E/sources/s-6CF1D576/86D16AD0
deleted file mode 100644
index a2d3f32..0000000
--- a/.Rproj.user/F65EB65E/sources/s-6CF1D576/86D16AD0
+++ /dev/null
@@ -1,33 +0,0 @@
-{
- "collab_server" : "",
- "contents" : "",
- "created" : 1569016556701.000,
- "dirty" : false,
- "encoding" : "",
- "folds" : "",
- "hash" : "0",
- "id" : "86D16AD0",
- "lastKnownWriteTime" : 2484833459456,
- "last_content_update" : 1569016556701,
- "path" : null,
- "project_path" : null,
- "properties" : {
- "cacheKey" : "8C046545",
- "caption" : "brfss_sl_al",
- "contentUrl" : "grid_resource/gridviewer.html?env=&obj=brfss_sl_al&cache_key=8C046545",
- "displayedObservations" : 229672,
- "environment" : "",
- "expression" : "brfss_sl_al",
- "object" : "brfss_sl_al",
- "preview" : 0,
- "totalObservations" : 229672,
- "variables" : 2
- },
- "read_only" : false,
- "read_only_alternatives" : [
- ],
- "relative_order" : 3,
- "source_on_save" : false,
- "source_window" : "",
- "type" : "r_dataframe"
-}
\ No newline at end of file
diff --git a/.Rproj.user/F65EB65E/sources/s-6CF1D576/86D16AD0-contents b/.Rproj.user/F65EB65E/sources/s-6CF1D576/86D16AD0-contents
deleted file mode 100644
index e69de29..0000000
diff --git a/.Rproj.user/F65EB65E/sources/s-6CF1D576/A0FB69B5 b/.Rproj.user/F65EB65E/sources/s-6CF1D576/A0FB69B5
deleted file mode 100644
index 6cba5f1..0000000
--- a/.Rproj.user/F65EB65E/sources/s-6CF1D576/A0FB69B5
+++ /dev/null
@@ -1,33 +0,0 @@
-{
- "collab_server" : "",
- "contents" : "",
- "created" : 1569016010177.000,
- "dirty" : false,
- "encoding" : "",
- "folds" : "",
- "hash" : "0",
- "id" : "A0FB69B5",
- "lastKnownWriteTime" : 2484693520344,
- "last_content_update" : 1569016010177,
- "path" : null,
- "project_path" : null,
- "properties" : {
- "cacheKey" : "B523010",
- "caption" : "brfss_clean",
- "contentUrl" : "grid_resource/gridviewer.html?env=&obj=brfss_clean&cache_key=B523010",
- "displayedObservations" : 491775,
- "environment" : "",
- "expression" : "brfss_clean",
- "object" : "brfss_clean",
- "preview" : 0,
- "totalObservations" : 491775,
- "variables" : 4
- },
- "read_only" : false,
- "read_only_alternatives" : [
- ],
- "relative_order" : 2,
- "source_on_save" : false,
- "source_window" : "",
- "type" : "r_dataframe"
-}
\ No newline at end of file
diff --git a/.Rproj.user/F65EB65E/sources/s-6CF1D576/A0FB69B5-contents b/.Rproj.user/F65EB65E/sources/s-6CF1D576/A0FB69B5-contents
deleted file mode 100644
index e69de29..0000000
diff --git a/.Rproj.user/F65EB65E/sources/s-6CF1D576/BA08E5BB b/.Rproj.user/F65EB65E/sources/s-6CF1D576/BA08E5BB
deleted file mode 100644
index 58d2115..0000000
--- a/.Rproj.user/F65EB65E/sources/s-6CF1D576/BA08E5BB
+++ /dev/null
@@ -1,25 +0,0 @@
-{
- "collab_server" : "",
- "contents" : "",
- "created" : 1569011028597.000,
- "dirty" : false,
- "encoding" : "UTF-8",
- "folds" : "",
- "hash" : "0",
- "id" : "BA08E5BB",
- "lastKnownWriteTime" : 1569018618,
- "last_content_update" : 1569018624652,
- "path" : "~/Data/OpenIntro Stats/intro_data_prob_project.Rmd",
- "project_path" : "intro_data_prob_project.Rmd",
- "properties" : {
- "cursorPosition" : "99,21",
- "scrollLine" : "71"
- },
- "read_only" : false,
- "read_only_alternatives" : [
- ],
- "relative_order" : 4,
- "source_on_save" : false,
- "source_window" : "",
- "type" : "r_markdown"
-}
\ No newline at end of file
diff --git a/.Rproj.user/F65EB65E/sources/s-6CF1D576/BA08E5BB-contents b/.Rproj.user/F65EB65E/sources/s-6CF1D576/BA08E5BB-contents
deleted file mode 100644
index 60bf5f2..0000000
--- a/.Rproj.user/F65EB65E/sources/s-6CF1D576/BA08E5BB-contents
+++ /dev/null
@@ -1,105 +0,0 @@
----
-title: "Exploring the BRFSS data"
-output:
- html_document:
- fig_height: 4
- highlight: pygments
- theme: spacelab
----
-
-## Setup
-
-### Load packages
-
-```{r load-packages, message = FALSE}
-library(ggplot2)
-library(dplyr)
-library(tidyverse)
-```
-
-### Load data
-
-```{r load-data}
-load("brfss2013.RData")
-```
-
-
-
-* * *
-
-## Part 1: Data
-
-BRFSS conducts both landline telephone and cellular telephone based surveys. In conducting the BRFSS landline telephone survey, interviewers collect data from a randomly selected adult in a household. In conducting the cellular telephone version of the BRFSS questionnaire, interviewers collect data from an adult who participates by using a cellular telephone and resides in a private residence or college housing. The telephone numbers dialed are generated using random number generation.
-
-Health characteristics estimated from the BRFSS pertain to the non-institutionalized adult population, aged 18 years or older, who reside in the US.
-
-Using this methodology helps control selection bias. However the survey is somewhat lengthy (often around 45 minutes to complete), and additionally pertains to potentaially sensitive topics which may introduce non-response bias.
-
-This is an observational study over a limited point in time.
-
-* * *
-
-## Part 2: Research questions
-The following research questions pertain to sleep amount.
-
-**Research quesion 1:**
-Is there a relationship between time slept and alcohol consumption?
-
-**Research quesion 2:**
-Is there a relationship between time slept and number of children in the household?
-
-**Research quesion 3:**
-Is there a relationship between time slept and the type of payment for work?
-
-* * *
-
-## Part 3: Exploratory data analysis
-
-Create dataset with relavent variables
-```{r}
-brfss_clean <- brfss2013 %>%
- select(sleptim1, avedrnk2, children, scntlpad)
-```
-**Research quesion 1:**
-
-```{r}
-brfss_sl_alc <- brfss_clean %>%
- select(sleptim1, avedrnk2) %>%
- drop_na()
-
-summary(brfss_sl_alc)
-
-ggplot(data = brfss_sl_alc, aes(x = sleptim1, y = avedrnk2)) +
- geom_point()
-```
-
-
-
-**Research quesion 2:**
-
-```{r}
-brfss_sl_chl <- brfss_clean %>%
- select(sleptim1, children) %>%
- drop_na()
-
-summary(brfss_sl_chl)
-
-ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1)) +
- geom_point()
-```
-
-
-
-**Research quesion 3:**
-
-```{r}
-brfss_sl_pay <- brfss_clean %>%
- select(sleptim1, scntlpad) %>%
- drop_na()
-
-summary(brfss_sl_pay)
-
-ggplot(data = brfss_sl_pay, aes(x = scntlpad, y = sleptim1)) +
- geom_boxplot()
-```
-
diff --git a/.Rproj.user/F65EB65E/sources/s-6CF1D576/lock_file b/.Rproj.user/F65EB65E/sources/s-6CF1D576/lock_file
deleted file mode 100644
index e69de29..0000000
diff --git a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/F65EB65E6CF1D576/chunks.json b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/F65EB65E6CF1D576/chunks.json
index 7661453..8c1abe8 100644
--- a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/F65EB65E6CF1D576/chunks.json
+++ b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/F65EB65E6CF1D576/chunks.json
@@ -1 +1 @@
-{"chunk_definitions":[{"chunk_id":"cnzitlqf52tib","chunk_label":"load-packages","document_id":"BA08E5BB","expansion_state":0,"options":{"engine":"r","label":"load-packages","message":false},"row":17,"row_count":1,"visible":true},{"chunk_id":"c2g7quw7eem19","chunk_label":"load-data","document_id":"BA08E5BB","expansion_state":0,"options":{"engine":"r","label":"load-data"},"row":23,"row_count":1,"visible":true},{"chunk_id":"cs7yetebuhqr9","chunk_label":"unnamed-chunk-1","document_id":"BA08E5BB","expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-11"},"row":61,"row_count":1,"visible":true},{"chunk_id":"c82pakogfn737","chunk_label":"unnamed-chunk-4","document_id":"BA08E5BB","expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-27"},"row":103,"row_count":1,"visible":true}],"doc_write_time":1569018618}
\ No newline at end of file
+{"chunk_definitions":[{"chunk_id":"cbwr2c6ltg73x","chunk_label":"load-packages","document_id":"6DC06A30","expansion_state":0,"options":{"engine":"r","label":"load-packages","message":false},"row":17,"row_count":1,"visible":true},{"chunk_id":"cf0rgtv18rg39","chunk_label":"unnamed-chunk-1","document_id":"6DC06A30","expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-2"},"row":32,"row_count":1,"visible":true}],"doc_write_time":1569053411}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c2g7quw7eem19/000002.csv b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c2g7quw7eem19/000002.csv
deleted file mode 100644
index d3c1af1..0000000
--- a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c2g7quw7eem19/000002.csv
+++ /dev/null
@@ -1 +0,0 @@
-"0","load(""brfss2013.RData"")"
diff --git a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c82pakogfn737/000002.csv b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c82pakogfn737/000002.csv
deleted file mode 100644
index e7fa889..0000000
--- a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c82pakogfn737/000002.csv
+++ /dev/null
@@ -1,36 +0,0 @@
-"0","brfss_sl_pay <- brfss_clean %>%"
-"0"," select(sleptim1, scntlpad) %>%"
-"0"," drop_na()"
-"0",""
-"0","summary(brfss_sl_pay)"
-"1",""
-"1"," sleptim1 "
-"1"," scntlpad "
-"1","
-"
-"1"," Min. : 1.000 "
-"1"," Paid by salary :10099 "
-"1","
-"
-"1"," 1st Qu.: 6.000 "
-"1"," Paid by the hour : 9089 "
-"1","
-"
-"1"," Median : 7.000 "
-"1"," Paid by the job / task: 1246 "
-"1","
-"
-"1"," Mean : 7.288 "
-"1"," Paid some other way : 1122 "
-"1","
-"
-"1"," 3rd Qu.: 8.000 "
-"1"," "
-"1","
-"
-"1"," Max. :24.000 "
-"1"," "
-"1","
-"
-"0","ggplot(data = brfss_sl_pay, aes(x = scntlpad, y = sleptim1)) +"
-"0"," geom_boxplot()"
diff --git a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c82pakogfn737/000003.metadata b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c82pakogfn737/000003.metadata
deleted file mode 100644
index f5c61eb..0000000
--- a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c82pakogfn737/000003.metadata
+++ /dev/null
@@ -1 +0,0 @@
-{"conditions":[],"height":432.6328800988875,"size_behavior":0,"width":700.0000000000000}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c82pakogfn737/000003.png b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c82pakogfn737/000003.png
deleted file mode 100644
index d5244fd..0000000
Binary files a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c82pakogfn737/000003.png and /dev/null differ
diff --git a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c82pakogfn737/000003.snapshot b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c82pakogfn737/000003.snapshot
deleted file mode 100644
index 42eb047..0000000
Binary files a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/c82pakogfn737/000003.snapshot and /dev/null differ
diff --git a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cg9jvw9j4jst0/000002.csv b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cg9jvw9j4jst0/000002.csv
deleted file mode 100644
index 58345c9..0000000
--- a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cg9jvw9j4jst0/000002.csv
+++ /dev/null
@@ -1,38 +0,0 @@
-"0","brfss_sl_chl <- brfss_clean %>%"
-"0"," select(sleptim1, children) %>%"
-"0"," drop_na()"
-"0",""
-"0","summary(brfss_sl_chl)"
-"1",""
-"1"," sleptim1 "
-"1"," children "
-"1","
-"
-"1"," Min. : 1.000 "
-"1"," Min. : 0.0000 "
-"1","
-"
-"1"," 1st Qu.: 6.000 "
-"1"," 1st Qu.: 0.0000 "
-"1","
-"
-"1"," Median : 7.000 "
-"1"," Median : 0.0000 "
-"1","
-"
-"1"," Mean : 7.051 "
-"1"," Mean : 0.5202 "
-"1","
-"
-"1"," 3rd Qu.: 8.000 "
-"1"," 3rd Qu.: 1.0000 "
-"1","
-"
-"1"," Max. :103.000 "
-"1"," Max. :47.0000 "
-"1","
-"
-"0","ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1)) +"
-"0"," geom_histogram()"
-"2","Error: stat_bin() must not be used with a y aesthetic.
-"
diff --git a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cg9jvw9j4jst0/000003.error b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cg9jvw9j4jst0/000003.error
deleted file mode 100644
index 36c6ba0..0000000
--- a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cg9jvw9j4jst0/000003.error
+++ /dev/null
@@ -1 +0,0 @@
-{"frames":[{"character_number":0,"end_character_number":0,"end_line_number":0,"file":"","func":"(function (x, ...) \nUseMethod(\"print\"))(x)","line_number":0},{"character_number":0,"end_character_number":0,"end_line_number":0,"file":"","func":"print.ggplot(x)","line_number":0},{"character_number":0,"end_character_number":0,"end_line_number":0,"file":"","func":"ggplot_build(x)","line_number":0},{"character_number":0,"end_character_number":0,"end_line_number":0,"file":"","func":"ggplot_build.ggplot(x)","line_number":0},{"character_number":0,"end_character_number":0,"end_line_number":0,"file":"","func":"by_layer(function(l, d) l$compute_statistic(d, layout))","line_number":0},{"character_number":0,"end_character_number":0,"end_line_number":0,"file":"","func":"f(l = layers[[i]], d = data[[i]])","line_number":0},{"character_number":0,"end_character_number":0,"end_line_number":0,"file":"","func":"l$compute_statistic(d, layout)","line_number":0},{"character_number":0,"end_character_number":0,"end_line_number":0,"file":"","func":"f(..., self = self)","line_number":0},{"character_number":0,"end_character_number":0,"end_line_number":0,"file":"","func":"self$stat$setup_params(data, self$stat_params)","line_number":0},{"character_number":0,"end_character_number":0,"end_line_number":0,"file":"","func":"f(...)","line_number":0},{"character_number":0,"end_character_number":0,"end_line_number":0,"file":"","func":"stop(\"stat_bin() must not be used with a y aesthetic.\", call. = FALSE)","line_number":0}],"message":"Error: stat_bin() must not be used with a y aesthetic.\n"}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cg9jvw9j4jst0/000004.metadata b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cg9jvw9j4jst0/000004.metadata
deleted file mode 100644
index f5c61eb..0000000
--- a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cg9jvw9j4jst0/000004.metadata
+++ /dev/null
@@ -1 +0,0 @@
-{"conditions":[],"height":432.6328800988875,"size_behavior":0,"width":700.0000000000000}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cg9jvw9j4jst0/000004.png b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cg9jvw9j4jst0/000004.png
deleted file mode 100644
index 6848a51..0000000
Binary files a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cg9jvw9j4jst0/000004.png and /dev/null differ
diff --git a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/chunks.json b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/chunks.json
index 507cd14..aef21c9 100644
--- a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/chunks.json
+++ b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/chunks.json
@@ -1 +1 @@
-{"chunk_definitions":[{"chunk_id":"cnzitlqf52tib","chunk_label":"load-packages","document_id":"BA08E5BB","expansion_state":0,"options":{"engine":"r","label":"load-packages","message":false},"row":17,"row_count":1,"visible":true},{"chunk_id":"c2g7quw7eem19","chunk_label":"load-data","document_id":"BA08E5BB","expansion_state":0,"options":{"engine":"r","label":"load-data"},"row":23,"row_count":1,"visible":true},{"chunk_id":"cs7yetebuhqr9","chunk_label":"unnamed-chunk-1","document_id":"BA08E5BB","expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-11"},"row":61,"row_count":1,"visible":true},{"chunk_id":"cg9jvw9j4jst0","chunk_label":"unnamed-chunk-3","document_id":"BA08E5BB","expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-28"},"row":88,"row_count":1,"visible":true},{"chunk_id":"c82pakogfn737","chunk_label":"unnamed-chunk-4","document_id":"BA08E5BB","expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-27"},"row":103,"row_count":1,"visible":true}],"doc_write_time":1569018385}
\ No newline at end of file
+{"chunk_definitions":[{"chunk_id":"cbwr2c6ltg73x","chunk_label":"load-packages","document_id":"6DC06A30","expansion_state":0,"options":{"engine":"r","label":"load-packages","message":false},"row":15,"row_count":1,"visible":true},{"chunk_id":"cf0rgtv18rg39","chunk_label":"load-data","document_id":"6DC06A30","expansion_state":0,"options":{"engine":"r","label":"unnamed-chunk-2"},"row":30,"row_count":1,"visible":true}],"doc_write_time":1569366200}
\ No newline at end of file
diff --git a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cnzitlqf52tib/000002.csv b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cnzitlqf52tib/000002.csv
deleted file mode 100644
index fda055e..0000000
--- a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cnzitlqf52tib/000002.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-"0","library(ggplot2)"
-"0","library(dplyr)"
-"0","library(tidyverse)"
diff --git a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cs7yetebuhqr9/000004.csv b/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cs7yetebuhqr9/000004.csv
deleted file mode 100644
index f1a2345..0000000
--- a/.Rproj.user/shared/notebooks/30DB09BC-intro_data_prob_project/1/s/cs7yetebuhqr9/000004.csv
+++ /dev/null
@@ -1,2 +0,0 @@
-"0","brfss_clean <- brfss2013 %>%"
-"0"," select(sleptim1, avedrnk2, children, scntlpad)"
diff --git a/intro_data_prob_project.Rmd b/intro_data_prob_project.Rmd
index 60bf5f2..06875b9 100644
--- a/intro_data_prob_project.Rmd
+++ b/intro_data_prob_project.Rmd
@@ -1,5 +1,5 @@
---
-title: "Exploring the BRFSS data"
+title: "Exploring the BRFSS 2013 data"
output:
html_document:
fig_height: 4
@@ -12,15 +12,22 @@ output:
### Load packages
```{r load-packages, message = FALSE}
-library(ggplot2)
-library(dplyr)
library(tidyverse)
```
### Load data
+Get data file if it doesn't exist
```{r load-data}
-load("brfss2013.RData")
+url <- "https://d18ky98rnyall9.cloudfront.net/_384b2d9eda4b29131fb681b243a7767d_brfss2013.RData"
+
+ destfile <- "./brfss2013.RData"
+ if (!file.exists(destfile)) {
+ setInternet2(TRUE)
+ download.file(url ,destfile,method="auto")
+ }
+ load(destfile)
+
```
@@ -40,7 +47,14 @@ This is an observational study over a limited point in time.
* * *
## Part 2: Research questions
-The following research questions pertain to sleep amount.
+
+Sleep is a critical indicator of overall health and well-being. The exact sleep needs for individuals vary, however the National Sleep Foundation has provided the following sleep duration recommendations.[1](#myfootnote1)
+
+- Younger adults (18-25): 7-9 hours
+- Adults (26-64): 7-9 hours
+- Older adults (65+): 7-8 hours
+
+Using the BRFSS 2013 dataset we will examine potential relations between sleep duration and a variatey of health and social variables.
**Research quesion 1:**
Is there a relationship between time slept and alcohol consumption?
@@ -51,6 +65,9 @@ Is there a relationship between time slept and number of children in the househo
**Research quesion 3:**
Is there a relationship between time slept and the type of payment for work?
+**Research question 4:**
+What percentage of the population meets suggested sleep guidlines?
+
* * *
## Part 3: Exploratory data analysis
@@ -69,8 +86,9 @@ brfss_sl_alc <- brfss_clean %>%
summary(brfss_sl_alc)
-ggplot(data = brfss_sl_alc, aes(x = sleptim1, y = avedrnk2)) +
- geom_point()
+p_alc <- ggplot(data = brfss_sl_alc, aes(x = sleptim1, y = avedrnk2))
+
+p_alc + geom_point() + geom_smooth()
```
@@ -78,14 +96,58 @@ ggplot(data = brfss_sl_alc, aes(x = sleptim1, y = avedrnk2)) +
**Research quesion 2:**
```{r}
-brfss_sl_chl <- brfss_clean %>%
- select(sleptim1, children) %>%
- drop_na()
+#prepare dataset, remove obvious outliers and na
+brfss_sl_chl <- brfss2013 %>%
+ select(sleptim1, children, X_ageg5yr) %>%
+ drop_na() %>%
+ filter(children < 20 & sleptim1 < 24)
+
+#look at the data with tables and numerical summary
+head(brfss_sl_chl)
summary(brfss_sl_chl)
-ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1)) +
- geom_point()
+brfss_sl_chl %>%
+ group_by(X_ageg5yr) %>%
+ summarise(mean = mean(children), median = median(children))
+
+#look at the data visually
+p_chl <- ggplot(data = brfss_sl_chl, aes(x = children, y = sleptim1))
+
+p_chl + geom_point()
+
+p_chl + geom_jitter() + coord_flip()
+
+ggplot(data = brfss_sl_chl, aes(x = sleptim1, y = children)) +
+ geom_point()
+
+ag_ch <- ggplot(data = brfss_sl_chl, aes(x = X_ageg5yr, y = children))
+
+ag_ch + geom_boxplot() + coord_flip()
+
+a_c <- brfss_sl_chl %>%
+ group_by(X_ageg5yr) %>%
+ mutate(outlier_high = children > quantile(children, .75) + 1.50*IQR(children), outlier_low = children < quantile(children, .25) - 1.50*IQR(children)) %>%
+ ungroup()
+
+a_c <- a_c %>%
+ group_by(X_ageg5yr) %>%
+ mutate(outlier = children > median(children) + IQR(children) * 1.5) %>%
+ ungroup()
+
+a_c <- a_c %>%
+ mutate(outlier_color = case_when(outlier_high ~ "red", outlier_low ~ "steelblue"))
+
+head(a_c)
+
+
+ggplot(data = a_c, aes(x = X_ageg5yr, y = children)) +
+ geom_boxplot(outlier.shape = NA) + coord_flip() + geom_jitter(color = a_c$outlier_color, width = .2)
+
+ggplot(data = a_c, aes(x = X_ageg5yr, y = children)) +
+ geom_boxplot(outlier.shape = NA) + coord_flip() +
+ geom_jitter(data = filter(a_c, outlier ==T), width = .2)
+
```
@@ -97,9 +159,20 @@ brfss_sl_pay <- brfss_clean %>%
select(sleptim1, scntlpad) %>%
drop_na()
-summary(brfss_sl_pay)
+brfss_sl_pay %>%
+ group_by(scntlpad) %>%
+ summarise(median(sleptim1), mean(sleptim1), IQR(sleptim1))
+
+p_pay <- ggplot(data = brfss_sl_pay, aes(x = scntlpad, y = sleptim1))
+
+p_pay + geom_boxplot()
+```
+
+**Research question 4:**
+
+```{r}
-ggplot(data = brfss_sl_pay, aes(x = scntlpad, y = sleptim1)) +
- geom_boxplot()
```
+References:
+1: National Sleep Foundation's sleep time duration recommendations: methodology and results summary. Sleep Health. 2015 Mar;1(1):40-43. doi: 10.1016/j.sleh.2014.12.010. Epub 2015 Jan 8.
\ No newline at end of file
diff --git a/intro_data_prob_project.html b/intro_data_prob_project.html
index 4650901..f5cea50 100644
--- a/intro_data_prob_project.html
+++ b/intro_data_prob_project.html
@@ -12,7 +12,7 @@
-Exploring the BRFSS data
+Exploring the BRFSS 2013 data