We got this data from http://data.hartford.gov/
# Load the daily facility population counts and shorten the column names.
# All four long CSV headers are replaced in a single rename() call, and the
# Date column is parsed from month/day/year text into a Date.
correction <-
  read_csv("Correctional_Facility_Daily_Population_Count_By_Facility.csv") %>%
  rename(
    name = `Facility Name`,
    accused = `Accused/Other Status Count`,
    sentenced = `Sentenced Status Count`,
    total = `Total Facility Population Count`
  ) %>%
  mutate(Date = mdy(Date))
# Mean of the accused count for each facility, listed from the highest
# mean to the lowest.
df_stats(accused ~ name, data = correction, mean) %>%
  arrange(desc(mean_accused))
## name mean_accused
## 1 HARTFORD CC 760.17326233
## 2 BRIDGEPORT CC 603.92241300
## 3 NEW HAVEN CC 576.88908273
## 4 CORRIGAN CI 398.67181333
## 5 YORK CI 349.01013125
## 6 WALKER RC 198.43179411
## 7 OSBORN CI 159.13378019
## 8 NIANTIC 143.98761714
## 9 GARNER 137.70524297
## 10 MANSON YI 121.23294463
## 11 MORGAN ST 109.32900081
## 12 NORTHERN CI 89.11250146
## 13 MACDOUGALL 83.53118399
## 14 UNION AVE 72.13440551
## 15 JENNINGS RD DC 71.65614237
## 16 RADGOWSKI 41.28059866
## 17 CHESHIRE CI 34.73041961
## 18 145 27.50000000
## 19 WILLARD-CYBULSKI CI 26.43879626
## 20 LITCHFIELD 25.29713952
## 21 ROBINSON CI 22.95712056
## 22 BROOKLYN CI 18.61325865
## 23 NIANTIC ANNEX 13.29213483
## 24 GATES CI 9.84133511
## 25 ENFIELD CI 9.67642140
## 26 WEBSTER CI 3.65321546
## 27 BERGIN CI 3.62180669
## 28 EDDY/DWI UNIT 0.50785855
## 29 HARTELL/DWI UNIT 0.39505155
## 30 CYBULSKI 0.06140879
## 31 WILLARD CI 0.01356050
## 32 GREENSVILLE 0.00000000
## 33 HARTELL/F 0.00000000
## 34 MALONEY CI 0.00000000
## 35 W SUB ABUSE 0.00000000
## 36 WALLENS RIDGE 0.00000000
# Keep only the HARTFORD CC rows, then plot the accused count against Date,
# first as individual points and then as a connected line.
hartford <- filter(correction, name == "HARTFORD CC")

gf_point(accused ~ Date, data = hartford)
gf_line(accused ~ Date, data = hartford)
I wonder how Osborn and Enfield look. Let’s filter the data down to just those two facilities.
# Show only the rows belonging to the Osborn and Enfield facilities.
correction %>%
  filter(name %in% c("OSBORN CI", "ENFIELD CI"))
## # A tibble: 21,655 x 5
## Date name accused sentenced total
## <date> <chr> <int> <int> <int>
## 1 1988-10-01 ENFIELD CI 0 447 447
## 2 1988-10-01 OSBORN CI 12 1407 1419
## 3 1988-10-02 ENFIELD CI 0 447 447
## 4 1988-10-02 OSBORN CI 12 1407 1419
## 5 1988-10-03 ENFIELD CI 0 447 447
## 6 1988-10-03 OSBORN CI 12 1407 1419
## 7 1988-10-04 ENFIELD CI 0 470 470
## 8 1988-10-04 OSBORN CI 12 1399 1411
## 9 1988-10-05 ENFIELD CI 0 470 470
## 10 1988-10-05 OSBORN CI 11 1400 1411
## # ... with 21,645 more rows
(We copied this code from Danny’s resampling lesson so that we didn’t have to start from scratch!)
# Sampling distribution of the mean of `total` for samples of 50 rows:
# each of the 2000 trials draws 50 rows without replacement and records the
# mean of `total`. The histogram then shows the 2000 sample means.
Trials <- do(2000) * {
  df_stats(
    ~ total,
    data = sample_n(correction, size = 50, replace = FALSE),
    mean
  )
}

gf_histogram(~ mean_total, data = Trials)
# Sampling distribution of the mean of `total` for one sample size.
#
# data:        data frame with a `total` column to sample rows from.
# sample_size: number of rows drawn (without replacement) in each trial.
# reps:        number of trials to run.
#
# Returns the result of do(reps) * {...}, which has one row per trial and a
# `mean_total` column holding that trial's sample mean.
sample_means <- function(data, sample_size, reps = 2000) {
  do(reps) * {
    data %>%
      sample_n(size = sample_size, replace = FALSE) %>%
      df_stats(~ total, mean)
  }
}

# Same experiment at three sample sizes; only the sample size differs.
Trials5 <- sample_means(correction, 5)
Trials10 <- sample_means(correction, 10)
Trials100 <- sample_means(correction, 100)

# One column of sample means per sample size, side by side.
alltrials <- data.frame(
  ss5 = Trials5$mean_total,
  ss10 = Trials10$mean_total,
  ss100 = Trials100$mean_total
)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:Matrix':
##
## expand
# Stack the three columns of sample means into long form: `which` holds the
# source column name (ss5 / ss10 / ss100) and `mean` holds the sample mean.
alltrials <- gather(alltrials, key = "which", value = "mean")

# Compare the three sampling distributions: overlaid histograms, violins,
# and the mean and standard deviation of the sample means per sample size.
gf_histogram(~ mean, data = alltrials, fill = ~ which)
gf_violin(mean ~ which, data = alltrials)
alltrials %>%
  df_stats(mean ~ which, mean, sd)
## which mean_mean sd_mean
## 1 ss10 786.0325 130.82385
## 2 ss100 787.1333 41.07875
## 3 ss5 795.0815 188.78064
What about the means of the groups?
# Mean of the 2000 sample means stored in Trials.
df_stats(~ mean_total, data = Trials, mean)
## mean_mean_total
## 1 787.365
What was our population mean?
# Mean of `total` over every row of the full data set, for comparison with
# the mean of the sample means.
df_stats(~ total, data = correction, mean)
## mean_total
## 1 788.0564