Class Activity 15

# load the necessary libraries
library(tidyverse)
library(dplyr)
library(stringr)


# Read the energy data from GitHub with an explicit column specification:
# every column is parsed as a double unless overridden below.
energy <- read_csv(
  "https://raw.githubusercontent.com/deepbas/statdatasets/main/energy.csv",
  # Spell out `col_types` in full rather than relying on partial
  # argument matching of `col_type`
  col_types = cols(
    .default = col_double(),
    # An empty format string is passed to the datetime parser
    Timestamp = col_datetime(format = ""),
    # Fix the level order so that days sort Monday through Sunday
    dayWeek = col_factor(
      levels = c("Mon", "Tues", "Wed", "Thurs", "Fri", "Sat", "Sun")
    )
  )
)

Group Activity 1

a. if and for loop

Write a for loop to iterate over the columns of the ‘energy’ dataset and print the names of all columns containing the string “House”. Please use the function colnames() to extract the column names and store the results in a list.

Click for answer

Answer:

# Start with an empty list that will collect the matching column names
house_columns <- list()

# Visit each column name of the 'energy' dataset in turn
for (col_name in colnames(energy)) {
  # Move on when the name does not contain the string "House"
  if (!str_detect(col_name, "House")) {
    next
  }

  # Otherwise append the name to the end of the list
  house_columns <- append(house_columns, col_name)
}

# Flatten the list into a character vector and print it
house_columns <- unlist(house_columns)
house_columns
 [1] "Allen_House"                      "Alumni_Guest_House/Johnson_House"
 [3] "Benton_House"                     "Berg_House"                      
 [5] "Bird_House"                       "Chaney_House"                    
 [7] "Clader_House"                     "Dacie_Moses_House"               
 [9] "Douglas_House"                    "Farm_House"                      
[11] "Geffert_House"                    "Headley_House"                   
[13] "Henrickson_House"                 "Henry_House"                     
[15] "Hill_House"                       "Hilton_House"                    
[17] "Hoppin_House_(Alumni)"            "Huntington_House"                
[19] "Jewett_House"                     "Jones_House"                     
[21] "Nutting_House"                    "Page_House_West"                 
[23] "Parish_House_"                    "Parr_House"                      
[25] "Pollock_House"                    "Prentice_House"                  
[27] "Rayment_House"                    "Rice_House"                      
[29] "Rogers_House"                     "Ryberg_House"                    
[31] "Seccombe_House"                   "Sperry_House"                    
[33] "Stimson_House"                    "Strong_House"                    
[35] "Whittier_House"                   "Wilson_House"                    

b. for loop and mean

Using a for loop, calculate and print the mean energy consumption of the houses you identified in part a.

Click for answer

Answer:

# Mean energy consumption of each house column.
# Relies on `house_columns`, the character vector of column names built in
# part a.

# Preallocate one slot per house; growing the vector with c() inside the
# loop would copy it on every iteration
mean_energy_consumption <- numeric(length(house_columns))

# Fill in the mean of each house column, ignoring missing values
for (i in seq_along(house_columns)) {
  house_col <- house_columns[i]
  mean_energy_consumption[i] <- mean(energy[[house_col]], na.rm = TRUE)
}

# Combine the house names and their means into a two-column tibble
house_mean_energy <- tibble(
  House = house_columns,
  MeanEnergyConsumption = mean_energy_consumption
)

# Print the result as a formatted table
house_mean_energy %>%
  knitr::kable()
House MeanEnergyConsumption
Allen_House 0.9821865
Alumni_Guest_House/Johnson_House 20.2631152
Benton_House 1.8849290
Berg_House 1.3174340
Bird_House 2.3222680
Chaney_House 1.0715123
Clader_House 0.4646776
Dacie_Moses_House 1.2776465
Douglas_House 0.7219500
Farm_House 5.0599020
Geffert_House 0.9360400
Headley_House 1.4555605
Henrickson_House 3.4407858
Henry_House 1.3639619
Hill_House 1.4735884
Hilton_House 0.4248030
Hoppin_House_(Alumni) 1.8760474
Huntington_House 1.2395238
Jewett_House 0.8987697
Jones_House 0.8680271
Nutting_House 4.3967234
Page_House_West 1.8923490
Parish_House_ 12.6793378
Parr_House 9.7210618
Pollock_House 1.1831426
Prentice_House 0.9089497
Rayment_House 0.8005664
Rice_House 1.1568457
Rogers_House 0.5634289
Ryberg_House 1.0729988
Seccombe_House 2.6874199
Sperry_House 0.7052983
Stimson_House 2.0659904
Strong_House 2.5410595
Whittier_House 1.0424369
Wilson_House 1.0435830

Group Activity 2

1. Make a data frame of the deciles (the 10th through 90th percentiles) of energy use for the buildings in columns 9-90 (you will need na.rm = TRUE)

Click for answer

Answer:

# Deciles (10th through 90th percentiles) of every column in positions 9-90;
# map_dfc() binds the per-column results side by side, one column each
qdf <- energy %>%
  select(9:90) %>%
  map_dfc(function(readings) {
    quantile(readings, probs = seq(0.1, 0.9, by = 0.1), na.rm = TRUE)
  })
qdf
# A tibble: 9 × 82
  `100_Nevada_Street` `104_Maple_St.` `106_Winona_St.` Allen_House
                <dbl>           <dbl>            <dbl>       <dbl>
1              0.0972            1.04            0.601       0.756
2              0.120             1.11            0.632       0.781
3              0.183             1.18            0.673       0.941
4              0.461             1.18            0.681       0.983
5              0.710             1.42            0.692       1.00 
6              0.795             1.42            0.865       1.01 
7              0.915             1.54            1.10        1.07 
8              1.11              1.56            1.20        1.07 
9              1.24              1.67            1.27        1.25 
# ℹ 78 more variables: `Alumni_Guest_House/Johnson_House` <dbl>,
#   Arboretum_Office <dbl>, Art_Studios <dbl>, Benton_House <dbl>,
#   Berg_House <dbl>, Bird_House <dbl>, Boliou_Memorial_Art_Bldg. <dbl>,
#   Burton_Hall <dbl>, `Cassat_Hall_/_James_Hall` <dbl>,
#   `Center_for_Mathematics_&_Computing` <dbl>, Chaney_House <dbl>,
#   Clader_House <dbl>, College_Warehouse <dbl>, Cowling_Gym <dbl>,
#   Dacie_Moses_House <dbl>, Davis_Hall <dbl>, Douglas_House <dbl>, …

2. Add a variable to identify the quantile

Click for answer

Answer:

# Same decile table as before, plus a `stat` column labelling each row
# ("quantile_10" through "quantile_90") in the order of the probabilities
qdf <- energy %>%
  select(9:90) %>%
  map_dfc(function(readings) {
    quantile(readings, probs = seq(0.1, 0.9, by = 0.1), na.rm = TRUE)
  }) %>%
  mutate(stat = str_c("quantile_", seq(10, 90, by = 10)))
qdf
# A tibble: 9 × 83
  `100_Nevada_Street` `104_Maple_St.` `106_Winona_St.` Allen_House
                <dbl>           <dbl>            <dbl>       <dbl>
1              0.0972            1.04            0.601       0.756
2              0.120             1.11            0.632       0.781
3              0.183             1.18            0.673       0.941
4              0.461             1.18            0.681       0.983
5              0.710             1.42            0.692       1.00 
6              0.795             1.42            0.865       1.01 
7              0.915             1.54            1.10        1.07 
8              1.11              1.56            1.20        1.07 
9              1.24              1.67            1.27        1.25 
# ℹ 79 more variables: `Alumni_Guest_House/Johnson_House` <dbl>,
#   Arboretum_Office <dbl>, Art_Studios <dbl>, Benton_House <dbl>,
#   Berg_House <dbl>, Bird_House <dbl>, Boliou_Memorial_Art_Bldg. <dbl>,
#   Burton_Hall <dbl>, `Cassat_Hall_/_James_Hall` <dbl>,
#   `Center_for_Mathematics_&_Computing` <dbl>, Chaney_House <dbl>,
#   Clader_House <dbl>, College_Warehouse <dbl>, Cowling_Gym <dbl>,
#   Dacie_Moses_House <dbl>, Davis_Hall <dbl>, Douglas_House <dbl>, …

3. Reshape the data frame to make variables stat (describing the quantile), building and quantiles (quantile value)

Click for answer

Answer:

# Build the decile table, label each row with `stat`, then reshape to long
# form with one row per (stat, building) pair and the value in `quantiles`.
qdf <- energy %>%
  select(9:90) %>%
  map_dfc(quantile, probs = seq(0.1, 0.9, by = 0.1), na.rm = TRUE) %>%
  mutate(stat = str_c("quantile_", seq(10, 90, by = 10))) %>%
  pivot_longer(
    # Pivot every column except the label, so no hard-coded column range
    # has to be kept in sync with the select() above
    cols = !stat,
    names_to = "building",
    values_to = "quantiles"
  )
qdf
# A tibble: 738 × 3
   stat        building                         quantiles
   <chr>       <chr>                                <dbl>
 1 quantile_10 100_Nevada_Street                   0.0972
 2 quantile_10 104_Maple_St.                       1.04  
 3 quantile_10 106_Winona_St.                      0.601 
 4 quantile_10 Allen_House                         0.756 
 5 quantile_10 Alumni_Guest_House/Johnson_House   17.0   
 6 quantile_10 Arboretum_Office                    0.13  
 7 quantile_10 Art_Studios                         0.23  
 8 quantile_10 Benton_House                        1.59  
 9 quantile_10 Berg_House                          1.06  
10 quantile_10 Bird_House                          1.42  
# ℹ 728 more rows

4. Plot the KWH value of each quantile on the x-axis against its percentile on the y-axis for the buildings Sayles-Hill, Language_&_Dining_Center, and Olin_Hall_of_Science

Click for answer

Answer:

# Keep the three buildings of interest, then plot the KWH value of each
# decile (x) against its percentile (y), one coloured line per building
qdf %>%
  filter(
    building %in% c(
      "Sayles-Hill",
      "Language_&_Dining_Center",
      "Olin_Hall_of_Science"
    )
  ) %>%
  ggplot(
    aes(
      x = quantiles,
      # parse_number() pulls the number out of labels like "quantile_10"
      y = parse_number(stat),
      color = building
    )
  ) +
  geom_point() +
  geom_line(aes(group = building)) +
  labs(x = "KWH", y = "Percentile (%)") +
  scale_y_continuous(breaks = seq(10, 90, by = 10))