# load the necessary libraries
library(tidyverse)
library(dplyr)
library(stringr)
# Read the energy data set from the course GitHub repository.
# Every column is parsed as double unless listed below: Timestamp is read as
# a datetime (empty format string, i.e. readr's default datetime parsing) and
# dayWeek as a factor with levels ordered Monday through Sunday.
energy <- read_csv(
  "https://raw.githubusercontent.com/deepbas/statdatasets/main/energy.csv",
  col_types = cols(
    .default = col_double(),
    Timestamp = col_datetime(format = ""),
    dayWeek = col_factor(
      levels = c("Mon", "Tues", "Wed", "Thurs", "Fri", "Sat", "Sun")
    )
  )
)
Class Activity 15
Group Activity 1
a. `if` and `for` loop
Write a `for` loop to iterate over the columns of the `energy` dataset and print the names of all columns containing the string "House". Please use the function `colnames()` to extract the column names and store the results in a list.
Click for answer
Answer:
# Collect the names of all columns of `energy` whose name contains "House".

# Create an empty list to store the column names
house_columns <- list()

# Extract the column names once instead of on every iteration
column_names <- colnames(energy)

# Iterate over the columns of the 'energy' dataset
for (i in seq_along(column_names)) {
  col_name <- column_names[i]
  # Check if the column name contains the string "House"
  if (str_detect(col_name, "House")) {
    # Append the column name to the end of the list
    house_columns[[length(house_columns) + 1]] <- col_name
  }
}

# Flatten the list to a character vector and print it
house_columns <- unlist(house_columns)
house_columns
[1] "Allen_House" "Alumni_Guest_House/Johnson_House"
[3] "Benton_House" "Berg_House"
[5] "Bird_House" "Chaney_House"
[7] "Clader_House" "Dacie_Moses_House"
[9] "Douglas_House" "Farm_House"
[11] "Geffert_House" "Headley_House"
[13] "Henrickson_House" "Henry_House"
[15] "Hill_House" "Hilton_House"
[17] "Hoppin_House_(Alumni)" "Huntington_House"
[19] "Jewett_House" "Jones_House"
[21] "Nutting_House" "Page_House_West"
[23] "Parish_House_" "Parr_House"
[25] "Pollock_House" "Prentice_House"
[27] "Rayment_House" "Rice_House"
[29] "Rogers_House" "Ryberg_House"
[31] "Seccombe_House" "Sperry_House"
[33] "Stimson_House" "Strong_House"
[35] "Whittier_House" "Wilson_House"
b. `for` loop and `mean`
Using a for loop, calculate and print the mean energy consumption of houses you identified in part a.
Click for answer
Answer:
# Compute the mean of every "House" column of `energy`.
# Assumes `house_columns` (the names found in part a) and `energy` already
# exist in the session.

# Preallocate one slot per house instead of growing the vector in the loop
mean_energy_consumption <- numeric(length(house_columns))

# Iterate over the house_columns vector
for (i in seq_along(house_columns)) {
  # Mean of the current house column, ignoring missing values
  mean_energy_consumption[i] <- mean(energy[[house_columns[[i]]]], na.rm = TRUE)
}

# Combine the house names and mean energy consumption into a dataframe
house_mean_energy <- bind_cols(
  House = house_columns,
  MeanEnergyConsumption = mean_energy_consumption
)

# Print the dataframe as a formatted table
house_mean_energy %>% knitr::kable()
House | MeanEnergyConsumption |
---|---|
Allen_House | 0.9821865 |
Alumni_Guest_House/Johnson_House | 20.2631152 |
Benton_House | 1.8849290 |
Berg_House | 1.3174340 |
Bird_House | 2.3222680 |
Chaney_House | 1.0715123 |
Clader_House | 0.4646776 |
Dacie_Moses_House | 1.2776465 |
Douglas_House | 0.7219500 |
Farm_House | 5.0599020 |
Geffert_House | 0.9360400 |
Headley_House | 1.4555605 |
Henrickson_House | 3.4407858 |
Henry_House | 1.3639619 |
Hill_House | 1.4735884 |
Hilton_House | 0.4248030 |
Hoppin_House_(Alumni) | 1.8760474 |
Huntington_House | 1.2395238 |
Jewett_House | 0.8987697 |
Jones_House | 0.8680271 |
Nutting_House | 4.3967234 |
Page_House_West | 1.8923490 |
Parish_House_ | 12.6793378 |
Parr_House | 9.7210618 |
Pollock_House | 1.1831426 |
Prentice_House | 0.9089497 |
Rayment_House | 0.8005664 |
Rice_House | 1.1568457 |
Rogers_House | 0.5634289 |
Ryberg_House | 1.0729988 |
Seccombe_House | 2.6874199 |
Sperry_House | 0.7052983 |
Stimson_House | 2.0659904 |
Strong_House | 2.5410595 |
Whittier_House | 1.0424369 |
Wilson_House | 1.0435830 |
Group Activity 2
1. Make a data frame of quantiles for `energy` buildings in columns 9-90 (you will need `na.rm = TRUE`)
Click for answer
Answer:
# Deciles (10th through 90th percentile) of each building in columns 9-90 of
# `energy`, one column per building and one row per decile; NAs are dropped
# before computing each quantile.
qdf <- energy %>%
  select(9:90) %>%
  map_dfc(quantile, probs = seq(.1, .9, .1), na.rm = TRUE)
qdf
# A tibble: 9 × 82
`100_Nevada_Street` `104_Maple_St.` `106_Winona_St.` Allen_House
<dbl> <dbl> <dbl> <dbl>
1 0.0972 1.04 0.601 0.756
2 0.120 1.11 0.632 0.781
3 0.183 1.18 0.673 0.941
4 0.461 1.18 0.681 0.983
5 0.710 1.42 0.692 1.00
6 0.795 1.42 0.865 1.01
7 0.915 1.54 1.10 1.07
8 1.11 1.56 1.20 1.07
9 1.24 1.67 1.27 1.25
# ℹ 78 more variables: `Alumni_Guest_House/Johnson_House` <dbl>,
# Arboretum_Office <dbl>, Art_Studios <dbl>, Benton_House <dbl>,
# Berg_House <dbl>, Bird_House <dbl>, Boliou_Memorial_Art_Bldg. <dbl>,
# Burton_Hall <dbl>, `Cassat_Hall_/_James_Hall` <dbl>,
# `Center_for_Mathematics_&_Computing` <dbl>, Chaney_House <dbl>,
# Clader_House <dbl>, College_Warehouse <dbl>, Cowling_Gym <dbl>,
# Dacie_Moses_House <dbl>, Davis_Hall <dbl>, Douglas_House <dbl>, …
2. Add a variable to identify the quantile
Click for answer
Answer:
# Same decile table as before, plus a `stat` column labelling each row
# ("quantile_10", ..., "quantile_90") in the order the probabilities are given.
qdf <- energy %>%
  select(9:90) %>%
  map_dfc(quantile, probs = seq(.1, .9, .1), na.rm = TRUE) %>%
  mutate(stat = str_c("quantile_", seq(10, 90, 10)))
qdf
# A tibble: 9 × 83
`100_Nevada_Street` `104_Maple_St.` `106_Winona_St.` Allen_House
<dbl> <dbl> <dbl> <dbl>
1 0.0972 1.04 0.601 0.756
2 0.120 1.11 0.632 0.781
3 0.183 1.18 0.673 0.941
4 0.461 1.18 0.681 0.983
5 0.710 1.42 0.692 1.00
6 0.795 1.42 0.865 1.01
7 0.915 1.54 1.10 1.07
8 1.11 1.56 1.20 1.07
9 1.24 1.67 1.27 1.25
# ℹ 79 more variables: `Alumni_Guest_House/Johnson_House` <dbl>,
# Arboretum_Office <dbl>, Art_Studios <dbl>, Benton_House <dbl>,
# Berg_House <dbl>, Bird_House <dbl>, Boliou_Memorial_Art_Bldg. <dbl>,
# Burton_Hall <dbl>, `Cassat_Hall_/_James_Hall` <dbl>,
# `Center_for_Mathematics_&_Computing` <dbl>, Chaney_House <dbl>,
# Clader_House <dbl>, College_Warehouse <dbl>, Cowling_Gym <dbl>,
# Dacie_Moses_House <dbl>, Davis_Hall <dbl>, Douglas_House <dbl>, …
3. Reshape the data frame to make variables `stat` (describing the quantile), `building` and `quant` (quantile value)
Click for answer
Answer:
# Build the labelled decile table, then reshape it to long format with one row
# per (stat, building) pair and the quantile value in `quantiles`.
qdf <- energy %>%
  select(9:90) %>%
  map_dfc(quantile, probs = seq(.1, .9, .1), na.rm = TRUE) %>%
  mutate(stat = str_c("quantile_", seq(10, 90, 10))) %>%
  # Pivot every building column; `stat` is the only column left in place
  pivot_longer(cols = -stat, names_to = "building", values_to = "quantiles")
qdf
# A tibble: 738 × 3
stat building quantiles
<chr> <chr> <dbl>
1 quantile_10 100_Nevada_Street 0.0972
2 quantile_10 104_Maple_St. 1.04
3 quantile_10 106_Winona_St. 0.601
4 quantile_10 Allen_House 0.756
5 quantile_10 Alumni_Guest_House/Johnson_House 17.0
6 quantile_10 Arboretum_Office 0.13
7 quantile_10 Art_Studios 0.23
8 quantile_10 Benton_House 1.59
9 quantile_10 Berg_House 1.06
10 quantile_10 Bird_House 1.42
# ℹ 728 more rows
4. Plot the KWH value for each quantile on the x-axis for the buildings `Sayles-Hill`, `Language_&_Dining_Center`, and `Olin_Hall_of_Science`
Click for answer
Answer:
# Plot percentile against KWH for three buildings, using the long-format `qdf`
# (columns stat, building, quantiles) built in the previous step.
qdf %>%
  filter(
    building %in%
      c("Sayles-Hill", "Language_&_Dining_Center", "Olin_Hall_of_Science")
  ) %>%
  # parse_number() pulls the numeric percentile out of labels like "quantile_10"
  ggplot(aes(x = quantiles, y = parse_number(stat), color = building)) +
  geom_point() +
  geom_line(aes(group = building)) +
  labs(y = "Percentile (%)", x = "KWH") +
  scale_y_continuous(breaks = seq(10, 90, by = 10))