# Build a small example tibble and store `grade` a second time as a factor.
# factor() sorts the unique values to create the levels, so the character
# strings "10th" and "11th" come before "9th".
mydata <- tibble(
  id = 1:4,
  grade = c("9th", "10th", "11th", "9th")
) %>%
  mutate(grade_fac = factor(grade))

# Inspect the level order that factor() chose.
levels(mydata$grade_fac)
#> [1] "10th" "11th" "9th"
STAT 220
See forcats cheatsheet and forcats vignette
# fct_relevel()

# fct_collapse() and fct_lump()

# Example data: one character column of compass locations.
mydata <- tibble(loc = c("SW", "NW", "NW", "NE", "SE", "SE"))

mydata %>%
  mutate(
    loc_fac = factor(loc),
    # collapse levels: merge the four locations into two named groups
    loc2 = fct_collapse(
      loc_fac,
      south = c("SW", "SE"),
      north = c("NE", "NW")
    ),
    # most common 2 levels + other
    loc3 = fct_lump(
      loc_fac,
      n = 2,
      other_level = "other"
    )
  )
#> # A tibble: 6 × 4
#>   loc   loc_fac loc2  loc3 
#>   <chr> <fct>   <fct> <fct>
#> 1 SW    SW      south other
#> 2 NW    NW      north NW   
#> 3 NW    NW      north NW   
#> 4 NE    NE      north other
#> 5 SE    SE      south SE   
#> 6 SE    SE      south SE   
fct_infreq()
fct_infreq(): This function orders factor levels by their frequency in the data.
fct_rev(): Reverse the order of factor levels.
fct_anon()
fct_anon(): Anonymize factor levels by replacing them with unique, randomly generated character strings.

ca10-yourusername repository from GitHub
10:00
gss_cat
A sample of data from the General Social Survey, a long-running US survey conducted by NORC at the University of Chicago.
# A tibble: 21,483 × 9
    year marital         age race  rincome        partyid    relig denom tvhours
   <int> <fct>         <int> <fct> <fct>          <fct>      <fct> <fct>   <int>
 1  2000 Never married    26 White $8000 to 9999  Ind,near … Prot… Sout…      12
 2  2000 Divorced         48 White $8000 to 9999  Not str r… Prot… Bapt…      NA
 3  2000 Widowed          67 White Not applicable Independe… Prot… No d…       2
 4  2000 Never married    39 White Not applicable Ind,near … Orth… Not …       4
 5  2000 Divorced         25 White Not applicable Not str d… None  Not …       1
 6  2000 Married          25 White $20000 - 24999 Strong de… Prot… Sout…      NA
 7  2000 Never married    36 White $25000 or more Not str r… Chri… Not …       3
 8  2000 Divorced         44 White $7000 to 7999  Ind,near … Prot… Luth…      NA
 9  2000 Married          44 White $25000 or more Not str d… Prot… Other       0
10  2000 Married          47 White $25000 or more Strong re… Prot… Sout…       3
# ℹ 21,473 more rows
Reorder the levels
Recode the levels
Collapse levels
Lump levels
How could we improve the partyid labels?
fct_recode()
# Relabel the partyid levels with fct_recode() (syntax: "new" = "old"),
# then plot the mean TV hours for each party level. Levels that are not
# listed in fct_recode() keep their existing labels.
gss_cat %>%
  drop_na(tvhours) %>%  # remove rows with missing tvhours before averaging
  select(partyid, tvhours) %>%
  mutate(
    partyid = fct_recode(
      partyid,
      "Republican, strong"    = "Strong republican",
      "Republican, weak"      = "Not str republican",
      "Independent, near rep" = "Ind,near rep",
      "Independent, near dem" = "Ind,near dem",
      "Democrat, weak"        = "Not str democrat",
      "Democrat, strong"      = "Strong democrat"
    )
  ) %>%
  group_by(partyid) %>%
  summarize(tvhours = mean(tvhours)) %>%
  # order the y-axis levels by mean tvhours instead of the stored level order
  ggplot(aes(tvhours, fct_reorder(partyid, tvhours))) +
  geom_point() +
  labs(y = "partyid")

# fct_collapse()
# Merge the six partisan partyid levels into two groups with fct_collapse(),
# then plot the mean TV hours for each resulting level. Levels that are not
# listed in fct_collapse() are left unchanged.
gss_cat %>%
  drop_na(tvhours) %>%  # remove rows with missing tvhours before averaging
  select(partyid, tvhours) %>%
  mutate(
    partyid = fct_collapse(
      partyid,
      conservative = c(
        "Strong republican",
        "Not str republican",
        "Ind,near rep"
      ),
      liberal = c(
        "Strong democrat",
        "Not str democrat",
        "Ind,near dem"
      )
    )
  ) %>%
  group_by(partyid) %>%
  summarize(tvhours = mean(tvhours)) %>%
  # order the y-axis levels by mean tvhours instead of the stored level order
  ggplot(aes(tvhours, fct_reorder(partyid, tvhours))) +
  geom_point() +
  labs(y = "partyid")

# fct_lump()
# To enhance your data analysis, you can use the following factor
# manipulation techniques:

10:00