## Factors in RStudio: Analysing categorical data

## Factors in R ----
#
# Walk-through of R factors: building unordered and ordered factors from
# character vectors, inspecting their levels, summarising them, and comparing
# elements of an ordered factor. Lines starting with `#>` show the console
# output printed by the expression directly above them.

theory <- "R uses factors for categorical variables!"

# factor refers to a statistical data type used to store categorical variables.
# Example gender

## What is factor and why should you use it?

## Different types of data - nominal, ordinal, interval and ratio

gender_vector <- c("Male", "Female", "Female", "Male", "Male")

# Define factor_gender_vector using 'factor()'
factor_gender_vector <- factor(gender_vector)
# Print
factor_gender_vector
#> [1] Male   Female Female Male   Male
#> Levels: Female Male

# Same data, coded as "1"/"0" strings instead of labels
gender_vector <- c("1", "0", "0", "1", "1")

factor_gender_vector <- factor(gender_vector)
# Print
factor_gender_vector
#> [1] 1 0 0 1 1
#> Levels: 0 1

## Animal vector ----
## No order
animals_vector <- c("Elephant", "Giraffe", "Donkey", "Horse")

factor_animals_vector <- factor(animals_vector)

factor_animals_vector
#> [1] Elephant Giraffe  Donkey   Horse
#> Levels: Donkey Elephant Giraffe Horse

## Factor levels ----
levels(factor_gender_vector)
#> [1] "0" "1"
levels(factor_animals_vector)
#> [1] "Donkey"   "Elephant" "Giraffe"  "Horse"

## Summarizing ----
summary(factor_animals_vector)
#>   Donkey Elephant  Giraffe    Horse
#>        1        1        1        1
summary(factor_gender_vector)
#> 0 1
#> 2 3

## Ordered factors ----
temperature_vector <- c("High", "Low", "High", "Low", "Medium")

# Without explicit levels, the printed levels come out as High, Low, Medium
factor(temperature_vector)
#> [1] High   Low    High   Low    Medium
#> Levels: High Low Medium

# Explicit levels plus ordered = TRUE gives Low < Medium < High
factor_temperature_vector <- factor(
  temperature_vector,
  ordered = TRUE,
  levels = c("Low", "Medium", "High")
)
factor_temperature_vector
#> [1] High   Low    High   Low    Medium
#> Levels: Low < Medium < High

# Same level sequence, but ordered = FALSE: levels print without `<`
factor_temperature_vector <- factor(
  temperature_vector,
  ordered = FALSE,
  levels = c("Low", "Medium", "High")
)
factor_temperature_vector
#> [1] High   Low    High   Low    Medium
#> Levels: Low Medium High

## Speed vector ----
speed_vector <- c("medium", "slow", "slow", "medium", "fast")
factor_speed_vector <- factor(
  speed_vector,
  ordered = TRUE,
  levels = c("slow", "medium", "fast")
)

# Print
factor_speed_vector
#> [1] medium slow   slow   medium fast
#> Levels: slow < medium < fast
summary(factor_speed_vector)
#>   slow medium   fast
#>      2      2      1

## Comparing ordered factors ----

# Factor value for second data analyst
da2 <- factor_speed_vector[2]
da2
#> [1] slow
#> Levels: slow < medium < fast

# Factor value for fifth data analyst
da5 <- factor_speed_vector[5]
da5
#> [1] fast
#> Levels: slow < medium < fast

# Is data analyst 2 faster than data analyst 5?
da2 > da5
#> [1] FALSE