Factors in RStudio: Analysing categorical data

## Factors in R
>
> theory <- "R uses factors for categorical variables!"
>
> # A factor is a statistical data type used to store categorical variables.
> # Example: gender
>
> ## What is a factor and why should you use it?
>
> ## Different types of data - nominal, ordinal, interval and ratio
>
> gender_vector <- c("Male","Female","Female","Male","Male")
>
> # Define factor_gender_vector using 'factor()'
> factor_gender_vector <- factor(gender_vector)
> #print
> factor_gender_vector
[1] Male Female Female Male Male
Levels: Female Male
>
> gender_vector <- c("1", "0", "0", "1", "1")
>
> factor_gender_vector <- factor(gender_vector)
> #print
> factor_gender_vector
[1] 1 0 0 1 1
Levels: 0 1
>
>
> ## Animal vector (nominal: the categories have no natural order)
> animals_vector <- c("Elephant", "Giraffe", "Donkey", "Horse")
>
> factor_animals_vector <- factor(animals_vector)
>
> factor_animals_vector
[1] Elephant Giraffe Donkey Horse
Levels: Donkey Elephant Giraffe Horse
>
> ## Factor levels
> levels(factor_gender_vector)
[1] "0" "1"
> levels(factor_animals_vector)
[1] "Donkey" "Elephant" "Giraffe" "Horse"
>
> ## Summarizing
>
> summary(factor_animals_vector)
Donkey Elephant Giraffe Horse
1 1 1 1
> summary(factor_gender_vector)
0 1
2 3
>
>
> ## ordered factors
>
> temperature_vector <- c("High", "Low", "High","Low", "Medium")
> factor(temperature_vector)
[1] High Low High Low Medium
Levels: High Low Medium
> factor_temperature_vector <- factor(temperature_vector, order = TRUE, levels = c("Low", "Medium", "High"))
> factor_temperature_vector
[1] High Low High Low Medium
Levels: Low < Medium < High
>
> factor_temperature_vector <- factor(temperature_vector, order = FALSE, levels = c("Low", "Medium", "High"))
> factor_temperature_vector
[1] High Low High Low Medium
Levels: Low Medium High
>
> ## Speed vector
> speed_vector <-c("medium", "slow", "slow", "medium", "fast")
> factor_speed_vector <- factor(speed_vector, ordered = TRUE, levels= c("slow", "medium", "fast"))
>
> # Print
> factor_speed_vector
[1] medium slow slow medium fast
Levels: slow < medium < fast
> summary(factor_speed_vector)
slow medium fast
2 2 1
>
> ## Comparing ordered factors
>
> # Factor value for second data analyst
> da2 <- factor_speed_vector[2]
> da2
[1] slow
Levels: slow < medium < fast
> # Factor value for fifth data analyst
> da5 <- factor_speed_vector[5]
> da5
[1] fast
Levels: slow < medium < fast
> # Is data analyst 2 faster than data analyst 5?
> da2 > da5
[1] FALSE

Leave a Reply

Your email address will not be published. Required fields are marked *