Clustering Analysis
Data Analysis
Clustering
K-means
Overview
Load Libraries
Prepare Data
# Build the clustering input: keep the five variables used for clustering and
# discard every row that has a missing value in any of them.
# droplevels() removes factor levels left empty by drop_na(); an empty level
# would otherwise become an all-zero dummy column, which scale() turns into
# NaN and kmeans() then rejects.
# NOTE(review): assumes `data` is a data frame defined earlier -- confirm.
cluster_df <- data %>%
  select(
    age,
    lab_hba1c,
    BMI_cat,
    sex,
    SDI
  ) %>%
  drop_na() %>%
  droplevels()

# Convert factors to dummies. The first column of the design matrix is the
# intercept, so it is removed; drop = FALSE guarantees the result stays a
# matrix even if only one predictor column is left.
cluster_df_dummy <- model.matrix(~ ., data = cluster_df)[, -1, drop = FALSE]

# Centre and scale every column of the design matrix (numeric variables and
# dummy indicators alike) so that no variable dominates the distances.
cluster_scaled <- scale(cluster_df_dummy)

# Optimal Number of Clusters ----
# Elbow method: plot the total within-cluster sum of squares ("wss") for a
# range of candidate k values.
factoextra::fviz_nbclust(
  cluster_scaled,
  kmeans,
  method = "wss"
) +
  theme_minimal() +
  labs(title = "Elbow Method for Optimal k")

# Silhouette method: plot the average silhouette width for the same range of
# candidate k values. Kept as a separate statement so each plot is printed
# on its own.
factoextra::fviz_nbclust(
  cluster_scaled,
  kmeans,
  method = "silhouette"
) +
  theme_minimal() +
  labs(title = "Silhouette Method for Optimal k")

# K-MEANS (k = 3 as example) ----
# Fit k-means on the scaled matrix with k = 3 (the example value for this
# section). The seed makes the random starts reproducible, and nstart = 25
# keeps the best of 25 initialisations.
# NOTE(review): no assignment to `km` is visible in this script, so the fit
# is defined here -- confirm k and the seed against the intended analysis.
set.seed(123)
km <- kmeans(cluster_scaled, centers = 3, nstart = 25)

# Add cluster labels to dataset. cluster_scaled is derived row-for-row from
# cluster_df, so km$cluster lines up with the rows of cluster_df.
cluster_results <- cluster_df %>%
  mutate(cluster = factor(km$cluster))

# Plot the observations as points, with a convex hull drawn around each
# cluster.
factoextra::fviz_cluster(
  km,
  data = cluster_scaled,
  geom = "point",
  ellipse.type = "convex",
  ggtheme = theme_minimal()
)