from itertools import combinations

import numpy as np
import pandas as pd


def agglomerative_clustering_df(df, n_clusters=4, linkage="ward"):
    """Cluster the rows of a DataFrame with naive agglomerative clustering.

    Every row starts as its own cluster; the two closest clusters (as
    measured by ``cluster_distance``) are merged repeatedly until only
    ``n_clusters`` clusters remain.

    Args:
        df: DataFrame whose rows are the points to cluster. Its ``.values``
            array is used as the data matrix.
        n_clusters: Number of clusters to stop at. If it is not smaller than
            the number of rows, no merging happens and every row keeps its
            own cluster.
        linkage: Linkage criterion forwarded to ``cluster_distance``
            ("single", "complete", "average" or "ward").

    Returns:
        Integer NumPy array with one cluster label per row of ``df``, in row
        order. Labels are the positions of the clusters in the final cluster
        list (merged clusters are appended at the end of that list).

    Raises:
        ValueError: If ``n_clusters`` is smaller than 1, or if ``linkage`` is
            unknown and at least one merge has to be evaluated.
    """
    if n_clusters < 1:
        raise ValueError("n_clusters must be at least 1")

    X = df.values  # convert the DataFrame to a NumPy array

    # Clusters hold row positions rather than copies of the rows. Looking the
    # rows up again by value afterwards would mislabel duplicate rows (only
    # the first match would ever be found) and fail on rows containing NaN.
    clusters = [[row] for row in range(len(X))]

    while len(clusters) > n_clusters:
        # Gather each cluster's points once per merge round instead of
        # rebuilding the arrays for every pair.
        members = [X[rows] for rows in clusters]

        min_dist = float("inf")
        # Fallback pair in case no distance compares below infinity (e.g. all
        # NaN); there are always at least two clusters inside this loop.
        merge_idx = (0, 1)
        for i, j in combinations(range(len(clusters)), 2):
            dist = cluster_distance(members[i], members[j], linkage)
            if dist < min_dist:
                min_dist = dist
                merge_idx = (i, j)

        i, j = merge_idx
        merged = clusters[i] + clusters[j]
        # Pop the larger index first so the smaller one stays valid.
        clusters.pop(j)
        clusters.pop(i)
        clusters.append(merged)

    labels = np.zeros(len(X), dtype=int)
    for cluster_id, rows in enumerate(clusters):
        labels[rows] = cluster_id
    return labels


def cluster_distance(c1, c2, linkage="ward"):
    """Return the distance between two clusters under the given linkage.

    Args:
        c1: 2-D array of points (one per row) in the first cluster.
        c2: 2-D array of points (one per row) in the second cluster.
        linkage: One of
            "single"   - smallest pairwise Euclidean distance,
            "complete" - largest pairwise Euclidean distance,
            "average"  - mean pairwise Euclidean distance,
            "ward"     - Ward merge distance
                         ``sqrt(2 * n1 * n2 / (n1 + n2)) * ||m1 - m2||``,
                         where ``n`` are cluster sizes and ``m`` centroids.

    Returns:
        The distance as a float.

    Raises:
        ValueError: If ``linkage`` is not one of the supported names.
    """
    from scipy.spatial.distance import cdist

    if linkage == "single":
        return np.min(cdist(c1, c2))
    if linkage == "complete":
        return np.max(cdist(c1, c2))
    if linkage == "average":
        return np.mean(cdist(c1, c2))
    if linkage == "ward":
        n1, n2 = len(c1), len(c2)
        m1, m2 = np.mean(c1, axis=0), np.mean(c2, axis=0)
        # Plain centroid distance ignores cluster sizes; Ward's criterion
        # weights it so that merging large clusters costs more. For two
        # single-point clusters the factor is 1 (plain Euclidean distance).
        return np.sqrt(2.0 * n1 * n2 / (n1 + n2)) * np.linalg.norm(m1 - m2)
    raise ValueError("Unknown linkage!")


# Example call:
# import pandas as pd
# df = pd.DataFrame([...])
# labels = agglomerative_clustering_df(df, n_clusters=4)