"""Wine classification exercise: preprocessing, cosine transform, 10-fold CV.

Steps (matching the lettered sections below):
    a. read ``wine.csv`` and normalise the feature columns,
    b. show a forward / inverse cosine transform of the first five rows,
    c. split the data 97% / 3%,
    d. build a 10-fold splitter,
    e-f. cross-validate an SVM and a logistic regression on the training part,
    g. plot the per-fold metrics and print their means.
"""

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler, Normalizer, LabelEncoder
from sklearn.svm import SVC


def evaluate_with_kfold(make_model, features, labels, splitter):
    """Cross-validate a classifier and collect its per-fold metrics.

    Args:
        make_model: zero-argument callable returning a fresh, unfitted
            estimator; it is called once per fold so no state leaks between
            folds.
        features: 2-D NumPy array of samples.
        labels: 1-D NumPy array of class labels aligned with ``features``.
        splitter: object exposing ``split(features)`` that yields
            ``(train_indices, validation_indices)`` pairs (e.g. ``KFold``).

    Returns:
        Three lists ``(accuracies, precisions, recalls)`` with one entry per
        fold. Precision and recall are macro-averaged.
    """
    accuracies = []
    precisions = []
    recalls = []
    for fit_idx, val_idx in splitter.split(features):
        model = make_model()
        model.fit(features[fit_idx], labels[fit_idx])

        expected = labels[val_idx]
        predicted = model.predict(features[val_idx])

        accuracies.append(accuracy_score(expected, predicted))
        # zero_division=0 yields the same score as the default (0 for an
        # undefined class metric) but without a warning per affected fold.
        precisions.append(
            precision_score(expected, predicted, average='macro', zero_division=0)
        )
        recalls.append(
            recall_score(expected, predicted, average='macro', zero_division=0)
        )
    return accuracies, precisions, recalls


def plot_fold_metric(svm_scores, log_scores, metric_name):
    """Plot one metric across folds for both models in its own figure.

    Args:
        svm_scores: per-fold scores of the SVM.
        log_scores: per-fold scores of the logistic regression.
        metric_name: metric label used in the legend and the title.
    """
    plt.figure(figsize=(10, 5))
    plt.plot(svm_scores, label=f"SVM {metric_name}")
    plt.plot(log_scores, label=f"Logistic {metric_name}")
    plt.title(f"{metric_name} qua 10-fold")
    plt.legend()
    plt.show()


# -------------------------------------------------------
# a. Load the data and normalise it
# -------------------------------------------------------
df = pd.read_csv("wine.csv")

# Assumes the class label is the last column of the file.
class_col = df.columns[-1]

# Scale the first four columns to the range [0, 10].
# The columns are replaced by label rather than written through ``iloc``:
# an in-place ``iloc`` write of float results can trip over integer-typed
# columns, whereas replacing the columns lets them take the float dtype.
# NOTE(review): the scaler is fitted on the full data set before the
# train/test split below, so the later splits see statistics from all rows.
scaler = MinMaxScaler(feature_range=(0, 10))
cols_to_scale = df.columns[0:4]
df[cols_to_scale] = scaler.fit_transform(df[cols_to_scale])

# Normalise the remaining feature columns (everything except the class).
# Normalizer works per row: each sample is rescaled to unit norm over these
# columns.
norm = Normalizer()
cols_to_norm = df.columns[4:-1]
df[cols_to_norm] = norm.fit_transform(df[cols_to_norm])

# Encode the class column as integer labels.
le = LabelEncoder()
df[class_col] = le.fit_transform(df[class_col])

print("Dữ liệu sau chuẩn hóa:")
print(df.head())

# -------------------------------------------------------
# b. Cosine transform of the first 5 rows
# -------------------------------------------------------
print("\n--- Biến đổi cosine thuận (5 dòng đầu) ---")
cos_forward = np.cos(df.iloc[0:5, :-1])
print(cos_forward)

print("\n--- Biến đổi cosine nghịch (arccos) ---")
# The clip guards against values drifting just outside [-1, 1].
# arccos returns angles in [0, pi], so inputs outside that range are not
# recovered exactly.
cos_inverse = np.arccos(np.clip(cos_forward, -1, 1))
print(cos_inverse)

# -------------------------------------------------------
# c. Split the data: 97% train, 3% test
# -------------------------------------------------------
X = df.drop(class_col, axis=1).values
y = df[class_col].values

# NOTE: X_test / y_test are produced here but not used further down; the
# cross-validation below runs on the training portion only.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.03, random_state=42
)

# -------------------------------------------------------
# d. Build the 10-fold splitter
# -------------------------------------------------------
# The fixed integer seed makes every call to kf.split() yield the same folds,
# so both models are scored on identical partitions.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# -------------------------------------------------------
# e. SVM, 10-fold
# -------------------------------------------------------
svm_acc, svm_pre, svm_rec = evaluate_with_kfold(SVC, X_train, y_train, kf)

# -------------------------------------------------------
# f. Logistic Regression, 10-fold
# -------------------------------------------------------
log_acc, log_pre, log_rec = evaluate_with_kfold(
    lambda: LogisticRegression(max_iter=1000), X_train, y_train, kf
)

# -------------------------------------------------------
# g. Plots
# -------------------------------------------------------
plot_fold_metric(svm_acc, log_acc, "Accuracy")
plot_fold_metric(svm_pre, log_pre, "Precision")
plot_fold_metric(svm_rec, log_rec, "Recall")

print("\n===== KẾT QUẢ TRUNG BÌNH =====")
print("SVM:", np.mean(svm_acc), np.mean(svm_pre), np.mean(svm_rec))
print("Logistic:", np.mean(log_acc), np.mean(log_pre), np.mean(log_rec))