"""Preprocess the Wine dataset and compare SVM with logistic regression.

Steps performed when the script runs:
  1. Load ``Wine.csv``; the last column is treated as the class label.
  2. Min-max scale the first four columns to [0, 10] and apply ``Normalizer``
     to the remaining feature columns, printing the first five rows of each.
  3. Label-encode the class column and print the first five codes.
  4. Apply a 2-D DCT and a 2-D inverse DCT to the first five feature rows.
  5. Run 10-fold cross-validation of ``SVC`` and ``LogisticRegression`` on the
     training split, then plot and print accuracy / macro precision / macro
     recall per fold and their means.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.fftpack import dct, idct
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, Normalizer
from sklearn.svm import SVC


def _cross_validate(make_model, X, y, splitter):
    """Fit a fresh model on every fold and collect its scores.

    Args:
        make_model: Zero-argument callable returning an unfitted estimator.
        X: Feature array, indexable by the integer index arrays from
            ``splitter.split``.
        y: Label array aligned with ``X``.
        splitter: Object exposing ``split(X)`` that yields
            ``(train_idx, test_idx)`` pairs (here a ``KFold``).

    Returns:
        Three lists ``(accuracy, precision, recall)`` with one entry per fold;
        precision and recall are macro-averaged.
    """
    acc, pre, rec = [], [], []
    for train_idx, test_idx in splitter.split(X):
        model = make_model()
        model.fit(X[train_idx], y[train_idx])
        pred = model.predict(X[test_idx])
        truth = y[test_idx]
        acc.append(accuracy_score(truth, pred))
        pre.append(precision_score(truth, pred, average='macro'))
        rec.append(recall_score(truth, pred, average='macro'))
    return acc, pre, rec


def _plot_metric(svm_scores, log_scores, metric):
    """Show one figure comparing per-fold ``metric`` of both models."""
    plt.figure(figsize=(10, 5))
    plt.plot(svm_scores, label=f"SVM {metric}")
    plt.plot(log_scores, label=f"Logistic {metric}")
    plt.title(f"{metric} qua 10-fold")
    plt.legend()
    plt.show()


def _print_folds(acc, pre, rec):
    """Print one line of scores per fold, numbering folds from 1."""
    for fold, (a, p, r) in enumerate(zip(acc, pre, rec), start=1):
        print(f"Fold {fold}: Accuracy={a:.4f}, Precision={p:.4f}, Recall={r:.4f}")


df = pd.read_csv("Wine.csv")
class_col = df.columns[-1]  # the last column holds the class label
matran = df.values

# First four columns: min-max scaled into the range [0, 10].
a1 = matran[:, :4]
dieuchinh1 = MinMaxScaler(feature_range=(0, 10))
a1 = dieuchinh1.fit_transform(a1)
print(a1[:5, :])

# Remaining feature columns (label excluded): passed through Normalizer.
a2 = matran[:, 4:-1]
dieuchinh2 = Normalizer()
a2 = dieuchinh2.fit_transform(a2)
print(a2[:5, :])

# Class labels encoded as integer codes.
encode = LabelEncoder()
label = matran[:, -1]
label = encode.fit_transform(label)
print(label[:5])

# First five rows, features only.
a3 = matran[:5, :-1]
print("5 dong dau")
print(a3)

# 2-D transform built from two 1-D passes: along the first axis (via the
# transpose), then along the last axis.
thuan = dct(dct(a3.T, norm="ortho").T, norm="ortho")
print(thuan)
# NOTE(review): the inverse DCT is applied to the raw rows `a3`, not to
# `thuan`, so this does not reconstruct `a3`. Confirm whether a round trip
# (idct of `thuan`) was intended.
nghich = idct(idct(a3.T, norm="ortho").T, norm="ortho")
print(nghich)

# Split features and labels by the label column's name. The previous code
# passed the whole value matrix `matran` here, which is not a column label.
X = df.drop(class_col, axis=1).values
y = df[class_col].values

# NOTE(review): test_size=0.03 holds out only 3% of the rows, and X_test /
# y_test are never used below -- confirm that 0.3 was not intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.03, random_state=42
)

# d. Create the 10-fold splitter. With a fixed integer random_state, both
#    models are evaluated on the same folds.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# e. SVM, 10-fold
svm_acc, svm_pre, svm_rec = _cross_validate(SVC, X_train, y_train, kf)

# Logistic regression, 10-fold
log_acc, log_pre, log_rec = _cross_validate(
    lambda: LogisticRegression(max_iter=1000), X_train, y_train, kf
)

# g. Plot the per-fold metrics
_plot_metric(svm_acc, log_acc, "Accuracy")
_plot_metric(svm_pre, log_pre, "Precision")
_plot_metric(svm_rec, log_rec, "Recall")

print("\n===== KẾT QUẢ LOGISTIC REGRESSION THEO TỪNG LẦN TRAIN =====")
_print_folds(log_acc, log_pre, log_rec)

print("\n ===== KẾT QUẢ SVM THEO TỪNG LẦN TRAIN =====")
_print_folds(svm_acc, svm_pre, svm_rec)

print("\n===== KẾT QUẢ TRUNG BÌNH =====")
print("SVM:", np.mean(svm_acc), np.mean(svm_pre), np.mean(svm_rec))
print("Logistic:", np.mean(log_acc), np.mean(log_pre), np.mean(log_rec))