# Reference: https://datahexa.com/kmeans-clustering-with-wine-dataset/
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import cluster, preprocessing, datasets
from sklearn.cluster import KMeans
# Load the bundled wine dataset: feature matrix and class targets.
wine = datasets.load_wine()
X = wine.data
y = wine.target

# Notebook-style inspection: these bare expressions only display a value in an
# interactive session and have no effect when the file is run as a script.
X.shape
y.shape
wine.target_names

# Cluster the raw (unscaled) features with k-means.
# random_state is pinned so that the cluster assignments -- and therefore any
# table built from them -- are reproducible between runs; without it the
# centroid initialisation is random.
# NOTE(review): 10 clusters is more than the three class indices (0, 1, 2)
# that species_label handles -- confirm this is intentional.
model = KMeans(n_clusters=10, random_state=0)
labels = model.fit_predict(X)

# Data frame holding the cluster id returned by fit_predict for each row of X.
df = pd.DataFrame({'labels': labels})
type(df)
def species_label(theta):
    """Translate a numeric class index into the wine dataset's class name.

    Args:
        theta: Class index; compared by equality against 0, 1 and 2.

    Returns:
        The matching entry of ``wine.target_names``, or ``None`` when
        ``theta`` equals none of 0, 1 or 2.
    """
    for index in (0, 1, 2):
        if theta == index:
            return wine.target_names[index]
    return None
# Attach to every row the class name derived from the matching wine.target entry.
df['species'] = list(map(species_label, wine.target))
# Contingency table: rows are cluster ids, columns are class names.
# (Bare expression: the table is only displayed in an interactive session.)
pd.crosstab(index=df['labels'], columns=df['species'])