In [ ]:
# Reference: https://datahexa.com/kmeans-clustering-with-wine-dataset/
In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import cluster, preprocessing, datasets

from sklearn.cluster import KMeans
In [2]:
# Load the wine dataset bundled with scikit-learn; the returned object exposes
# the `.data`, `.target` and `.target_names` attributes used in the cells below.
wine = datasets.load_wine()
In [3]:
# Feature matrix that will be clustered.
X = wine.data
In [4]:
# Sanity check on the feature matrix dimensions.
X.shape
Out[4]:
(178, 13)
In [5]:
# Ground-truth class index per sample; not passed to the clustering model.
y=wine.target
In [6]:
# Sanity check: one target value per row of X.
y.shape
Out[6]:
(178,)
In [7]:
# Human-readable names of the classes that the integer targets index into.
wine.target_names
Out[7]:
array(['class_0', 'class_1', 'class_2'], dtype='<U7')
In [30]:
# KMeans starts from random centroids, so fix `random_state` to make the
# cluster assignments (and the crosstab below) reproducible on every
# Restart & Run All. `n_init` is pinned explicitly because its default
# differs between scikit-learn releases.
# NOTE(review): 10 clusters are requested although the dataset has only
# 3 classes -- confirm this is intentional.
model = KMeans(n_clusters=10, n_init=10, random_state=0)
In [31]:
# Fit the model on the features and get the cluster index assigned to each sample.
# NOTE(review): X is clustered as loaded, without scaling, although
# `preprocessing` is imported above and never used -- confirm whether
# standardizing the features first was intended.
labels = model.fit_predict(X)
In [32]:
# Put the cluster assignments into a one-column frame so the true class
# names can be attached next to them later.
df = pd.DataFrame(labels, columns=['labels'])
type(df)
Out[32]:
pandas.core.frame.DataFrame
In [33]:
def species_label(theta):
    """Translate a numeric class index into its wine class name.

    Parameters
    ----------
    theta : int-like
        Class index, compared with ``==`` against each position of
        ``wine.target_names``.

    Returns
    -------
    The entry of ``wine.target_names`` at position ``theta``, or ``None``
    when ``theta`` matches no position.
    """
    # Walk the names instead of hard-coding one branch per class, so the
    # lookup keeps working for any number of classes.
    for class_index, class_name in enumerate(wine.target_names):
        if theta == class_index:
            return class_name
    # Unknown index: keep the historical "no match -> None" result explicit.
    return None
In [34]:
# Attach the true class name of every sample next to its cluster label.
df['species'] = list(map(species_label, wine.target))
In [35]:
# Contingency table: rows are cluster labels, columns are true classes.
pd.crosstab(index=df['labels'], columns=df['species'])
Out[35]:
species class_0 class_1 class_2
labels
0 5 5 6
1 8 0 0
2 0 15 14
3 0 17 0
4 18 1 0
5 6 0 0
6 1 12 18
7 7 3 5
8 0 18 5
9 14 0 0
In [ ]: