Python进行聚类(scikit-learn、scipy)

    xiaoxiao2026-01-07  6

    # Datasets used for clustering
# %matplotlib inline  -- IPython magic: run it in a notebook cell; it is not valid in a plain .py file.
import numpy as np  # required by np.vstack / np.array below; the original snippet never imported it
import scipy.io as sio
import matplotlib.pyplot as plt

# Loaders for the various clustering datasets.


def _load_labeled(path, key):
    """Load matrix ``key`` from the .mat file at ``path`` in plot-ready layout.

    Keeps every third row and reorders the columns from (0, 1, 2) to
    (1, 2, 0), so that original column 0 -- which the plotting code below
    passes as the per-point colour -- ends up as the last column.
    """
    raw = sio.loadmat(path)[key][0::3, :]  # keep every third row
    return np.array([raw[:, 1], raw[:, 2], raw[:, 0]]).T


def two_cluster():
    """Return the transposed ``'X'`` matrix stored in two_cluster.mat."""
    return sio.loadmat(u'cluster_data/two_cluster.mat')['X'].T


def three_cluster():
    """Return the transposed ``'X'`` matrix stored in three_cluster.mat."""
    return sio.loadmat(u'cluster_data/three_cluster.mat')['X'].T


def five_cluster():
    """Return five_cluster.mat's ``'x'`` and ``'y'`` stacked, then transposed."""
    contents = sio.loadmat(u'cluster_data/five_cluster.mat')
    x = contents['x']  # per the original author's note: 2 rows, n columns
    y = contents['y']  # per the original author's note: 1 row, n columns
    # Stack vertically, then transpose so each sample becomes one row.
    return np.vstack((x, y)).T


def spiral():
    """Return the subsampled, column-reordered ``'spiral'`` matrix."""
    return _load_labeled(u'cluster_data/spiral.mat', 'spiral')


def spiral_unbalance():
    """Return the subsampled, column-reordered ``'spiral_unbalance'`` matrix."""
    return _load_labeled(u'cluster_data/spiral_unbalance.mat', 'spiral_unbalance')


def ThreeCircles():
    """Return the subsampled, column-reordered ``'ThreeCircles'`` matrix."""
    return _load_labeled(u'cluster_data/ThreeCircles.mat', 'ThreeCircles')


def Twomoons():
    """Return the ``'Twomoons'`` matrix and scatter-plot it on the current axes."""
    data = _load_labeled(u'cluster_data/Twomoons.mat', 'Twomoons')
    # NOTE(review): unlike every other loader, this one also draws the data.
    # Kept for backward compatibility; Twomoons1 is the plot-free variant.
    plt.scatter(data[:, 0], data[:, 1], c=data[:, 2])
    return data


def Twomoons1():
    """Return the ``'Twomoons'`` matrix without plotting it."""
    return _load_labeled(u'cluster_data/Twomoons.mat', 'Twomoons')


def test():
    """Print the literal string ``test``."""
    print('test')


# Filled by show_all() with the eight loaded datasets, in plotting order.
data_list = []


def show_all():
    """Load all eight datasets and scatter-plot them in a 2x4 grid.

    The loaded arrays are stored in the module-level ``data_list``. The list
    is replaced in place on every call, so calling this function repeatedly
    no longer accumulates stale entries.
    """
    # Direct function references replace the original eval() on name strings.
    loaders = [two_cluster, three_cluster, five_cluster, spiral,
               spiral_unbalance, ThreeCircles, Twomoons, Twomoons1]
    plt.figure(figsize=(16, 8))
    data_list[:] = [load() for load in loaders]
    for index, data in enumerate(data_list):
        plt.subplot(2, 4, index + 1)
        plt.scatter(data[:, 0], data[:, 1], c=data[:, 2])


show_all()

# K-means clustering with scikit-learn
# %matplotlib inline  -- IPython magic, see the note at the top.
# Path of the MATLAB file. The original bound this string to the name
# `two_cluster`, which replaced the two_cluster() loader defined above.
two_cluster_path = u'cluster_data/two_cluster.mat'
data = sio.loadmat(two_cluster_path)
print(data)

x = data['X']
cValue = x[2]  # third row of X is used as the per-point colour
plt.scatter(x[0], x[1], c=cValue)

from sklearn import cluster, datasets

b = np.array(x).T
b = b[:, 0:2]  # first two columns only are passed to K-means
y_pred = cluster.KMeans(n_clusters=2, random_state=170).fit_predict(b)
plt.scatter(x[0], x[1], c=y_pred)
# Dataset download

    scikit-learn教程

    # %matplotlib inline  -- IPython magic: run it in a notebook cell; it is not valid in a plain .py file.
import scipy.io as sio
import matplotlib.pyplot as plt  # plt.scatter is called below but pyplot was not imported in this snippet

# Path of the MATLAB file holding the spiral dataset (the original stored it
# in a variable misleadingly named `two_cluster`).
spiral_path = u'cluster_data/spiral.mat'
spiral = sio.loadmat(spiral_path)['spiral']
spiral = spiral[0::3, :]  # keep every third row

# Row and column counts; the formatted string prints the same text under
# Python 2 and Python 3.
print('%d %d' % (len(spiral), len(spiral[0])))

cValue = spiral[:, 0]
print(cValue.shape)

# Map each value of column 0 to a colour name by using it as a list index.
color = ['b', 'y']
cValue = [color[int(i)] for i in cValue]
plt.scatter(spiral[:, 1], spiral[:, 2], c=cValue)

    使用kmeans结果

    # K-means with two clusters on columns 1 and 2 of `spiral`, then a scatter
    # plot coloured by the predicted cluster. `spiral` and `plt` must already
    # exist from the preceding snippet.
from sklearn import cluster, datasets

kmeans = cluster.KMeans(n_clusters=2, random_state=170)
y_pred = kmeans.fit_predict(spiral[:, 1:3])
plt.scatter(spiral[:, 1], spiral[:, 2], c=y_pred)

    使用scipy进行聚类效果

    # -*- coding: utf8 -*-
# %matplotlib inline  -- IPython magic: run it in a notebook cell; it is not valid in a plain .py file.
import scipy.io as sio
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as hcluster
from sklearn.cluster import AgglomerativeClustering
import numpy.random as random
import numpy as np
import numpy.core.fromnumeric


def _load_spiral():
    """Return every third row of the ``'spiral'`` matrix in spiral.mat."""
    spiral_path = u'cluster_data/spiral.mat'  # MATLAB file name
    return sio.loadmat(spiral_path)['spiral'][0::3, :]


def loadData():
    """Load the spiral data, print its size, and scatter-plot it.

    Column 0 is used as an index into a two-colour list; columns 1 and 2 are
    used as the plot coordinates.

    Returns:
        The subsampled spiral array. The original returned nothing, so callers
        that ignore the result are unaffected.
    """
    spiral = _load_spiral()
    # Row and column counts; prints the same text under Python 2 and 3.
    print('%d %d' % (len(spiral), len(spiral[0])))
    cValue = spiral[:, 0]
    print(cValue.shape)
    color = ['b', 'y']
    cValue = [color[int(i)] for i in cValue]
    plt.scatter(spiral[:, 1], spiral[:, 2], c=cValue)
    return spiral


def spiralSample(spiral=None):
    """Plot the spiral data next to two hierarchical clusterings of it.

    Args:
        spiral: Array whose column 0 is used as the colour of the first
            subplot and whose columns 1 and 2 are the coordinates. When
            omitted, the data is loaded from ``cluster_data/spiral.mat``.
            The original read an undefined global ``spiral`` here.
    """
    if spiral is None:
        spiral = _load_spiral()
    points = spiral[:, 1:3]

    plt.subplot(131)
    plt.title(u'origal data')
    plt.scatter(spiral[:, 1], spiral[:, 2], c=spiral[:, 0])

    # Hierarchical clustering with SciPy. With criterion='maxclust', t is the
    # maximum number of flat clusters to form (here 2), not a distance
    # threshold; the linkage method and depth are left at their defaults.
    # http://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.fcluster.html#scipy.cluster.hierarchy.fcluster
    y_pred = hcluster.fclusterdata(points, criterion='maxclust', t=2)
    plt.subplot(132)
    plt.title(u'use scipy to hierarchy cluster')
    plt.scatter(spiral[:, 1], spiral[:, 2], c=y_pred)

    # Hierarchical clustering with scikit-learn (Ward linkage, 2 clusters).
    plt.subplot(133)
    plt.title(u'use scikit to hierarchy cluster')
    y_pred = AgglomerativeClustering(n_clusters=2, linkage='ward').fit_predict(points)
    plt.scatter(spiral[:, 1], spiral[:, 2], c=y_pred)
    plt.show()


spiralSample()

    转载请注明原文地址: https://ju.6miu.com/read-1305765.html
    最新回复(0)