PCA的实现

    xiaoxiao2021-03-25  102

    #!/usr/bin/python2
# -*- coding: UTF-8 -*-
"""Principal component analysis (PCA) helpers built on NumPy.

Throughout this PCA implementation the data records are assumed to contain
no labels; store the labels separately before calling these functions.
"""
from numpy import *


def centralize(dataSet):
    """Return a mean-centred copy of ``dataSet`` and the per-column mean.

    ``dataSet`` is converted to a float array with one record per row.  The
    input object itself is not modified; use the returned array.

    Returns:
        (centered, meanRow): ``centered`` is ``dataSet`` with the column
        means subtracted, ``meanRow`` is the vector of those column means.
    """
    dataSet = asarray(dataSet, dtype=float)
    meanRow = mean(dataSet, axis=0)
    # The subtraction result must be kept: it is the centred data.
    return dataSet - meanRow, meanRow


def getCov(dataSet):
    """Return the covariance matrix of ``dataSet`` (columns are variables)."""
    return cov(dataSet, rowvar=False)


def getEigValsVects(covMat):
    """Return ``(eigVals, eigVects)`` of the square matrix ``covMat``.

    The values come straight from ``linalg.eig``: column ``i`` of
    ``eigVects`` is the eigenvector belonging to ``eigVals[i]``, and the
    eigenvalues are not sorted.
    """
    eigVals, eigVects = linalg.eig(covMat)
    return eigVals, eigVects


def selectEigValsVects(eigVals, eigVects, num):
    """Pick the ``num`` largest eigenvalues and their eigenvector columns.

    Returns:
        (selectVals, selectVects), ordered from the largest eigenvalue down.
    """
    # argsort is ascending; reverse it and keep the first ``num`` indices.
    selectIndices = argsort(eigVals)[::-1][:num]
    return eigVals[selectIndices], eigVects[:, selectIndices]


def percent2Num(eigVals, percent):
    """Return how many of the largest eigenvalues are needed for ``percent``.

    The eigenvalues are accumulated from largest to smallest and the count
    is returned as soon as the running sum strictly exceeds
    ``percent * sum(eigVals)``.  If that never happens (for example when
    ``percent`` is 1.0) every eigenvalue is needed, so the total count is
    returned instead of falling through with ``None``.
    """
    sortedEigVals = sort(eigVals)[::-1]
    threshold = percent * sum(sortedEigVals)
    runningSum = 0.0
    for count, value in enumerate(sortedEigVals, 1):
        runningSum += value
        if runningSum > threshold:
            return count
    return len(sortedEigVals)


def pca(dataSet, percent):
    """Project ``dataSet`` onto its leading principal components.

    Args:
        dataSet: 2-D data with one record per row and no label column.
        percent: fraction of the eigenvalue sum the kept components must
            exceed, e.g. ``0.95``.

    Returns:
        A matrix with one row per record and one column per kept component,
        computed from the mean-centred data.
    """
    centered, _ = centralize(dataSet)
    covSet = getCov(centered)
    # asmatrix keeps the matrix-typed result; ``mat`` is gone in NumPy 2.0.
    eigVals, eigVects = getEigValsVects(asmatrix(covSet))
    # Honour the caller's threshold instead of a fixed 0.95.
    valNum = percent2Num(eigVals, percent)
    _, selectVects = selectEigValsVects(eigVals, eigVects, valNum)
    return asmatrix(centered) * selectVects

    转载请注明原文地址: https://ju.6miu.com/read-15632.html

    最新回复(0)