本文主要是介绍MIA------KNN,希望对大家解决编程问题提供一定的参考价值,需要的开发者们随着小编来一起学习吧!
#encoding:utf-8
'''
Created on 2015年4月28日@author: zju
'''
import operator

import numpy as np
from numpy import *
def createDataSet():
    """Return a tiny hand-made training set for trying out the classifier.

    Returns:
        tuple: ``(group, labels)`` where ``group`` is a 4x2 float array of
        sample points and ``labels`` holds one class label per row.
    """
    group = np.array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels
#inX: 用于分类的输入向量
#dataSet: 训练样本集
#labels: 标签向量
#k: 表示用于选择最近邻居的数目
#labels的元素数目和矩阵dataSet的行数相同
def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Args:
        inX: Input vector to classify (one value per feature).
        dataSet: Training samples as a 2-D array, one sample per row.
        labels: Class labels; ``labels[i]`` belongs to row ``i`` of
            ``dataSet``.
        k: Number of nearest neighbours that take part in the vote.

    Returns:
        The label occurring most often among the ``k`` training samples
        closest to ``inX`` by Euclidean distance.
    """
    dataSetSize = dataSet.shape[0]
    # Repeat inX once per training row so it can be subtracted element-wise.
    diffMat = np.tile(inX, (dataSetSize, 1)) - dataSet
    # Euclidean distance: square each difference, sum per row, square root.
    distances = (diffMat ** 2).sum(axis=1) ** 0.5
    # Row indices ordered from the nearest sample to the farthest.
    sortedDistIndices = distances.argsort()

    # Tally the labels of the k nearest samples.
    classCount = {}
    for i in range(k):
        voteIlabel = labels[sortedDistIndices[i]]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1

    # items() rather than the Python-2-only iteritems() keeps this runnable
    # on both Python 2 and 3. Sort by vote count, highest first.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]
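# file2matrix is meant to be readFile with 3 feature columns. The original copies were not identical: file2matrix called fr.readline() (a single string, which is then iterated character by character) where readFile calls fr.readlines(), which is why file2matrix failed to run.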
# Default location of the dating data set used by the demo functions below.
_DATING_DATA_FILE = 'D:/XX/book/data/datingTestSet.txt'


def file2matrix(filename):
    """Load a tab-separated data file that has 3 feature columns.

    Equivalent to ``readFile(filename, 3)``.

    Args:
        filename: Path of the text file to read.

    Returns:
        tuple: ``(returnMat, classLabelVector)`` as described for
        ``readFile``.
    """
    # Delegating avoids keeping a second copy of the parser; the earlier
    # copy read with readline() and therefore looped over characters.
    return readFile(filename, 3)


def readFile(filename, col):
    """Load a tab-separated data file into a feature matrix and label list.

    Every non-blank line supplies its first ``col`` fields as numeric
    features and its last field as an integer class label. Blank lines are
    skipped.

    Args:
        filename: Path of the text file to read.
        col: Number of leading fields per line to use as features.

    Returns:
        tuple: ``(returnMat, classLabelVector)`` - a ``(rows, col)`` float
        array of features and the list of integer labels, in file order.
    """
    # The context manager closes the file even when parsing fails.
    with open(filename) as fr:
        rows = [line.strip().split('\t') for line in fr if line.strip()]

    returnMat = np.zeros((len(rows), col))
    classLabelVector = []
    for index, fields in enumerate(rows):
        returnMat[index, :] = [float(value) for value in fields[0:col]]
        classLabelVector.append(int(fields[-1]))
    return returnMat, classLabelVector


def autoNorm(dataSet):
    """Rescale each column of a 2-D array to the range 0..1.

    Every value becomes ``(value - column_min) / (column_max - column_min)``.

    Args:
        dataSet: 2-D array with one sample per row.

    Returns:
        tuple: ``(normDataSet, ranges, minVals)`` - the rescaled array, the
        per-column ``max - min`` and the per-column minimum.
    """
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # Broadcasting applies the per-column min and range to every row.
    normDataSet = (dataSet - minVals) / ranges
    return normDataSet, ranges, minVals


def datingClassTest(k, filename=_DATING_DATA_FILE, hoRatio=0.10):
    """Measure the classifier's error rate on a hold-out slice of the data.

    The first ``hoRatio`` fraction of the rows is used as test samples and
    the remaining rows as training data.

    Args:
        k: Number of neighbours passed to ``classify0``.
        filename: Data file with 3 feature columns and an integer label.
        hoRatio: Fraction of the rows held out for testing.

    Returns:
        float: Share of the held-out rows that were misclassified. The same
        value is also printed.
    """
    datingDataMat, datingLabels = readFile(filename, 3)
    normDataSet, ranges, minVals = autoNorm(datingDataMat)
    m = normDataSet.shape[0]
    numTestVals = int(m * hoRatio)
    errorCount = 0
    for i in range(numTestVals):
        predicted = classify0(normDataSet[i, :],
                              normDataSet[numTestVals:m, :],
                              datingLabels[numTestVals:m], k)
        if predicted != datingLabels[i]:
            errorCount += 1
    errorRate = errorCount / float(numTestVals)
    # A parenthesised single argument prints identically on Python 2 and 3.
    print('The total error rate is: %f' % errorRate)
    return errorRate


def classifyPerson():
    """Ask for three feature values on the console and print a prediction.

    The answers are normalised with the minimum and range of the stored
    data set and classified with ``k = 3``.
    """
    resultList = ['not at all', 'in small doses', 'in large doses']
    try:
        ask = raw_input  # Python 2
    except NameError:
        ask = input  # Python 3: input() already returns the raw string
    percentTats = float(ask("percentage of time spent playing videl games?"))
    ffMiles = float(ask("frequent filter miles earned per year?"))
    iceCream = float(ask("liters of ice cream consumed per year?"))

    datingDataMat, datingLabels = readFile(_DATING_DATA_FILE, 3)
    normMat, ranges, minVals = autoNorm(datingDataMat)
    # NOTE(review): presumably this matches the column order of the data
    # file (miles, game time, ice cream) - confirm against the data set.
    inArr = np.array([ffMiles, percentTats, iceCream])
    classifierResult = classify0((inArr - minVals) / ranges, normMat,
                                 datingLabels, 3)
    # Assumes labels in the file are 1, 2 or 3, mapping onto resultList.
    # The %-format reproduces the two-item print output on Python 2 and 3.
    print("%s %s" % ("You will probably like this person: ",
                     resultList[classifierResult - 1]))
调用classifyPerson()
结果如下:
这篇关于MIA------KNN的文章就介绍到这儿,希望我们推荐的文章对编程师们有所帮助!