2019独角兽企业重金招聘Python工程师标准>>>
代码依赖于上一篇文章中的例子,链接为《统计学习方法》第5章Python3实现(一):熵、条件熵、信息增益、信息增益比
计算基尼指数的代码如下:
"""
CART: Classification And Regression Tree
Created on Dec 28th,2018
@author:Aomo Jan"""def calcGini(dataSet,yIndex=-1):"""计算基尼指数(Gini Index)"""numEntries=len(dataSet)classList=[dt[yIndex] for dt in dataSet]classSet=set(classList)pk={}giniP=0for ck in classSet:kList=[ dt for dt in dataSet if dt[yIndex]==ck ]pk[ck]=float(len(kList))/numEntriesginiP+=pk[ck]*(1-pk[ck])return giniPdef calcGiniDA(dataSet,xIndex,attrX,yIndex=-1):"""计算集合在第xIndex个特征为attrX的条件下的基尼指数"""numEntries=len(dataSet)d1=[dt for dt in dataSet if dt[xIndex]==attrX]d2=[dt for dt in dataSet if dt[xIndex]!=attrX]giniDA=(len(d1)*calcGini(d1)+len(d2)*calcGini(d2))/numEntriesreturn giniDAdef testDemo():from entropy import loadDataSetdataSet=loadDataSet()giniP=calcGini(dataSet)print('Gini=%f' % (giniP))dataSet=loadDataSet()gnA1=calcGiniDA(dataSet,3,'非常好')print('Gini(D,A2=1)=%f' % (gnA1))if __name__=='__main__':testDemo()
欢迎关注我们: