科学经得起实践检验-python3.6通过决策树实战精准准确预测今日大盘走势(含代码)
春有百花秋有月,夏有凉风冬有雪;
若无闲事挂心头,便是人间好时节。
--宋.无门慧开
不废话了,以下训练模型数据,采用本人发明的极致800实时指数近期的一些实际数据,
预测采用今日的真实数据
#coding=utf-8 __author__ = 'huangzhi' import math import operatordef calcShannonEnt(dataset):numEntries = len(dataset)labelCounts = {}for featVec in dataset:currentLabel = featVec[-1]if currentLabel not in labelCounts.keys():labelCounts[currentLabel] = 0 labelCounts[currentLabel] += 1 shannonEnt = 0.0 for key in labelCounts:prob = float(labelCounts[key]) / numEntriesshannonEnt -= prob * math.log(prob, 2)return shannonEntdef CreateDataSet():# dataset = [[1, 1, 'yes'], # [1, 1, 'yes'], # [1, 0, 'no'], # [0, 1, 'no'], # [0, 1, 'no']] dataset = [[3, 4, 100, 85, 4, 6, 110, 120, 4, 6, 151, 122, 8, 12, 110, 185, '跌'],[5, 7, 88, 85, 6, 8, 100, 130, 6, 9, 131, 132, 8, 14, 100, 195, '跌'],[6, 2, 60, 20, 9, 3, 80, 22, 16, 4, 131, 32, 33, 5, 160, 45, '涨'],[3, 4, 100, 105, 4, 6, 110, 120, 4, 6, 151, 122, 8, 12, 110, 185, '跌'],[5, 3, 50, 30, 8, 4, 70, 28, 12, 6, 101, 42, 28, 7, 120, 35, '涨'],[2, 6, 60, 95, 4, 8, 90, 130, 6, 11, 101, 142, 9, 15, 99, 145, '跌'],[5, 3, 70, 30, 8, 4, 90, 32, 22, 6, 141, 42, 43, 8, 150, 65, '涨'],[2, 8, 30, 60, 9, 8, 80, 90, 9, 20, 140, 160, 12, 32, 101, 205, '跌']]labels = ['l1', 'l2', 'l3', 'l4', 'l5', 'l6', 'l7', 'l8', 'l9', 'l11', 'l12', 'l13', 'l14', 'l15', 'l16', 'l17']return dataset, labelsdef splitDataSet(dataSet, axis, value):retDataSet = []for featVec in dataSet:if featVec[axis] == value:reducedFeatVec = featVec[:axis]reducedFeatVec.extend(featVec[axis + 1:])retDataSet.append(reducedFeatVec)return retDataSetdef chooseBestFeatureToSplit(dataSet):numberFeatures = len(dataSet[0]) - 1 baseEntropy = calcShannonEnt(dataSet)bestInfoGain = 0.0;bestFeature = -1;for i in range(numberFeatures):featList = [example[i] for example in dataSet]# print(featList) uniqueVals = set(featList)# print(uniqueVals) newEntropy = 0.0 for value in uniqueVals:subDataSet = splitDataSet(dataSet, i, value)prob = len(subDataSet) / float(len(dataSet))newEntropy += prob * calcShannonEnt(subDataSet)infoGain = baseEntropy - newEntropyif (infoGain > bestInfoGain):bestInfoGain = infoGainbestFeature = ireturn bestFeaturedef majorityCnt(classList):classCount = {}for vote in classList:if vote not in classCount.keys():classCount[vote] = 0 classCount[vote] = 1 sortedClassCount = sorted(classCount.iteritems(), key=operator.itemgetter(1), reverse=True)return sortedClassCount[0][0]def createTree(dataSet, inputlabels):labels = inputlabels[:]classList = [example[-1] for example in dataSet]if classList.count(classList[0]) == len(classList):return classList[0]if len(dataSet[0]) == 1:return majorityCnt(classList)bestFeat = chooseBestFeatureToSplit(dataSet)bestFeatLabel = labels[bestFeat]myTree = {bestFeatLabel: {}}del (labels[bestFeat])featValues = [example[bestFeat] for example in dataSet]uniqueVals = set(featValues)for value in uniqueVals:subLabels = labels[:]myTree[bestFeatLabel][value] = createTree(splitDataSet(dataSet, bestFeat, value), subLabels)return myTreedef classify(inputTree, featLabels, testVec):firstStr = list(inputTree.keys())[0]secondDict = inputTree[firstStr]featIndex = featLabels.index(firstStr)for key in secondDict.keys():if testVec[featIndex] == key:if type(secondDict[key]).__name__ == 'dict':classLabel = classify(secondDict[key], featLabels, testVec)else:classLabel = secondDict[key]return classLabelmyDat, labels = CreateDataSet() # print(calcShannonEnt(myDat)) # print(splitDataSet(myDat, 1, 1)) # print(chooseBestFeatureToSplit(myDat)) myTree = createTree(myDat, labels)#通过早上9:41分的实际数据进行预测 print(classify(myTree, labels, [1, 6, 156, 169, 1, 6, 156, 169, 1, 6, 156, 169, 1, 6, 156, 169])) #通过早上10:41分的实际数据进行预测 print(classify(myTree, labels, [1, 6, 156, 169, 4, 9, 129, 263, 4, 9, 129, 263, 4, 9, 129, 263])) #通过下午13:41分的实际数据进行预测 print(classify(myTree, labels, [1, 6, 156, 169, 4, 9, 129, 263, 5, 12, 123, 306, 5, 12, 123, 306])) #通过下午14:41分的实际数据进行预测 print(classify(myTree, labels, [1, 6, 156, 169, 4, 9, 129, 263, 5, 12, 123, 306, 6, 13, 99, 397]))
运行结果如下:
D:\Programs\Python\Python36-64\python.exe D:/pyfenlei/决策树/jcs4.py 跌 跌 跌 跌