"""Created on Sun Nov 13 09:14:13 2016@author: daxiong"""from nltk.corpus import stopwordsfrom nltk.tokenize import sent_tokenize,word_tokenize#英文停止词,set()集合函数消除重复项list_stopWords=list(set(stopwords
import jieba# 创建停用词listdef stopwordslist(filepath):stopwords = [line.strip() for line in open(filepath, 'r', encoding='utf-8').readlines()]return stopwords# 对句子进行分词def seg_sentence(sentence):sent
1 文本数据准备 首先准备文本数据,爬取李佳琦下的评论,如下: 2 提取文本数据、获得评论内容 #内容读取import xlrdimport pandas as pdwb=xlrd.open_workbook("评论数据.xlsx")sh=wb.sheet_by_index(0)col=sh.ncolsrow=sh.nrowsText=[]for i in range(r