本文主要介绍如何用 Python 爬虫爬取腾讯较真查证平台,并对新型冠状病毒“谣言”新闻进行数据分析,希望能为大家解决编程问题提供一定的参考,需要的开发者们随着小编一起来学习吧!
'''
python 爬虫 爬取腾讯较真查证平台,对新型冠状病毒“谣言”的新闻进行数据分析
http://www.cppcns.com/jiaoben/python/300617.html
Author: taotao
Date: 20200227
'''
import requests
import pandas
class SpiderRumor(object):
    """Scrape rumor fact-check entries from the Tencent Jiaozhen platform.

    Each page of the ``loadmore`` endpoint is fetched as JSON, the fields
    listed in ``COLUMNS`` are copied out of every entry under the
    ``"content"`` key, and the collected rows are written to a CSV file.
    """

    # Fields copied from every entry; also the CSV column order.
    COLUMNS = ["title", "date", "result", "explain", "tag"]

    def __init__(self, timeout=10):
        """Set up the endpoint template and request headers.

        Args:
            timeout: Seconds to wait for each HTTP request before giving up.
                Without a timeout, ``requests`` may block indefinitely.
        """
        # ``%s`` is replaced by the page number when the URL is built.
        self.url = "https://vp.fact.qq.com/loadmore?artnum=0&page=%s"
        self.header = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3269.3 Safari/537.36"
        }
        self.timeout = timeout

    @classmethod
    def _rows_from_content(cls, data_list):
        """Turn a list of entry dicts into rows ordered like ``COLUMNS``.

        Raises:
            KeyError: If an entry lacks one of the ``COLUMNS`` fields.
        """
        return [[entry[column] for column in cls.COLUMNS] for entry in data_list]

    def Run_spider(self, pages=40, output_path="关于新冠状病毒的谣言统计表.csv"):
        """Download ``pages`` pages of entries and save them as a CSV file.

        Args:
            pages: Number of pages to request, starting from page 0.
            output_path: Destination CSV path.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            KeyError: If a response has no ``"content"`` key or an entry is
                missing an expected field.
        """
        rows = []
        for page in range(pages):
            response = requests.get(
                self.url % page, headers=self.header, timeout=self.timeout
            )
            # Fail with a clear HTTP error instead of a confusing JSON
            # decoding error when the server returns an error page.
            response.raise_for_status()
            data_list = response.json()["content"]
            print(data_list)
            rows.extend(self._rows_from_content(data_list))

        # utf_8_sig writes a BOM; presumably so spreadsheet tools such as
        # Excel detect the encoding of the Chinese text.
        pandas.DataFrame(rows, columns=self.COLUMNS).to_csv(
            output_path, encoding="utf_8_sig"
        )


# Script entry point.
if __name__ == '__main__':
    spider = SpiderRumor()
    spider.Run_spider()
'''
python 爬虫 爬取腾讯较真查证平台,对新型冠状病毒“谣言”的新闻进行数据分析
生成一个饼状图
http://www.cppcns.com/jiaoben/python/300617.html
Author: taotao
Date: 20200227
'''
import matplotlib.pyplot
import pandas

# Windows 系统设置中文字体
matplotlib.pyplot.rcParams['font.sans-serif'] = ['SimHei']  # font used to render Chinese labels
# Draw minus signs with the ASCII hyphen; the Unicode minus glyph may be
# missing from the CJK font selected above.
matplotlib.pyplot.rcParams['axes.unicode_minus'] = False

# NOTE(review): absolute, machine-specific path; confirm it points at the CSV
# produced by the scraper before running on another machine.
data = pandas.read_csv("F:/python_program/20200227/关于新冠状病毒的谣言统计表.csv")

# Count each distinct "explain" value once and reuse the result for both the
# slice labels and the slice sizes, so the two lists stay aligned.
explain_counts = data["explain"].value_counts()
labels = explain_counts.index.tolist()
print(labels)
sizes = explain_counts.values.tolist()
print(sizes)

# Square canvas plus an equal axis aspect keeps the pie circular.
matplotlib.pyplot.figure(figsize=(8, 8))
matplotlib.pyplot.pie(sizes, labels=labels, autopct='%1.1f%%', shadow=True, startangle=0)
matplotlib.pyplot.axis("equal")
matplotlib.pyplot.show()
这篇关于python 爬虫 爬取腾讯较真查证平台,对新型冠状病毒“谣言”的新闻进行数据分析的文章就介绍到这儿,希望我们推荐的文章对程序员们有所帮助!