Python 爬虫:爬取腾讯较真查证平台,对新型冠状病毒“谣言”新闻进行数据分析

"""
Python crawler: scrape the Tencent Jiaozhen fact-checking platform and collect
news items about novel-coronavirus "rumors" for data analysis.

http://www.cppcns.com/jiaoben/python/300617.html
Author: taotao
Date: 20200227
"""
import requests
import pandas


class SpiderRumor(object):
    """Download rumor records page by page and save them to one CSV file."""

    # Keys copied from every record; also the CSV column order.
    FIELDS = ["title", "date", "result", "explain", "tag"]
    # Number of pages requested (page indices 0 .. PAGE_COUNT - 1).
    PAGE_COUNT = 40
    # Seconds to wait for each response. requests has no default timeout,
    # so without this a stalled connection would block the run forever.
    TIMEOUT = 10

    def __init__(self):
        # "%s" is replaced with the zero-based page index.
        self.url = "https://vp.fact.qq.com/loadmore?artnum=0&page=%s"
        self.header = {
            "user-agent": (
                "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
                "(KHTML, like Gecko) Chrome/64.0.3269.3 Safari/537.36"
            )
        }

    def Run_spider(self):
        """Fetch every page, collect the selected fields and write the CSV.

        Each response is expected to be JSON with a "content" list whose
        items carry the keys listed in ``FIELDS``.

        Raises:
            requests.RequestException: on timeout, connection failure or a
                non-2xx HTTP status.
            KeyError: if a response lacks "content" or an item lacks one of
                the expected keys.
        """
        rows = []
        for page in range(self.PAGE_COUNT):
            response = requests.get(
                self.url % page,
                headers=self.header,
                timeout=self.TIMEOUT,
            )
            # Fail with a clear HTTP error instead of a confusing JSON
            # decode error when the server answers with an error page.
            response.raise_for_status()
            data_list = response.json()["content"]
            # Echo the raw page content so progress is visible on stdout.
            print(data_list)
            rows.extend([item[field] for field in self.FIELDS] for item in data_list)

        # Write the table once all pages are collected. "utf_8_sig" adds a
        # BOM, which presumably is there so Excel detects the encoding.
        pandas.DataFrame(rows, columns=self.FIELDS).to_csv(
            "关于新冠状病毒的谣言统计表.csv", encoding="utf_8_sig"
        )


# Script entry point
if __name__ == '__main__':
    spider = SpiderRumor()
    spider.Run_spider()

"""
Python crawler companion script: analyse the novel-coronavirus "rumor" news
scraped from the Tencent Jiaozhen fact-checking platform and draw a pie chart.

http://www.cppcns.com/jiaoben/python/300617.html
Author: taotao
Date: 20200227
"""
import matplotlib.pyplot
import pandas

# Short local alias for the plotting module.
plt = matplotlib.pyplot

# Font setup for Windows: SimHei is used so Chinese labels can be displayed.
plt.rcParams['font.sans-serif'] = ['SimHei']
# NOTE(review): presumably disabled because SimHei lacks the Unicode minus
# glyph; confirm against the matplotlib rcParams documentation.
plt.rcParams['axes.unicode_minus'] = False

data = pandas.read_csv("F:/python_program/20200227/关于新冠状病毒的谣言统计表.csv")

# Count how often each distinct "explain" value occurs; the index supplies
# the slice labels and the counts supply the slice sizes.
explain_counts = data["explain"].value_counts()
labels = explain_counts.index.tolist()
print(labels)
sizes = explain_counts.values.tolist()
print(sizes)

plt.figure(figsize=(8, 8))
plt.pie(
    sizes,
    labels=labels,
    autopct='%1.1f%%',
    shadow=True,
    startangle=0,
)
# Equal axis scaling keeps the pie circular.
plt.axis("equal")
plt.show()

原文链接:https://blog.csdn.net/taotaoah/article/details/104546774?ops_request_misc=%257B%2522request%255Fid%2522%253A%2522165277607816781818782534%2522%252C%2522scm%2522%253A%252220140713.130102334.pc%255Fblog.%2522%257D&request_id=165277607816781818782534&biz_id=0&utm_medium=distribute.pc_search_result.none-task-blog-2~blog~first_rank_ecpm_v1~times_rank-2-104546774-null-null.nonecase&utm_term=%E6%96%B0%E9%97%BB

© 版权声明
THE END
喜欢就支持一下吧
点赞0 分享
评论 抢沙发
头像
文明发言,共建和谐米科社区
提交
头像

昵称

取消
昵称表情图片

    暂无评论内容