Python爬取弹幕、读取CSV文件制作弹幕热点词云图-万恶之源马老师-后翼弃兵

article/2025/9/18 10:18:13

文章目录

- 1.后翼弃兵
- 2.万恶之源弹幕

python3.8
jupyter notebook

1.后翼弃兵

《后翼弃兵》豆瓣短评数据集

import pandas
import csv
import jieba
import numpy
from PIL import Image
from wordcloud import WordCloud

def getDataFromCsv(path="1.csv"):
    """Load the review rows that carry a valid star rating.

    Args:
        path: CSV file to read. Defaults to "1.csv", the file name the
            original notebook cell used.

    Returns:
        A list of rows (each a list of strings) whose column at index 8
        is one of "1" through "5". Rows without a valid rating, blank
        lines and rows too short to have that column are skipped.
    """
    # Accepted star ratings; any other value in the rating column marks
    # the row as invalid.
    stars = ("1", "2", "3", "4", "5")
    rating_column = 8

    # newline="" is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed correctly.
    with open(path, "r", encoding="utf-8", newline="") as file:
        # The length guard skips blank or truncated rows instead of
        # raising IndexError. The `with` block closes the file, so no
        # explicit close() is needed.
        comments = [
            row
            for row in csv.reader(file)
            if len(row) > rating_column and row[rating_column] in stars
        ]
    return comments

# Run the loader on its own; the list shown below is the output captured
# from this cell.
getDataFromCsv()

[['2020-11-12 14:11:28.928','后翼弃兵','32579283','https://movie.douban.com/subject/32579283/comments?sort=new_score&status=P','#未注销#','https://www.douban.com/people/thedarknine/','\n            \n                网飞出品，多 半成品，这可是久违的“丝滑”。神童，但不神化。“山川异域，风月同天”，何尝不是一个江湖故事。\n        ','\n                    2020-10-23\n                ','4','1368'],['2020-11-12 14:11:28.928','后翼弃兵','32579283','https://movie.douban.com/subject/32579283/comments?sort=new_score&status=P','转发点赞五星','https://www.douban.com/people/26414546/','\n            \n                这个剧情感觉有点平淡，但是我为什么看完了。男人都想帮你，但其实是想睡你，但其实还是想帮你。没啥，我也想生活在俄罗斯。\n        ','\n                    2020-10-24\n                ','5','1407'],['2020-11-12 14:11:28.928','后翼弃兵','32579283','https://movie.douban.com/subject/32579283/comments?sort=new_score&status=P','fushia','https://www.douban.com/people/gotothefield/','\n            \n                国际象棋版麦瑟尔夫人+美国夫人+梅尔罗斯。最后一集好俗气啊但我仍然不能免俗地在每一个泪点留下眼泪。\n        ','\n                    2020-10-27\n                ','4','634'], ['2020-11-12 14:11:28.928','后翼弃兵','32579283','https://movie.douban.com/subject/32579283/comments?sort=new_score&status=P','你说什么都对','https://www.douban.com/people/limiaolm/','\n            \n                关键时刻还是姐妹比男人靠谱\n        ','\n                    2020-11-05\n                ','4','0']]

# Turn the parsed review text into a word cloud image.
def getWordCloud():
    """Render the review texts from getDataFromCsv() into "ciyun.png".

    The text in column 6 of every row is concatenated, segmented with
    jieba, and passed to WordCloud. The image is written to "ciyun.png"
    in the current working directory; nothing is returned.
    """
    # All rows read from the CSV file.
    data = getDataFromCsv()

    # Column 6 is the review text in the rows this section loads.
    # str.join avoids shadowing the builtin `str` and repeated `+=`.
    text = "".join(row[6] for row in data)
    print(text)

    # jieba splits the text into words; they are joined with spaces so
    # the words are separated in the string given to WordCloud.
    cutWord = " ".join(jieba.cut(text))

    # Optional: to shape the cloud after a picture, load it with
    #     bgImg = numpy.array(Image.open("a.jpg"))
    # and pass mask=bgImg (the variable itself, not a quoted string).
    cloud = WordCloud(
        # Path to a local system font file (Windows-specific). The raw
        # string keeps the backslashes literal without invalid-escape
        # warnings.
        font_path=r"C:\Windows\Fonts\STZHONGS.TTF",
        # Background colour of the generated image.
        background_color="white",
        max_words=1300,
        margin=3,
        width=1800,
        height=800,
        random_state=42,
    ).generate(cutWord)

    # Save the rendered cloud as an image file.
    cloud.to_file("ciyun.png")

# Build the word cloud for the review data; the function writes "ciyun.png".
getWordCloud()

在这里插入图片描述

2.万恶之源弹幕

数据csv

import pandas
import csv
import jieba
import numpy
from PIL import Image
from wordcloud import WordCloud

def getDataFromCsv(path=r"E:\01_hjz\datas\01_ml\paulmadanmaku.csv"):
    """Load every data row of the danmaku CSV file, skipping the header.

    Args:
        path: CSV file to read. Defaults to the path the original
            notebook cell used.

    Returns:
        A list of rows (each a list of strings) from the second line of
        the file onward. An empty file yields an empty list.
    """
    # newline="" is what the csv module asks for, so that quoted fields
    # containing line breaks are parsed correctly.
    with open(path, "r", encoding="utf-8", newline="") as file:
        reader = csv.reader(file)
        # Skip the header line. The default keeps an empty file from
        # raising StopIteration.
        next(reader, None)
        comments = list(reader)
    # The `with` block has already closed the file here.

    # Kept from the original cell: echo the loaded rows.
    print(comments)
    return comments

# Run the loader on its own; the list shown below is the output captured
# from this cell.
getDataFromCsv()

[['0 days 00:01:16.562000000','2020-01-05','2020-01','2020-01-05 15:58:45','你要感谢人家小伙子点到为止，不然你眼睛保不住。','7ecdfd8'],['0 days 00:00:59.156000000','2020-05-18','2020-05','2020-05-18 04:49:33','已经被打死了','fb2f37fe'],...]

# Turn the parsed danmaku text into a word cloud image.
def getWordCloud():
    """Render the danmaku texts from getDataFromCsv() into "ciyun.png".

    The text in column 4 of every row is concatenated, segmented with
    jieba, and passed to WordCloud. The image is written to "ciyun.png"
    in the current working directory; nothing is returned.
    """
    # All rows read from the CSV file.
    data = getDataFromCsv()

    # Column 4 is the danmaku text in the rows this section loads.
    # str.join avoids shadowing the builtin `str` and repeated `+=`.
    text = "".join(row[4] for row in data)
    print(text)

    # jieba splits the text into words; they are joined with spaces so
    # the words are separated in the string given to WordCloud.
    cutWord = " ".join(jieba.cut(text))

    # Optional: to shape the cloud after a picture, load it with
    #     bgImg = numpy.array(Image.open("a.jpg"))
    # and pass mask=bgImg (the variable itself, not a quoted string).
    cloud = WordCloud(
        # Path to a local system font file (Windows-specific). The raw
        # string keeps the backslashes literal without invalid-escape
        # warnings.
        font_path=r"C:\Windows\Fonts\STZHONGS.TTF",
        # Background colour of the generated image.
        background_color="white",
        max_words=1300,
        margin=3,
        width=1800,
        height=800,
        random_state=42,
    ).generate(cutWord)

    # Save the rendered cloud as an image file.
    cloud.to_file("ciyun.png")

# Build the word cloud for the danmaku data; the function writes "ciyun.png".
# The jieba log lines below are the output captured from this cell.
getWordCloud()

Dumping model to file cache C:\Users\ADMINI~1\AppData\Local\Temp\jieba.cache
Loading model cost 0.605 seconds.
Prefix dict has been built successfully.

在这里插入图片描述