Sentiment analysis of Douban movie reviews with Python


Best review: Really enjoyable! The story is told partly in flashback, and if that confuses you at first, just keep watching. 任嘉伦's acting is excellent: he keeps 那岚岳 and 林敬 clearly distinct as characters, and his cute scenes never feel awkward. 张慧雯 is adorable too. The flashbacks plant plenty of foreshadowing, and as an original screenplay I am personally very satisfied. Every character has their own little schemes and secrets for the audience to puzzle out bit by bit. There are no absolute villains; I don't even really dislike 明尊, probably because 林源's acting is so good it won me over. The location shooting deserves special praise: it is gorgeous, and it has been a long time since I've seen a wuxia drama shot almost entirely on real locations, including a hornet-nest scene that was filmed for real. It's a treat to see a drama made with this much care. The plot never drags or pads itself out, the characters are vivid, everyone has their own agenda yet also a pitiable side, and many of the relationships turn out to be mutual exploitation. The male and female leads are separated by a family feud, which looks formulaic, but the feud barely touches their romance; 林若寒 even supports 林敬 in pursuing true love, not wanting the previous generation's grudge passed down, which makes her a very good mother.

Worst review: Trash of a show, not good at all.

Positive rate: 68.4%
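This "positive rate" is simply the average of SnowNLP's per-review sentiment scores, which fall between 0 (negative) and 1 (positive). A minimal sketch of the scoring, reusing the two reviews above (the exact numbers depend on SnowNLP's bundled model):

import snownlp

# .sentiments is a probability-like score in [0, 1]; higher means more positive
good = snownlp.SnowNLP("很好看的!").sentiments            # expected to be high
bad = snownlp.SnowNLP("辣鸡片子,一点也不好看").sentiments  # expected to be low
# the script's 好评率 is the mean of these scores, expressed as a percentage
print(f"好评率:{round((good + bad) / 2 * 100, 1)}%")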

The code

import csv
import traceback

import jieba
import matplotlib.pyplot as plt
import numpy as np
import requests
import snownlp
from bs4 import BeautifulSoup
from matplotlib.font_manager import FontProperties
from PIL import Image
from wordcloud import WordCloud

First define the request headers and the global rating counters, then the function that fetches and parses one page of short reviews.

header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
    # replace this with the Cookie value from your own Douban session
    'Cookie': 'bid=HPwx786ji5w; douban-fav-remind=1; viewed="22601258"; gr_user_id=954bdfba-9778-4359-b238-cd539123a160; _vwo_uuid_v2=D7DD1B1011AD0B9B5B3332525CEEF25CF|9b95f719e9255f99462f09e1248197a2; __utmz=223695111.1592467854.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; ll="118254"; __utmc=30149280; __utmc=223695111; ap_v=0,6.0; __utma=30149280.349582947.1592100671.1592986456.1592989460.7; __utmz=30149280.1592989460.7.5.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1592989707%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D_7ZcG2FOcVjzEmAtnon0r-2-zpQeowzBEOKVYuJSrfmg_SLF6-lCeZXNH6BtW6ig%26wd%3D%26eqid%3Da740e3ec00010995000000065eeb1ef0%22%5D; _pk_ses.100001.4cf6=*; __utma=223695111.1175335955.1592467854.1592986456.1592989707.4; __utmb=223695111.0.10.1592989707; __utmb=30149280.5.10.1592989460; _pk_id.100001.4cf6=263156288f9d7135.1592467854.4.1592992247.1592986526.'
}

# counts of reviews per star level, in the order [很差, 较差, 还行, 推荐, 力荐]
rating = [0, 0, 0, 0, 0]
# total number of rated reviews (used by generatePie)
AllRating = 0
# Douban's rating labels, matched against the title attribute of the .rating span
star_List = ['很差', '较差', '还行', '推荐', '力荐']
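For reference, a rated comment on the page carries a span whose title attribute holds one of the labels in star_List, which is why the code below reads star.get('title'). A tiny illustration (the HTML fragment is reconstructed for the example, not copied from a live page):

# hypothetical fragment of one comment's .comment-info block
html = '<span class="comment-info"><a href="#">某用户</a><span class="rating" title="力荐"></span></span>'
info = BeautifulSoup(html, "html5lib").select_one(".comment-info")
title = info.select_one(".rating").get("title")
print(star_List.index(title))   # 4 -> the 力荐 (five-star) bucket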

def getCommentByPage(url, commentList):
    # request the page with the headers defined above
    response = requests.get(url, headers=header)
    if response.status_code == 200:
        bs = BeautifulSoup(response.content, "html5lib")
        commentItemList = bs.select(".comment-item")
        try:
            for commentItem in commentItemList:
                comment = commentItem.select_one(".comment")
                commentInfo = comment.select_one(".comment-info")
                # reviewer name
                auther = commentInfo.select_one("a").text
                # star rating (missing if the reviewer did not rate the movie)
                star = commentInfo.select_one(".rating")
                if star is not None:
                    star1 = star.get('title')
                    for i in range(0, 5):
                        if star1 == star_List[i]:
                            rating[i] += 1
                # short review text
                commentContent = comment.select_one(".short").text.replace("\n", "")
                if star is not None:
                    print(auther, "---", star['title'], "----", commentContent)
                    commentList.append([auther, star['title'], commentContent])
        except Exception:
            # print the exception traceback
            traceback.print_exc()
    return commentList
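To try the function on its own, point it at the first page of a movie's short reviews (start=0 selects the first 20 reviews; the movie id 30425206 below is only an example):

testList = getCommentByPage(
    "https://movie.douban.com/subject/30425206/comments?start=0", [])
print(len(testList), "rated reviews collected from the first page")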

def readData():
    # read the reviews back from the CSV written in __main__ (name is a module-level
    # global set there); each row is [reviewer, rating, review text]
    with open(f"{name}.csv", 'r', encoding="utf-8") as file:
        csvReader = csv.reader(file)
        # use a list comprehension to keep only the review-text column
        return [item[2] for item in csvReader]

def generateWordCloud():
    commentList = readData()
    finalComment = ""
    k = 0  # running sum of sentiment scores
    m = 0  # number of reviews scored
    # load the stop-word list
    stop_words = [w.strip() for w in open('cn_stopwords.txt', encoding="utf-8").readlines()]
    # track the most positive and the most negative review
    max_score = snownlp.SnowNLP(commentList[0]).sentiments
    maxtag = commentList[0]
    min_score = max_score
    mintag = commentList[0]
    for comment in commentList:
        # only keep comments that are not themselves stop words
        # (stop words are not removed from inside the text here)
        if comment not in stop_words:
            finalComment += comment
            s = snownlp.SnowNLP(comment)
            # accumulate the sentiment score of each review
            k = k + s.sentiments
            # count the reviews
            m = m + 1
            if max_score < s.sentiments:
                max_score = s.sentiments
                maxtag = comment
            if min_score > s.sentiments:
                min_score = s.sentiments
                mintag = comment
    # append the best/worst review and the overall positive rate to the report
    # started in __main__ (append mode, so the header is not overwritten)
    with open(name + ".txt", "a", encoding="utf-8") as f:
        str1 = maxtag + "\n"
        str1 = str1 + "最差评价:" + mintag + "\n"
        str1 = str1 + "好评率:" + str(round(k / m * 100, 1)) + "%" + "\n"
        f.write(str1)
    # tokenise the corpus with jieba; WordCloud expects space-separated words
    finalComment = " ".join(jieba.cut(finalComment))
    # use a custom image as the outline of the word cloud
    image = np.array(Image.open("1.png"))
    # generate the word cloud
    #   font_path: path to a font that can render Chinese
    #   background_color: background colour
    #   mask: custom image used as the outline of the cloud
    wordCloud = WordCloud(
        font_path="YaHeiMonacoHybrid.ttf",
        background_color="white",
        mask=image
    ).generate(finalComment)
    # save the generated word cloud locally
    wordCloud.to_file(f"{name}.png")
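If you would rather preview the cloud on screen as well as saving it, these three lines can be added at the end of generateWordCloud(), where the wordCloud variable is in scope (this is an addition, not part of the original script):

# display the generated cloud in a matplotlib window
plt.imshow(wordCloud, interpolation="bilinear")
plt.axis("off")
plt.show()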

def generatePie():
    global AllRating
    try:
        # total number of rated reviews, then convert counts to fractions
        for i in range(0, 5):
            AllRating = AllRating + rating[i]
        for i in range(0, 5):
            rating[i] = rating[i] / AllRating
    except ZeroDivisionError:
        # no rated reviews were collected
        pass
    font = FontProperties(fname='YaHeiMonacoHybrid.ttf', size=16)
    # offsets that pull each slice out from the centre; the original post never shows
    # how indic was defined, so these values are only an illustration
    indic = [0, 0, 0, 0, 0.1]
    plt.pie(
        x=rating,
        labels=['1', '2', '3', '4', '5'],
        colors=['red', 'pink', 'blue', 'purple', 'orange'],
        startangle=90,
        shadow=True,
        explode=tuple(indic),   # tuple() turns the list into a tuple
        autopct='%1.1f%%'       # that is the digit 1, not the letter l
    )
    plt.title('好评分析', fontproperties=font)
    plt.savefig(name + "_饼图.jpg")
    plt.show()

if __name__ == '__main__':
    commentList = []
    # the user enters just the numeric movie id from a Douban URL
    movie_id = input('请输入要分析电影的 id:(例子:https://movie.douban.com/subj…')
    print("ID———star———-评价\n")
    name = ""
    # Douban paginates short reviews 20 per page, so start = 0, 20, 40, ...
    for i in range(10):
        baseUrl = f"https://movie.douban.com/subject/{movie_id}/comments?start={i * 20}"
        response = requests.get(baseUrl, headers=header)
        if response.status_code == 200:
            bs = BeautifulSoup(response.content, "html5lib")
            # use the page title as the base name for all output files
            name = bs.title.text.strip()
            commentList = getCommentByPage(baseUrl, commentList)
    # save every scraped review as a CSV row of [reviewer, rating, review text]
    with open(f"{name}.csv", 'w', newline="", encoding="utf-8") as file:
        csvWriter = csv.writer(file)
        csvWriter.writerows(commentList)
    # start the report file; generateWordCloud() appends the rest
    with open(name + ".txt", "w", encoding="utf-8") as f:
        f.write(name + "\n" + "最好评价:")
    generateWordCloud()
    generatePie()
    print("分析完成")
