关于python:毕业设计之-题目基于大数据的电影数据分析可视化系统

54次阅读

共计 3962 个字符,预计需要花费 10 分钟才能阅读完成。

非完整代码,毕业设计找丹成学长,q746876041

import csv
import pymysql
import requests
import re
from lxml import html
import time

# 请求头

# Browser-like User-Agent string sent with every page request.
_USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; WOW64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/63.0.3239.132 Safari/537.36"
)

# HTTP headers passed to requests.get() for each movie page.
headers = dict([("User-Agent", _USER_AGENT)])

# 读取电影 url

# Load the movie detail-page URLs to scrape: the first column of every
# non-empty CSV row.
# NOTE(review): the spaces inside the file name look like an artifact of the
# article's text formatting -- confirm the real file name on disk.
# The encoding is left at the platform default, exactly as in the original.
# newline='' is what the csv module asks for when it is handed a file object.
with open('./ 豆瓣电影 TOP250 链接.csv', 'r', newline='') as f:
    # `if row` skips blank lines, which csv.reader yields as [] and which
    # would otherwise raise IndexError on row[0].
    urls = [row[0] for row in csv.reader(f) if row]

# ---------------------------------------------------------------------------
# Scrape every movie detail page listed in `urls` and insert one row per
# movie into the MySQL table `tb_film`.
#
# Each field is extracted on a best-effort basis: a field that is missing
# from the page, or that comes out as an empty string, is stored as None
# (NULL) instead of aborting the run.
# ---------------------------------------------------------------------------

# Patterns applied to the raw page HTML; compiled once instead of per page.
_RUNTIME_RE = re.compile(r'片长:</span>.*?>(.*?)</span>', re.S)
_GENRE_RE = re.compile(r'<span property="v:genre">(.*?)</span>', re.S)
# Whitespace-tolerant on purpose: the pattern as published contained literal
# spaces around the label that real markup may not have. This form matches
# both spellings (it is a superset of the published pattern).
_AREA_RE = re.compile(
    r'<span class="pl">\s*制片国家\s*/\s*地区:</span>\s*(.*?)<br/>', re.S)

_INSERT_SQL = 'insert into tb_film(url, filmname, score, showtime, genres, areas, mins, directors, scriptwriters, actors, comments) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

# (label printed to stdout, key in the dict returned by _parse_film)
_PRINT_ORDER = (
    ("filmname", "filmname"),
    ("score", "score"),
    ("time", "showtime"),
    ("mins", "mins"),
    ("genres_list", "genres"),
    ("area_list", "areas"),
    ("directors_list", "directors"),
    ("scriptwriters_list", "scriptwriters"),
    ("actors_list", "actors"),
    ("comment", "comments"),
)


def _none_if_empty(text):
    """Return None for an empty string (stored as NULL), else *text* unchanged."""
    return None if text == "" else text


def _first(items):
    """Return the first element of *items*, or None when *items* is empty."""
    return items[0] if items else None


def _join_first(names, limit=3):
    """Join at most *limit* leading names with '/'; None if the result is empty."""
    return _none_if_empty('/'.join(names[:limit]))


def _parse_film(page_html):
    """Extract the stored fields from one movie detail page.

    Args:
        page_html: the page's HTML source as text.

    Returns:
        A dict with the keys filmname, score, showtime, mins, genres, areas,
        directors, scriptwriters, actors and comments. Every value is a
        string, or None when the field was not found or was empty.
    """
    selector = html.etree.HTML(page_html)
    # Defensive: if the parser hands back None, behave as if nothing matched
    # so every XPath-based field becomes None instead of raising.
    xpath = selector.xpath if selector is not None else (lambda _expr: [])

    film = {}

    # Title.
    film["filmname"] = _none_if_empty(
        _first(xpath('//*[@id="content"]/h1/span[1]/text()')))

    # Rating, with tabs and newlines removed.
    raw_score = _first(
        xpath('//*[@id="interest_sectl"]/div[1]/div[2]/strong/text()'))
    film["score"] = None if raw_score is None else _none_if_empty(
        raw_score.replace("\t", "").replace("\n", ""))

    # Release year: second <span> of the heading, parentheses removed.
    raw_year = _first(xpath('//*[@id="content"]/h1/span[2]/text()'))
    film["showtime"] = None if raw_year is None else _none_if_empty(
        raw_year.replace("(", "").replace(")", ""))

    # Running time with the "分钟" (minutes) suffix removed.
    raw_mins = _first(_RUNTIME_RE.findall(page_html))
    film["mins"] = None if raw_mins is None else _none_if_empty(
        raw_mins.replace('分钟', ''))

    # Genres and production countries/regions, '/'-joined.
    film["genres"] = _none_if_empty('/'.join(_GENRE_RE.findall(page_html)))
    film["areas"] = _none_if_empty('/'.join(_AREA_RE.findall(page_html)))

    # Directors and screenwriters: keep at most the first three of each.
    film["directors"] = _join_first(
        xpath('//div[@id="info"]/span[1]/span[2]/a/text()'))
    film["scriptwriters"] = _join_first(
        xpath('//*[@id="info"]/span[2]/span[2]/a/text()'))

    # Cast: names sit in nested tags, so take the element's whole text with
    # string(.), split on '/', and keep at most the first three.
    # The published code passed the list itself to range() when there were
    # fewer than three names, so short cast lists were always stored as NULL.
    # Slicing handles every length.
    cast_node = _first(xpath('//*[@id="info"]/span[3]/span[2]'))
    if cast_node is None:
        film["actors"] = None
    else:
        # NOTE(review): the published code called .replace('', '') here, which
        # is a no-op -- presumably a whitespace character was lost. Stripping
        # each name removes the padding left around the '/' separators.
        cast = [name.strip() for name in cast_node.xpath('string(.)').split('/')]
        film["actors"] = _join_first(cast)

    # Text of the rating-count link (stored in the `comments` column).
    film["comments"] = _none_if_empty(_first(xpath(
        '//*[@id="interest_sectl"]/div[1]/div[2]/div/div[2]/a/span/text()')))

    return film


# Open the database connection once for the whole run instead of once per
# movie; try/finally guarantees it is closed even if scraping fails midway.
# NOTE(review): credentials are hard-coded; consider moving them to config.
conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='123456', db='douban', charset='utf8')
try:
    for url in urls:
        # Fetch the page. A network error skips this movie rather than
        # aborting the remaining ones.
        try:
            r = requests.get(url=url, headers=headers, timeout=5)
        except requests.RequestException as e:
            print("request failed for {}: {}".format(url, e))
            time.sleep(2)
            continue
        # Pause between requests so the site is not hammered.
        time.sleep(2)

        film = _parse_film(r.text)
        for label, key in _PRINT_ORDER:
            print("{} :{}".format(label, film[key]))

        values = (
            url, film["filmname"], film["score"], film["showtime"],
            film["genres"], film["areas"], film["mins"], film["directors"],
            film["scriptwriters"], film["actors"], film["comments"])
        try:
            # The cursor is closed automatically when the with-block exits.
            with conn.cursor() as cursor:
                cursor.execute(_INSERT_SQL, values)
            conn.commit()
        except pymysql.MySQLError as e:
            print(e)
            # Undo the failed insert so the connection stays usable.
            conn.rollback()
finally:
    conn.close()

非完整代码,毕业设计找丹成学长,q746876041

正文完
 0