# Original author's note: complete (untruncated) code; graduation-project contact: 丹成学长, q746876041
import csv
import pymysql
import requests
import re
from lxml import html
import time
# Request headers
# HTTP headers passed to requests.get() for every page fetch; only a
# desktop-browser User-Agent string is set.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/63.0.3239.132 Safari/537.36"
    ),
}
# Read the movie URLs
# Load the first column of every non-empty row of the link CSV as a movie
# page URL. newline='' hands newline handling to the csv module, as its
# documentation requires for file objects passed to csv.reader.
with open('./豆瓣电影TOP250链接.csv', 'r', newline='') as f:
    # csv.reader yields [] for a blank line; skipping those avoids an
    # IndexError on row[0].
    urls = [row[0] for row in csv.reader(f) if row]
def _none_if_empty(value):
    """Return ``None`` in place of an empty string; pass anything else through."""
    return None if value == "" else value


def _extract_or_none(getter):
    """Call *getter* and return its result, or ``None`` if it fails or is empty.

    Every field is scraped best-effort: a missing node (IndexError on ``[0]``)
    or any other parsing error simply leaves that field as ``None``.
    """
    try:
        return _none_if_empty(getter())
    except Exception:
        return None


def _join_first_three(names):
    """Join at most the first three entries of *names* with ``/``."""
    return '/'.join(names[:3])


# Fetch each movie page, scrape its fields, print them, and insert one row
# into tb_film per movie.
for url in urls:
    # Request the page, then pause 2 seconds (presumably to throttle the crawl).
    r = requests.get(url=url, headers=headers, timeout=5)
    time.sleep(2)
    selector = html.etree.HTML(r.text)

    # Movie title
    filmname = _extract_or_none(
        lambda: selector.xpath('//*[@id="content"]/h1/span[1]/text()')[0]
    )
    print("filmname :{}".format(filmname))

    # Rating, with tabs and newlines stripped
    score = _extract_or_none(
        lambda: selector.xpath(
            '//*[@id="interest_sectl"]/div[1]/div[2]/strong/text()'
        )[0].replace("\t", "").replace("\n", "")
    )
    print("score :{}".format(score))

    # Release date text: the second title span with its parentheses removed
    showtime = _extract_or_none(
        lambda: selector.xpath('//*[@id="content"]/h1/span[2]/text()')[0]
        .replace("(", "").replace(")", "")
    )
    print("time :{}".format(showtime))

    # Running time, with spaces and the "分钟" unit suffix removed
    mins = _extract_or_none(
        lambda: re.findall('片长:</span>.*?>(.*?)</span>', r.text, re.S)[0]
        .replace(' ', '').replace('分钟', '')
    )
    print("mins :{}".format(mins))

    # Genres, joined with "/"
    genres_list = _extract_or_none(
        lambda: '/'.join(
            re.findall('<span property="v:genre">(.*?)</span>', r.text, re.S)
        )
    )
    print("genres_list :{}".format(genres_list))

    # Production country/region, joined with "/" and with spaces removed
    area_list = _extract_or_none(
        lambda: '/'.join(
            re.findall('<span class="pl">制片国家/地区:</span> (.*?)<br/>', r.text, re.S)
        ).replace(' ', '')
    )
    print("area_list :{}".format(area_list))

    # Directors (at most the first three)
    directors_list = _extract_or_none(
        lambda: _join_first_three(
            selector.xpath('//div[@id="info"]/span[1]/span[2]/a/text()')
        )
    )
    print("directors_list :{}".format(directors_list))

    # Screenwriters (at most the first three)
    scriptwriters_list = _extract_or_none(
        lambda: _join_first_three(
            selector.xpath('//*[@id="info"]/span[2]/span[2]/a/text()')
        )
    )
    print('scriptwriters_list :{}'.format(scriptwriters_list))

    # Lead actors (at most the first three). The names sit in nested tags, so
    # string(.) is used to collect all descendant text before splitting on "/".
    # The original fallback loop called range() on the list itself, which
    # raised TypeError and dropped actors whenever fewer than three were listed.
    actors_list = _extract_or_none(
        lambda: _join_first_three(
            selector.xpath('//*[@id="info"]/span[3]/span[2]')[0]
            .xpath('string(.)').replace(' ', '').split('/')
        )
    )
    print('actors_list :{}'.format(actors_list))

    # Text of the link span inside the rating box
    # NOTE(review): presumably the "N people rated" count - confirm against the page.
    comment = _extract_or_none(
        lambda: selector.xpath(
            '//*[@id="interest_sectl"]/div[1]/div[2]/div/div[2]/a/span/text()'
        )[0]
    )
    print("comment :{}".format(comment))

    # Store the row. conn/cursor start as None so that a failed connect() does
    # not turn into a NameError in the error/cleanup paths below.
    # NOTE(review): database credentials are hard-coded here.
    conn = None
    cursor = None
    try:
        # Open the database connection
        conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='123456', db='douban',
                               charset='utf8')
        # Create a cursor
        cursor = conn.cursor()
        # Parameterized insert; the driver handles quoting of the values
        query = 'insert into tb_film(url, filmname, score, showtime, genres, areas, mins, directors, scriptwriters, actors, comments) values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        values = (
            url, filmname, score, showtime, genres_list, area_list, mins,
            directors_list, scriptwriters_list, actors_list, comment)
        cursor.execute(query, values)
        # Commit the insert
        conn.commit()
    except Exception as e:
        print(e)
        # Roll back only if a connection was actually established
        if conn is not None:
            conn.rollback()
    finally:
        # Always release the cursor and the connection
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()