# coding: utf-8

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import *
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
requests.adapters.DEFAULT_RETRIES = 5
import time
import os
import re
# Shared browser session used by every scraping helper in this module.
# Only one browser is started: creating a second driver and rebinding the
# name would leave the first browser window running with no handle to it.
# To use Firefox instead, replace the call with ``webdriver.Firefox()``
# (the class name is ``Firefox``; ``webdriver.FireFox`` does not exist).
driver = webdriver.Chrome()

# Explicit-wait helper bound to ``driver``; each ``wait.until(...)`` call
# polls for its condition for at most 10 seconds.
wait = WebDriverWait(driver, 10)
def download(url, file_name):
    """Download ``url`` to ``file_name`` unless a usable copy already exists.

    A file counts as already downloaded when it exists and is larger than
    10 bytes; anything smaller is treated as a failed leftover and fetched
    again.

    Args:
        url: Address of the document to download.
        file_name: Destination path on disk.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    if os.path.exists(file_name) and os.path.getsize(file_name) > 10:
        print("\t文件已存在:{}".format(file_name))
        return

    headers = {
        'Host': 'hubble.netease.com',
        'Origin': 'https://www.icourse163.org',
        'Referer': url.split("#")[0],
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36',
    }
    # Fetch first and open the file only once the body is in hand, so a
    # failed request cannot leave an empty file behind.
    response = requests.get(url, headers=headers, verify=False, timeout=60)
    # Do not save an HTTP error page under the document's name.
    response.raise_for_status()
    with open(file_name, "wb") as f:
        f.write(response.content)
    print("\t下载胜利:{}".format(file_name))

# Arguments of get_courseware: courseware URL, storage path, chapter range [a, b]

# CSS selector template for a chapter heading; ``{}`` is the heading's
# nth-child position inside the chapter list.
_CHAPTER_HEADING_CSS = (
    "div > div.m-learnChapterList> div.m-learnChapterNormal:nth-child({}) > div.titleBox > h3"
)

# nth-child position that the code treats as the first chapter; chapter
# offsets from ``c_range`` are added to it.
_FIRST_CHAPTER_POSITION = 3

# Matches the leading "...第N周" / "...第N章" part of a chapter heading.
_WEEK_PATTERN = re.compile(r'.*?第(.{1,3})(周|章).*?')


def _sanitize_name(text):
    """Return ``text`` with ':' turned into '-' and both slashes into spaces."""
    return text.replace(":", "-").replace("/", " ").replace("\\", " ")


def _chapter_heading(position):
    """Wait until the chapter heading at ``position`` is clickable and return it."""
    return wait.until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, _CHAPTER_HEADING_CSS.format(position))
        )
    )


def _download_chapter(index, save_dir):
    """Download every document of the chapter at nth-child ``index`` into ``save_dir``."""
    driver.refresh()
    _chapter_heading(_FIRST_CHAPTER_POSITION).click()
    heading = _chapter_heading(index)
    heading_text = heading.text
    print("{}:".format(heading_text), end="\t")

    # Use the "第N周/章" prefix as the file-name prefix when the heading has
    # one, otherwise the whole heading.
    match = _WEEK_PATTERN.match(heading_text)
    week = _sanitize_name(match.group(0) if match else heading_text)

    heading.click()
    time.sleep(3)  # give the lesson list time to render
    print(len(driver.find_elements(
        By.XPATH, '//div[@class="sourceList"]/*[@title="文档讲稿"]')))

    lesson_total = len(driver.find_elements(By.CSS_SELECTOR, 'div.u-learnLesson > h4'))
    for lesson_no in range(1, lesson_total + 1):
        lesson_heading = wait.until(
            EC.element_to_be_clickable(
                (By.CSS_SELECTOR,
                 'div.u-learnLesson:nth-of-type({}) > h4.j-name'.format(lesson_no))
            )
        )
        lesson_title = lesson_heading.text
        doc_css = f'div.u-learnLesson:nth-of-type({lesson_no}) > div.sourceList > div[title^="文档"]'
        doc_total = len(driver.find_elements(By.CSS_SELECTOR, doc_css))

        for doc_no in range(doc_total):
            # Wait for the document list to be back after navigation, then
            # re-query it: elements found before driver.back() are stale.
            wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, doc_css)))
            docs = driver.find_elements(By.CSS_SELECTOR, doc_css)
            if doc_no >= len(docs):
                break
            # Pick the doc_no-th document; always taking the first match
            # would fetch the same document on every pass.
            doc = docs[doc_no]
            doc_title = doc.get_attribute("title")
            doc.click()
            time.sleep(0.2)

            download_btn = wait.until(
                EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, '文档下载'))
            )
            download_url = download_btn.get_attribute("href")

            title = _sanitize_name(f'{lesson_title} {doc_title}')
            print(week, "   ", title)
            # Extension: text after the last '.' of the URL, cut at the
            # first '&' of any trailing query parameters.
            extension = download_url.split(".")[-1].split('&')[0]
            file_name = os.path.join(
                save_dir, week + " " + "".join(title.split()) + "." + extension
            )
            print(file_name)
            download(download_url, file_name)

            # Return to the chapter list and reopen the current chapter.
            driver.back()
            time.sleep(1)
            _chapter_heading(_FIRST_CHAPTER_POSITION).click()
            _chapter_heading(index).click()


def get_courseware(courseware_url, path, c_range=(0, 0)):
    """Download the document courseware of a course, chapter by chapter.

    Files are stored under ``<path>/<course title>_<school>/courseware``,
    where the school code is the letters between a '/' and a '-' in
    ``courseware_url``. The whole run is attempted at most twice; a second
    attempt happens only after a ``FileNotFoundError``.

    Args:
        courseware_url: URL of the course's content page.
        path: Base directory for downloads. It is never modified, so a
            retry starts from the same base directory.
        c_range: Two-item sequence ``(a, b)`` of zero-based chapter
            offsets; chapters ``a`` up to but not including ``b`` are
            processed. ``b == 0`` means "through the last chapter", so the
            default ``(0, 0)`` processes every chapter.

    Raises:
        IndexError: If ``courseware_url`` contains no school code.
    """
    for _attempt in range(2):
        try:
            driver.get(courseware_url)
            course_heading = wait.until(
                EC.element_to_be_clickable(
                    (By.CSS_SELECTOR,
                     "#g-body > div.m-learnhead > div > div > div > a.f-fl > h4")
                )
            )
            school_name = re.findall(r'/([a-zA-Z]+)-', courseware_url)[0]
            course_dir = os.path.join(path, course_heading.text + "_" + school_name)
            save_dir = os.path.join(course_dir, "courseware")
            os.makedirs(save_dir, exist_ok=True)

            # Total number of chapters on the page.
            chapter_total = len(driver.find_elements(
                By.CSS_SELECTOR,
                "div > div.m-learnChapterList> div.m-learnChapterNormal > div.titleBox > h3"))
            last = chapter_total if c_range[1] == 0 else c_range[1]

            for index in range(_FIRST_CHAPTER_POSITION + c_range[0],
                               _FIRST_CHAPTER_POSITION + last):
                _download_chapter(index, save_dir)
        except FileNotFoundError:
            print("FileNotFoundError: [Errno 2] No such file or directory: ")
        else:
            return

def main():
    """Download the courseware of one hard-coded course, then quit the browser."""
    courseware_url = 'https://www.icourse163.org/learn/XDU-1001638014?tid=1462808447#/learn/content'
    path = r"D:\大二下\信号与零碎\中国大学MOOC"
    try:
        # Courseware URL, storage path, chapter range [a, b]
        # (chapter a through chapter b; [0, 0] means all chapters).
        get_courseware(courseware_url, path, [0, 0])
    finally:
        # Quit the browser even when the download run fails part-way.
        driver.quit()

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()