from selenium import webdriver
from time import sleep
import os
import sys
import pandas as pd
import geopandas as gpd
import time
Build the search URL
def GetURL(ProductID, StartTime, EndTime, search_file):
    # Query boundary: read the search-area shapefile and take its bounding box
    data = gpd.GeoDataFrame.from_file(search_file)
    bbox = (data.bounds.values)[0].tolist()
    # Study-area extent, upper-left and lower-right corners,
    # formatted as the string the Search page expects
    Area = str(round(bbox[0], 1)) + ',' + str(round(bbox[3], 1)) + ',' + \
           str(round(bbox[2], 1)) + ',' + str(round(bbox[1], 1))
    # Load the MODIS tile grid shapefile (WGS84)
    modis_grid_file = 'E:\\***\\modis_WGS84_grid_world.shp'
    modis_grid = gpd.GeoDataFrame.from_file(modis_grid_file)
    # Tiles whose footprint intersects the query boundary
    modis_intersection = modis_grid[modis_grid.intersects(data.geometry[0])]
    path_row = 'Tile:'
    for mv in modis_intersection.values:
        path_row += 'H' + str(mv[1]) + 'V' + str(mv[2]) + ','
    path_row = path_row[0:-1]  # drop the trailing comma
    # Build the Search page URL from the pieces above.
    # NOTE: the URL pattern was truncated in the original post; the line below
    # is an assumed reconstruction of the LAADS DAAC search URL -- verify it
    # against the site before relying on it.
    url = ('https://ladsweb.modaps.eosdis.nasa.gov/search/order/4/'
           + ProductID + StartTime + '..' + EndTime
           + '/DB/' + path_row + '/' + Area)
    return url
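For orientation, here is what the Area-string logic produces for a made-up bounding box (hypothetical values, roughly a Beijing-sized extent). data.bounds gives (minx, miny, maxx, maxy), and the string is ordered upper-left corner first, then lower-right corner:

bbox = [115.4, 39.4, 117.5, 41.1]   # (minx, miny, maxx, maxy), made-up values
Area = str(round(bbox[0], 1)) + ',' + str(round(bbox[3], 1)) + ',' + \
       str(round(bbox[2], 1)) + ',' + str(round(bbox[1], 1))
print(Area)  # 115.4,41.1,117.5,39.4  -> left, top, right, bottom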
Query imagery with Selenium
def SearchFileList(url):
    # Create a folder named after the current timestamp;
    # the csv file that selenium downloads will be stored there
    csvdir = 'E:\\***\\' + str(time.time()).replace('.', '')
    os.mkdir(csvdir)
    # Configure selenium: disable popups, set the download directory
    options = webdriver.ChromeOptions()
    prefs = {'profile.default_content_settings.popups': 0,
             'download.default_directory': csvdir}
    options.add_experimental_option('prefs', prefs)
    chromedriver = r"C:\***\Google\Chrome\Application\chromedriver.exe"  # local path to chromedriver.exe
    # options.add_argument('--headless')  # run without a browser window if desired
    # (Selenium 3 API; Selenium 4 passes service=Service(chromedriver) instead)
    driver = webdriver.Chrome(executable_path=chromedriver, options=options)
    # Open the Search page
    driver.get(url)
    # After the page opens, the server still needs time to run the query,
    # so sleep 50 seconds here; adjust for your connection speed.
    sleep(50)
    # Alternatively, wait until the element containing the csv link appears,
    # e.g. with WebDriverWait -- see the sketch after this function.
    # Download the csv file: locate the link whose text contains 'csv'
    # csvElement = driver.find_element_by_link_text('csv')
    csvElement = driver.find_element_by_xpath('//*[@id="tab4download"]/a[2]')
    # Click to download
    csvElement.click()
    # Leave time for the csv download to finish
    sleep(20)
    # Close the browser
    driver.quit()
    return csvdir
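As the comments note, a fixed sleep(50) can be replaced by an explicit wait for the csv link. A minimal sketch using Selenium's standard explicit-wait API, assuming the same XPath as above:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def wait_for_csv(driver, timeout=120):
    # Block until the csv link is clickable, up to `timeout` seconds
    return WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="tab4download"]/a[2]')))

csvElement = wait_for_csv(driver) would then replace the sleep(50) / find_element pair, returning as soon as the search results are ready.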
Download the imagery
def MODISDown(FileDir):
    # Name of the downloaded csv file
    csvfilename = os.listdir(FileDir)[0]
    # Full path to the csv file
    csvfilepath = os.path.join(FileDir, csvfilename)
    # print(csvfilepath)
    csvvalues = pd.read_csv(csvfilepath).values
    os.remove(csvfilepath)
    os.rmdir(FileDir)
    # Download the data
    file_count = 0
    for cv in csvvalues:
        file_count += 1
        # Build the download link for this file.
        # NOTE: the link was truncated in the original post; prefixing the
        # LAADS host to the csv's file path is an assumed reconstruction.
        modislink = 'https://ladsweb.modaps.eosdis.nasa.gov' + cv[1]
        # Output folder keyed on one path component of the csv entry
        # (see the worked path example after this function)
        outdir = 'E:/***/MODIS/' + (cv[1].split("/"))[5]
        # outdir = 'E:/Temp/' + (cv[1].split("/"))[5]
        if not os.path.isdir(outdir):
            os.mkdir(outdir)
        path = outdir + '/' + (cv[1].split("/"))[7]
        if not os.path.exists(path):
            print("({0}/{1}) Downloading {2}".format(
                file_count, len(csvvalues), modislink.split("/")[-1]))
            with open(path, 'w+b') as out:
                geturl(modislink, out)
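To see what the index arithmetic does, here is a hypothetical csv row value shaped like a LAADS archive path (the real csv layout should be checked against an actual download):

cv1 = '/archive/allData/61/MOD021KM/2020/153/MOD021KM.A2020153.0300.061.hdf'  # made-up example
parts = cv1.split('/')
# parts -> ['', 'archive', 'allData', '61', 'MOD021KM', '2020', '153', 'MOD021KM...hdf']
print(parts[5])  # '2020'        -> used as the output subfolder
print(parts[7])  # the file name -> used as the local file name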
Fetch the download link and save the image data
def geturl(url, out=None):
    USERAGENT = 'tis/download.py_1.0--' + sys.version.replace('\n', '').replace('\r', '')
    headers = {'user-agent': USERAGENT}
    token = '******'  # your token; log in to Earthdata and find it in your profile
    headers['Authorization'] = 'Bearer ' + token
    try:
        import ssl
        CTX = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
        from urllib.request import urlopen, Request
        from urllib.error import URLError, HTTPError
        try:
            response = urlopen(Request(url, headers=headers), context=CTX)
            if out is None:
                return response.read().decode('utf-8')
            else:
                start = time.time()
                # Stream the response into the output file,
                # reporting progress as it goes
                chunk_read(response, out, report_hook=chunk_report)
                elapsed = max(time.time() - start, 1.0)
                # Average download rate
                rate = (get_total_size(response) / 1024 ** 2) / elapsed
                print("Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec".format(
                    get_total_size(response), elapsed, rate))
                # shutil.copyfileobj(response, out)
        except HTTPError as e:
            print('HTTP GET error code: %d' % e.code, file=sys.stderr)
            print('HTTP GET error message: %s' % e.reason, file=sys.stderr)
        except URLError as e:
            print('Failed to make request: %s' % e.reason, file=sys.stderr)
        return None
    except AttributeError:
        # OS X Python 2 and 3 don't support tlsv1.1+, therefore fall back to curl
        import subprocess
        try:
            args = ['curl', '--fail', '-sS', '-L', '--get', url]
            for (k, v) in headers.items():
                args.extend(['-H', ': '.join([k, v])])
            if out is None:
                # python3's subprocess.check_output returns stdout as a byte string
                result = subprocess.check_output(args)
                return result.decode('utf-8') if isinstance(result, bytes) else result
            else:
                subprocess.call(args, stdout=out)
        except subprocess.CalledProcessError as e:
            print('curl GET error message: ' + str(e.output), file=sys.stderr)
        return None
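If you would rather not manage SSL contexts and a curl fallback by hand, a streaming download with the requests library does the same job. This is a sketch of an alternative, not the original script's approach; it assumes the same Earthdata Bearer-token header:

import requests
import shutil

def geturl_requests(url, out_path, token='******'):
    # Stream the response straight to disk with token auth
    headers = {'Authorization': 'Bearer ' + token}
    with requests.get(url, headers=headers, stream=True, timeout=120) as r:
        r.raise_for_status()
        with open(out_path, 'wb') as f:
            shutil.copyfileobj(r.raw, f)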
# chunk_read modified from an external snippet (source link truncated in the original)
def chunk_read(response, local_file, chunk_size=10240, report_hook=None):
    # Total file size
    file_size = get_total_size(response)
    # Bytes downloaded so far
    bytes_so_far = 0
    # Write the file to disk chunk by chunk
    while 1:
        try:
            # Read a fixed-size chunk from the connection
            chunk = response.read(chunk_size)
        except:
            sys.stdout.write("\n > There was an error reading data. \n")
            break
        try:
            # Write the chunk to the local file
            local_file.write(chunk)
        except TypeError:
            local_file.write(chunk.decode(local_file.encoding))
        # Update the downloaded byte count after each write
        bytes_so_far += len(chunk)
        if not chunk:
            break
        if report_hook:
            # Report download progress
            report_hook(bytes_so_far, file_size)
    return bytes_so_far
def chunk_report(bytes_so_far, file_size):
    if file_size is not None:
        # Percentage of the file downloaded so far
        percent = float(bytes_so_far) / file_size
        percent = round(percent * 100, 2)
        sys.stdout.write(" > Downloaded %d of %d bytes (%0.2f%%)\r" %
                         (bytes_so_far, file_size, percent))
    else:
        # We couldn't figure out the size.
        sys.stdout.write(" > Downloaded %d of unknown Size\r" % (bytes_so_far))
def get_total_size(response):
    try:
        file_size = response.info().getheader('Content-Length').strip()
    except AttributeError:
        try:
            file_size = response.getheader('Content-Length').strip()
        except AttributeError:
            print("> Problem getting size")
            return None
    return int(file_size)
if name == "__main__":
# 定义要下载数据的信息 ProductID = 'MOD021KM--61/' # 产品号 #sys.argv[1]# # 设置数据的起始和截至工夫。其实就是依据须要结构一个简略的字符串 StartTime = '2020-06-01' # 开始工夫 #sys.argv[2]# EndTime = '2020-06-03' # 截至日期 #sys.argv[3]# search_file = r'E:\***\ 北京市 .shp' # 查问范畴 #sys.argv[4]# # 构建查问地址 url = GetURL(ProductID,StartTime,EndTime,search_file) # 获取数据列表 csvdir = SearchFileList(url) # 依据列表下载数据 MODISDown(csvdir)