关于 Python:如何运用 Selenium 从 LAADS 批量下载 MODIS 数据

45次阅读

共计 5275 个字符,预计需要花费 14 分钟才能阅读完成。

from selenium import webdriver

from time import sleep

import tempfile

import os,sys

import pandas as pd

import geopandas as gpd

import time

# 构建查询地址

def GetURL(ProductID,StartTime,EndTime,search_file):
    """Build the LAADS search-page URL for a product, date range and study area.

    Args:
        ProductID: Product segment inserted into the URL verbatim. The
            script's entry point passes it with a trailing slash
            (``'MOD021KM--61/'``).
        StartTime: Start date string, e.g. ``'2020-06-01'``.
        EndTime: End date string, e.g. ``'2020-06-03'``.
        search_file: Path of the vector file (read with geopandas) whose
            first feature delimits the study area.

    Returns:
        The search-page URL as a string.

    Raises:
        ValueError: If no MODIS grid cell intersects the study area.
    """
    # Query boundary: bounds of the first feature in the study-area file.
    data = gpd.GeoDataFrame.from_file(search_file)
    bbox = (data.bounds.values)[0].tolist()

    # Study-area extent (upper-left and lower-right corner), rounded to one
    # decimal.  NOTE(review): not used by the tile-based URL below; kept for
    # an area-based query variant -- confirm whether it is still needed.
    Area = str(round(bbox[0],1))+','+str(round(bbox[3],1))+','+str(round(bbox[2],1))+','+str(round(bbox[1],1))

    # MODIS tile-grid vector.  Raw string: the backslashes are path
    # separators, not escape sequences (the value is unchanged).
    modis_grid_file = r'E:\***\modis_WGS84_grid_world.shp'
    modis_grid = gpd.GeoDataFrame.from_file(modis_grid_file)

    # Grid cells that intersect the first study-area geometry.
    modis_intersection = modis_grid[modis_grid.intersects(data.geometry[0])]

    # assumes columns 1 and 2 of the grid table hold the horizontal and
    # vertical tile numbers -- TODO confirm against the shapefile schema
    tiles = ["H" + str(mv[1]) + "V" + str(mv[2]) for mv in modis_intersection.values]
    if not tiles:
        raise ValueError("no MODIS grid cell intersects the area in " + str(search_file))
    path_row = 'Tile:' + ",".join(tiles)

    # Build the Search-page address from the pieces above.
    # NOTE(review): the original URL expression was lost when this listing
    # was published (only an opening quote survived).  The template below is
    # reconstructed from the layout of LAADS search-page addresses
    # (<product>/<start>..<end>/DB/<tile filter>) -- verify it against the
    # live site before relying on it.
    url = ('https://ladsweb.modaps.eosdis.nasa.gov/search/order/4/'
           + ProductID + StartTime + '..' + EndTime + '/DB/' + path_row)
    return url

# 使用 Selenium 查询影像

def SearchFileList(url):
    """Open the search page in Chrome and download its CSV file listing.

    A fresh directory named after the current timestamp is created and set
    as Chrome's download directory, so the CSV is the only file in it.

    Args:
        url: Address of the search page to open.

    Returns:
        Path of the directory the CSV was downloaded into.
    """
    # Directory named after the moment the program runs; everything Selenium
    # downloads ends up here.
    csvdir = 'E:\\***\\' + str(time.time()).replace('.','')
    os.mkdir(csvdir)

    # Selenium / Chrome configuration: no download pop-ups, custom directory.
    options = webdriver.ChromeOptions()
    prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory': csvdir}
    options.add_experimental_option('prefs', prefs)
    # Local path of chromedriver.exe (placeholder -- adjust to your machine).
    chromedriver = r"C:\***\Google\Chrome\Application\chromedriver.exe"
    # options.add_argument('--headless')  # enable to run without a browser window

    # NOTE(review): `executable_path=` and `find_element_by_xpath` are
    # Selenium 3 style calls; newer Selenium releases replaced them with
    # `Service(...)` and `find_element(By.XPATH, ...)` -- confirm the
    # installed version.
    driver = webdriver.Chrome(executable_path=chromedriver,options=options)
    try:
        # Open the Search page.
        driver.get(url)
        # The server needs time to run the search once the page is open;
        # 50 s is a fixed allowance, tune it to your connection.  (Waiting
        # explicitly for the CSV link to appear would be an alternative.)
        sleep(50)
        # Locate the CSV download link and click it.
        csvElement = driver.find_element_by_xpath('// *[ @ id ="tab4download"] / a[2]')
        csvElement.click()
        # Leave time for the CSV download to finish.
        sleep(20)
    finally:
        # Always close the browser, even when a step above raised, so no
        # Chrome/chromedriver process is left behind.
        driver.quit()
    return csvdir

# 下载影像

def MODISDown(FileDir):
    """Download every file listed in the CSV found in *FileDir*.

    The first entry of *FileDir* is read as CSV, then that file and the
    directory itself are deleted.  For each row, column 1 is split on "/"
    and segments 5 and 7 give the output sub-directory and file name.
    Files that already exist locally are skipped.

    Args:
        FileDir: Directory containing the downloaded CSV listing.

    Raises:
        FileNotFoundError: If *FileDir* contains no file (e.g. the CSV
            download did not finish).
    """
    entries = os.listdir(FileDir)
    if not entries:
        raise FileNotFoundError("no CSV listing found in " + str(FileDir))
    # Name and full path of the downloaded CSV file.
    csvfilename = entries[0]
    csvfilepath = os.path.join(FileDir, csvfilename)
    csvvalues = pd.read_csv(csvfilepath).values
    # The listing is in memory now; drop the temporary file and directory.
    os.remove(csvfilepath)
    os.rmdir(FileDir)

    total = len(csvvalues)
    for file_count, cv in enumerate(csvvalues, start=1):
        parts = cv[1].split("/")
        # Download link for this row.
        # NOTE(review): the host prefix was lost when this listing was
        # published (only "[1]" of the expression survived); it is
        # reconstructed here as the LAADS host followed by the path in
        # column 1 -- verify before use.
        modislink = 'https://ladsweb.modaps.eosdis.nasa.gov' + cv[1]
        outdir = 'E:/***/MODIS/' + parts[5]
        # makedirs also creates a missing parent and tolerates an existing
        # directory, replacing the isdir()/mkdir() pair.
        os.makedirs(outdir, exist_ok=True)
        path = outdir + '/' + parts[7]
        if os.path.exists(path):
            continue
        print("({0}/{1}) Downloading {2}".format(file_count, total, modislink.split("/")[-1]))
        with open(path, 'w+b') as out:
            geturl(modislink, out)
        # geturl only reports failures on stderr; an empty file left behind
        # would make the exists() check above skip this file on a re-run.
        if os.path.getsize(path) == 0:
            os.remove(path)

# 获取下载链接并下载影像数据

def geturl(url,out=None):
    """Fetch *url* with a bearer token, returning text or streaming to *out*.

    Args:
        url: Address to request.
        out: Optional writable file object.  When ``None`` the body is
            returned decoded as UTF-8; otherwise the body is streamed into
            *out* with progress reporting and ``None`` is returned.

    Returns:
        The decoded response body when *out* is ``None`` and the request
        succeeded; ``None`` in every other case (HTTP/URL errors are
        reported on stderr, not raised).
    """
    USERAGENT = 'tis/download.py_1.0--' + sys.version.replace('\n', '').replace('\r','')
    token = '******' # your token; available in your Earthdata profile after logging in
    # The scheme and the token must be separated by a space.
    headers = {'user-agent' : USERAGENT, 'Authorization' : 'Bearer ' + token}

    import ssl
    try:
        # NOTE(review): an SSLContext built this way does not verify server
        # certificates by default; consider ssl.create_default_context().
        CTX = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
    except AttributeError:
        # Interpreter without TLS 1.2 support: fall back to curl.  Only the
        # context creation is guarded, so an AttributeError raised elsewhere
        # is no longer mistaken for missing TLS support.
        return _geturl_with_curl(url, headers, out)

    from urllib.error import HTTPError, URLError
    from urllib.request import Request, urlopen

    try:
        response = urlopen(Request(url, headers=headers), context=CTX)
        try:
            if out is None:
                return response.read().decode('utf-8')
            start = time.time()
            # Stream the body into the output file, reporting progress.
            size = chunk_read(response, out, report_hook=chunk_report)
            elapsed = max(time.time() - start,1.0)
            # Average rate from the bytes actually written, so a missing
            # Content-Length header cannot break the arithmetic.
            rate = (size / 1024 ** 2) / elapsed
            print("Downloaded {0}b in {1:.2f}secs, Average Rate: {2:.2f}MB/sec".format(size, elapsed, rate))
        finally:
            response.close()
    except HTTPError as e:
        # `code` is an int attribute and Python 3 errors carry `reason`.
        print('HTTP GET error code: %d' % e.code, file=sys.stderr)
        print('HTTP GET error message: %s' % e.reason, file=sys.stderr)
    except URLError as e:
        print('Failed to make request: %s' % e.reason, file=sys.stderr)
    return None


def _geturl_with_curl(url, headers, out):
    """Fetch *url* by shelling out to curl; same return contract as geturl."""
    import subprocess

    args = ['curl', '--fail', '-sS', '-L', '--get', url]
    for (k,v) in headers.items():
        args.extend(['-H', ':'.join([k, v])])
    try:
        if out is None:
            # python3's subprocess.check_output returns stdout as a byte string
            result = subprocess.check_output(args)
            return result.decode('utf-8') if isinstance(result, bytes) else result
        subprocess.call(args, stdout=out)
    except subprocess.CalledProcessError as e:
        detail = getattr(e, 'message', None) or e.output
        if isinstance(detail, bytes):
            detail = detail.decode('utf-8', 'replace')
        print('curl GET error message: %s' % detail, file=sys.stderr)
    return None

# chunk_read modified from (原文此处的来源链接已缺失)

def chunk_read(response, local_file, chunk_size=10240, report_hook=None):
    """Copy *response* into *local_file* in chunks and return the byte count.

    Args:
        response: Object with a ``read(size)`` method.
        local_file: Writable file object.  If writing bytes raises
            ``TypeError``, the chunk is decoded with ``local_file.encoding``
            and written as text.
        chunk_size: Number of bytes requested per read.
        report_hook: Optional callable invoked after each non-empty chunk as
            ``report_hook(bytes_so_far, file_size)``.

    Returns:
        Number of bytes read.  A read error ends the copy early and the
        count reached so far is returned.
    """
    # The total size is only consumed by the progress hook, so it is looked
    # up only when a hook is supplied.
    file_size = get_total_size(response) if report_hook else None
    bytes_so_far = 0
    while True:
        try:
            chunk = response.read(chunk_size)
        except Exception:
            # Exception rather than a bare except: Ctrl-C and SystemExit
            # must still be able to stop the download.
            sys.stdout.write("\n > There was an error reading data. \n")
            break
        if not chunk:
            # An empty read marks the end of the stream.
            break
        try:
            local_file.write(chunk)
        except TypeError:
            local_file.write(chunk.decode(local_file.encoding))
        bytes_so_far += len(chunk)
        if report_hook:
            report_hook(bytes_so_far, file_size)
    return bytes_so_far

def chunk_report(bytes_so_far, file_size):
    """Write a one-line download progress message to stdout.

    Args:
        bytes_so_far: Number of bytes downloaded so far.
        file_size: Total size in bytes, or ``None`` when it is unknown.  A
            size of zero or less is treated as unknown, because a percentage
            cannot be computed from it.
    """
    if file_size is not None and file_size > 0:
        # Progress as a percentage, rounded to two decimals.
        percent = float(bytes_so_far) / file_size
        percent = round(percent * 100, 2)
        sys.stdout.write("> Downloaded %d of %d bytes (%0.2f%%)\r" %
                         (bytes_so_far, file_size, percent))
    else:
        # We couldn't figure out the size.
        sys.stdout.write("> Downloaded %d of unknown Size\r" % (bytes_so_far))

def get_total_size(response):
    """Return the response's Content-Length as an int, or None if unavailable.

    ``response.info().getheader(...)`` is tried first and
    ``response.getheader(...)`` second, so response objects exposing either
    accessor are supported.

    Args:
        response: Response object to inspect.

    Returns:
        The Content-Length value as an int, or ``None`` (after printing a
        notice) when the header is missing or is not a number.
    """
    try:
        raw = response.info().getheader('Content-Length')
    except AttributeError:
        try:
            raw = response.getheader('Content-Length')
        except AttributeError:
            raw = None
    try:
        # AttributeError: header absent (None); ValueError: not a number.
        return int(raw.strip())
    except (AttributeError, ValueError):
        print ("> Problem getting size")
        return None

if __name__ == "__main__":
    # Script entry point: build the search URL, fetch the CSV listing with
    # Selenium, then download every listed file.

    # Product to download; inserted into the URL as-is, including the
    # trailing slash.  (Could be taken from sys.argv[1] instead.)
    ProductID = 'MOD021KM--61/'
    # Start and end dates of the query, plain strings.  (sys.argv[2] / [3])
    StartTime = '2020-06-01'
    EndTime = '2020-06-03'
    # Vector file delimiting the query area.  (sys.argv[4])
    # Placeholder path -- replace it with a real file before running.
    search_file = r'E:\***\ 北京市 .shp'

    # Build the query address.
    url = GetURL(ProductID,StartTime,EndTime,search_file)
    # Fetch the data listing.
    csvdir = SearchFileList(url)
    # Download the data named in the listing.
    MODISDown(csvdir)

正文完
 0