"""
function: Receive mail over IMAP
detail: Print the text of the most recent email and download all of its attachments
author: w.royee
date: 2021-08-21
"""
import email
import email.header
import imaplib
from bs4 import BeautifulSoup
class IMAP:
    """Small IMAP client that prints the newest message and saves its attachments.

    The defaults target 163 Mail. For QQ Mail pass the QQ address, the
    third-party-client authorization code as the password, and
    ``imap_server='imap.qq.com'``.
    """

    # Charset assumed when a header fragment or text part does not declare one
    # (or declares one Python does not know).
    DEFAULT_CHARSET = 'gbk'

    # Client identity sent by front() through the IMAP ID command.
    CLIENT_ID = ("name", "XXXX", "contact", "XXXX@163.com",
                 "version", "1.0.0", "vendor", "myclient")

    def __init__(self, user_id='XXXX@163.com', password='password ',
                 imap_server='imap.163.com'):
        """
        Store the account settings used by login().

        :param user_id: mailbox address
        :param password: mailbox password (usually a third-party-client
            authorization code)
        :param imap_server: IMAP server host name
        """
        self.user_id = user_id
        self.password = password
        self.imap_server = imap_server

    def login(self):
        """
        Log in to the mail server.

        Prints the outcome of each step and terminates the process with exit
        status 1 if connecting or authenticating fails.

        :return: the authenticated IMAP connection
        """
        try:
            serv = imaplib.IMAP4_SSL(self.imap_server, 993)  # SSL encrypted
        except (OSError, imaplib.IMAP4.error) as e:
            print('imap4 服务器连贯失败:', e)
            # SystemExit instead of exit(): exit() only exists when the
            # ``site`` module has been loaded.
            raise SystemExit(1)
        print('imap4 服务器连贯胜利')

        try:
            serv.login(self.user_id, self.password)
        except (OSError, imaplib.IMAP4.error) as e:
            print('imap4 登录失败:', e)
            raise SystemExit(1)
        print('imap4 登录胜利')
        return serv

    def loginout(self, conn):
        """
        Log out of the mail server.

        :param conn: IMAP connection
        :return:
        """
        try:
            conn.close()
        except imaplib.IMAP4.error:
            # CLOSE is only legal while a mailbox is selected; when none is,
            # there is nothing to close and we still want to log out.
            pass
        conn.logout()

    def get_content(self, conn):
        """
        Fetch the newest message in the inbox and process its content.

        Prints the subject, sender and recipient, prints every text part and
        saves every attachment into the current working directory. Terminates
        the process with exit status 1 when the inbox is empty.

        :param conn: IMAP connection
        :return:
        """
        # A mailbox must be selected before searching; select() defaults to
        # select(mailbox='INBOX', readonly=False).
        conn.select()
        # Other possible criteria: 'UNSEEN' (unread only) or
        # '(FROM "someone@example.com")'.
        _, data = conn.search(None, 'ALL')
        email_list = data[0].split() if data and data[0] else []
        if not email_list:
            print('收件箱为空,已退出')
            raise SystemExit(1)

        # The last sequence number is the newest message.
        _, data = conn.fetch(email_list[-1], '(RFC822)')
        if not data or not isinstance(data[0], tuple):
            raise imaplib.IMAP4.error('FETCH returned no message data')
        # Parse the raw bytes directly: decoding the whole message as GBK
        # first corrupts (or fails on) mail in any other charset.
        msg = email.message_from_bytes(data[0][1])

        print('主题:', self._decode_header_value(msg.get('subject')))
        print('发件人:', self._decode_header_value(msg.get('from')))
        print('收件人:', self._decode_header_value(msg.get('to')))

        # walk() yields every part of the message tree.
        for part in msg.walk():
            # Multipart containers carry no data of their own.
            if part.is_multipart():
                continue
            name = part.get_filename()
            if name:
                self._save_attachment(part, name)
            else:
                self._print_text_part(part)

    def _decode_header_value(self, raw):
        """Return a header value with all RFC 2047 fragments decoded and joined."""
        if raw is None:
            return ''
        return ''.join(self.tuple_to_str(chunk)
                       for chunk in email.header.decode_header(raw))

    def _decode_bytes(self, raw, charset):
        """Decode bytes with ``charset``, falling back to DEFAULT_CHARSET."""
        try:
            return raw.decode(charset or self.DEFAULT_CHARSET)
        except (LookupError, UnicodeDecodeError):
            # Unknown or wrong charset label: show something rather than crash.
            return raw.decode(self.DEFAULT_CHARSET, errors='replace')

    def _save_attachment(self, part, raw_name):
        """Write one attachment part into the current working directory."""
        import os  # local import: only this helper needs it

        # Encoded names arrive like =?GBK?Q?=D6=D0=CE=C4=C3=FB.docx?= and must
        # be decoded before use.
        file_name = self._decode_header_value(raw_name)
        # The name comes from untrusted mail: keep only its final component so
        # it cannot escape the working directory.
        safe_name = os.path.basename(file_name.replace('\\', '/')) or 'tmp'
        print('开始下载附件:', safe_name)
        attach_data = part.get_payload(decode=True) or b''
        try:
            # Attachments are generally binary, so open with 'wb'.
            f = open(safe_name, 'wb')
        except OSError as e:
            print(e)
            safe_name = 'tmp'
            f = open(safe_name, 'wb')
        with f:
            f.write(attach_data)
        print('附件下载胜利:', safe_name)

    def _print_text_part(self, part):
        """Print a text/html or text/plain part; other types are ignored."""
        content_type = part.get_content_type()
        if content_type not in ('text/html', 'text/plain'):
            return
        payload = part.get_payload(decode=True) or b''
        text = self._decode_bytes(payload, part.get_content_charset())

        if content_type == 'text/plain':
            print('以下是邮件注释(text/plain):')
            print(text)
            return

        print('以下是邮件注释(text/html):')
        # Layouts seen so far:
        # QQ Mail
        #   1. two <div> levels:
        #      <html><body><div><div>t1</div><div>t2</div></div></body></html>
        #   2. one <div> level:
        #      <html><body><div><p>t1</p><p>t2</p></div></body></html>
        # 163 Mail
        #   one <div> level:
        #      <html><head><meta/></head><body><div>t1</div><div>t2</div></body></html>
        soup = BeautifulSoup(text, 'lxml')
        div_data = soup.find_all('div')
        blocks = div_data[1:] if len(div_data) > 1 else soup.find_all('p')
        for each in blocks:
            print(each.text)

    def front(self, conn):
        """
        Send the client identity to the server with the IMAP ID command.

        163 Mail requires this before select(); otherwise it answers:
        command SEARCH illegal in state AUTH, only allowed in states SELECTED

        :param conn: IMAP connection
        :return:
        """
        # imaplib does not know the ID command; register it as legal in the
        # AUTH state. The value must be a tuple of state names.
        imaplib.Commands['ID'] = ('AUTH',)
        conn._simple_command('ID', '("' + '" "'.join(self.CLIENT_ID) + '")')

    def tuple_to_str(self, tuple_):
        """
        Convert one (value, charset) pair from decode_header() to a string.

        :param tuple_: pair to convert, e.g. (b'\\xcd\\xf5\\xd4\\xc6', 'gbk'),
            (b' <XXXX@163.com>', None) or ('<XXXX@163.com>', None)
        :return: the decoded string
        """
        value, charset = tuple_[0], tuple_[1]
        if not isinstance(value, bytes):
            # Already text; nothing to decode.
            return value
        return self._decode_bytes(value, charset)
# Script entry point: log in, identify the client, process the newest message,
# then log out.
if __name__ == '__main__':
    # Bind the instance to its own name so the IMAP class is not shadowed.
    client = IMAP()
    conn = client.login()
    try:
        # 163 Mail needs the client identity before a mailbox is selected.
        client.front(conn)
        client.get_content(conn)
    finally:
        # Always release the server session, even if processing failed.
        client.loginout(conn)