# -*- coding: utf-8 -*-
"""Query the itslaw.com case-file search API and extract page text with Goose."""
import json
import time

import requests
from goose import Goose
from goose.text import StopWordsChinese
from pyquery import PyQuery as pq


class ItSlaw(object):
    """Keyword search client for www.itslaw.com.

    Usage: call ``reset(keyword)`` to set the search term, ``fetch()`` to run
    the request and extraction, then ``get_result()`` (or the return value of
    ``fetch()``) to read the outcome.
    """

    # Site root that page paths are appended to before extraction.
    SITE_ROOT = 'http://www.itslaw.com/'
    # Proxy mapping handed to requests for every search call.
    PROXIES = {"http": "14.111.148.1"}
    # Seconds to pause before each request (presumably to throttle traffic
    # to the site -- the original code gives no reason).
    REQUEST_DELAY = 3

    def __init__(self):
        """Set up the URL template, request headers and HTTP session."""
        # Both placeholders are filled with the same keyword in fetch().
        self.url = 'http://www.itslaw.com/api/v1/caseFiles?startIndex=0&countPerPage=20&sortType=1&conditions=searchWord+{keyword}+1+{keywordcopy}'
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36",
            "Accept": "application/json, text/plain, */*",
            "Accept-Encoding": "gzip, deflate, sdch",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Host": "www.itslaw.com",
            "If-Modified-Since": "Mon, 26 Jul 1997 05:00:00 GMT",
            "Pragma": "no-cache",
            "Referer": "http://www.itslaw.com",
        }
        self.result = None
        self.keyword = None
        self.session = requests.Session()

    def reset(self, keyword):
        """Set a new search keyword and discard any previous result."""
        self.keyword = keyword
        self.result = None

    def fetch(self):
        """Run the search for the current keyword and extract page content.

        Returns:
            A one-element list holding a dict with the keys ``'title'``,
            ``'url'`` (the search API URL that was requested) and
            ``'content'`` (Goose's cleaned text). The same list is stored
            in ``self.result``.

        Raises:
            ValueError: if no keyword has been set via ``reset()``.
        """
        if self.keyword is None:
            raise ValueError('no keyword set; call reset(keyword) before fetch()')

        # Substitute the actual keyword; the original passed the string
        # literal 'self.keyword', so every search used that fixed text.
        url = self.url.format(keyword=self.keyword, keywordcopy=self.keyword)

        time.sleep(self.REQUEST_DELAY)
        # Send the headers prepared in __init__; they were previously built
        # but never attached to the request.
        r = self.session.get(url, headers=self.headers, proxies=self.PROXIES)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(r.status_code)
        print('@@' * 20)

        # NOTE(review): the path appended here is the literal string 'url',
        # so Goose always extracts http://www.itslaw.com/url. This looks like
        # a placeholder for a per-result path taken from the API response,
        # whose schema is not visible in this file -- confirm and replace.
        completed_url = self.SITE_ROOT + 'url'
        g = Goose({'stopwords_class': StopWordsChinese})
        article = g.extract(url=completed_url)

        # The original referenced an undefined name `title` (NameError);
        # use the title Goose extracted from the page instead.
        self.result = [{
            'title': article.title,
            'url': url,
            'content': article.cleaned_text,
        }]
        return self.result

    def get_result(self):
        """Return the result of the last fetch(), or None if none has run."""
        return self.result


if __name__ == '__main__':
    search = ItSlaw()
    search.reset('九州通医药集团股份有限公司')
    search.fetch()
    info = search.get_result()
    print(info)