download:高级爬虫实战-系统掌握破解反爬技能 挑战高薪

public static String getPubTimeVarious(String url,String urlContent) {

         String pubTime = getPubTimeFromUrl(url);       //链接外面没有,匹配文本中的    if(pubTime == null)    {        if(urlContent!=null&&!urlContent.trim().equals(""))            return extractPageDate(urlContent);    }         return pubTime;} /**从url外面抽取出公布工夫,返回YYYY-MM-DD HH:mm:ss格局的字符串 * @param url * @return */public static String getPubTimeFromUrl(String url){    Pattern p_whole = Pattern.compile(url_reg_whole);    Matcher m_whole = p_whole.matcher(url);    if(m_whole.find(0)&&m_whole.groupCount()>0)    {       String time =  m_whole.group(0);       time = time.substring(1,time.length());       //每一步都不可能超出以后工夫              if(current.compareTo(TimeUtil.strToCalendar(time, "yyyyMMdd"))>=0)    {       return time.substring(0,4)+"-"+time.substring(4,6)+"-"+              time.substring(6,8)+" "+"00:00:00";    }    }       p_whole = null;    m_whole = null;    Pattern p_sep = Pattern.compile(url_reg_sep_ymd);    Matcher m_sep = p_sep.matcher(url);    if(m_sep.find(0)&&m_sep.groupCount()>0)    {         String time =  m_sep.group(0);         time = time.substring(1,time.length());         String[] seg = time.split("[-|/|_]{1}");         Calendar theTime = Calendar.getInstance();         theTime.set(Calendar.YEAR,Integer.parseInt(seg[0]));         theTime.set(Calendar.MONTH, Integer.parseInt(seg[1]));         theTime.set(Calendar.DAY_OF_MONTH, Integer.parseInt(seg[2]));         if(current.compareTo(theTime)>=0)            {                 return seg[0]+"-"+seg[1]+"-"+seg[2]+" "+"00:00:00";            }    }    p_sep = null;    m_sep = null;    Pattern p_sep_ym = Pattern.compile(url_reg_sep_ym);    Matcher m_sep_ym = p_sep_ym.matcher(url);    if(m_sep_ym.find(0)&&m_sep_ym.groupCount()>0)    {         String time =  m_sep_ym.group(0);         time = time.substring(1,time.length());         Calendar theTime = Calendar.getInstance();         String[] seg = time.split("[-|/|_]{1}");         theTime.set(Calendar.YEAR,Integer.parseInt(seg[0]));         theTime.set(Calendar.MONTH, 
Integer.parseInt(seg[1]));         theTime.set(Calendar.DAY_OF_MONTH, 1);         if(current.compareTo(theTime)>=0)        {                  return seg[0]+"-"+seg[1]+"-"+"01"+" "+"00:00:00";        }    }         return null;}