download:Hadoop 零碎入门+外围精讲

package com.zzger.model;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;

import com.zzger.module.queue.UrlQueue;
import com.zzger.util.HttpUtils;
import com.zzger.util.RegexUtils;

public class WebSite {

/** * 站点url */private String url;   /** * 需要爬行的url队列 */private UrlQueue urls = new UrlQueue<>();   /** * 已爬行过的页面url */private List exitUrls = Collections.synchronizedList(new ArrayList<>());   private static final int TOTAL_THREADS = 12;    private final CountDownLatch mStartSignal = new CountDownLatch(1);    private final CountDownLatch mDoneSignal = new CountDownLatch(TOTAL_THREADS);     public WebSite(String url){    this.url = url;    urls.offer(url);//把网站首页加入需要爬行的队列中}   public void guangDu(){    new Thread(new Runnable() {        @Override        public void run() {            paxing(HttpUtils.httpGet(url));        }    }).start();}   public void paxing(String html){    if(html.lastIndexOf("下一页

")<0) return ;

    String strList = html.substring(html.indexOf("<li class=\\"next-page\\">"),            html.lastIndexOf("下一页

"));

    String url = RegexUtils.RegexString("<a href=\\"(.+?)\\"", strList);    if(url.equals("Nothing")) return ;    urls.put(url);//把url存储到队列中    paxing(HttpUtils.httpGet(url));}   public void dxcPx(){    Page page = new Gxpage(urls.take());    List<Section> list = page.ybhqSection().getSections();    for(Section section : list){        new Thread(new Runnable() {            @Override            public void run() {                mStartSignal.countDown();// 计数减一为0,工作线程真正启动具体操作                  try {                    mStartSignal.await();// 阻塞,等待mStartSignal计数为0运行前面的代码                      // 所有的工作线程都在等待同一个启动的命令                  } catch (InterruptedException e) {                    e.printStackTrace();                }                DuanZi duanzi = section.select().getModel();                System.out.println(duanzi.getTitle());                mDoneSignal.countDown();// 实现当前计数减一              }        }        ).start();    }    try    {         mDoneSignal.await();// 等待所有工作线程结束      }     catch (InterruptedException e)     {         e.printStackTrace();     }     dxcPx();//线程工作执行完后,再次获取url队列进行工作}public static void main(String[] args) {    WebSite web = new WebSite("http://duanziwang.com");    web.guangDu();    for(int i = 0; i<10;i++){        new Thread(new Runnable() {            @Override            public void run() {                web.dxcPx();            }        }).start();    }       }

}