背景公司需求为导出在线用户信息,其中第一步便是取到在线用户,本文选择去ejabberd网页取爬取在线用户页面,获得返回报文后进行截取获得用户账号。后续方便进行取缓存数据获取必要的数据为导出excel做准备。主要代码连接ejabberd,获取返回报文 /** * 请求ejabbered集群管理页面的post请求方法 * * @param url * @param userNameAndPwd * @return / public static String sendPost(String urlAddress, String userNameAndPwd) { String reponseStr = “”; BufferedReader in = null; String line = “”; try { URL url = new URL(urlAddress); URLConnection connection = url.openConnection(); logger.debug(“与ejabberd建立连接的地址为:"+urlAddress); connection.setDoOutput(true); connection.setRequestProperty(“Pragma:”, “no-cache”); connection.setRequestProperty(“Accept”, “text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,/*;q=0.8”); connection.setRequestProperty(“Cache-Control”, “no-cache”); connection.setRequestProperty(“Content-Type”, “text/xml”); connection.setRequestProperty(“User-Agent”, “Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36”); connection.setRequestProperty(“Authorization”, “Basic " + userNameAndPwd); connection.connect(); // 定义 BufferedReader输入流来读取URL的响应 in = new BufferedReader(new InputStreamReader(connection.getInputStream())); while ((line = in.readLine()) != null) { reponseStr += line; } logger.info(“请求回执:” + reponseStr); } catch (Exception e) { logger.error(e); } return reponseStr; }返回报文样例<!DOCTYPE html PUBLIC “-//W3C//DTD XHTML 1.0 Transitional//EN” “http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html xmlns=‘http://www.w3.org/1999/xhtml' xml:lang=‘en’ lang=‘en’> <head> <title>ejabberd Web Admin</title> <meta http-equiv=‘Content-Type’ content=‘text/html; charset=utf-8’/> <script src=’/admin/server/sihua1.com//additions.js’ type=‘text/javascript’> </script> <link href=’/admin/server/sihua1.com/favicon.ico’ type=‘image/x-icon’ rel=‘shortcut icon’/> <link href=’/admin/server/sihua1.com/style.css’ type=‘text/css’ rel=‘stylesheet’/> </head> <body> <div id=‘container’> <div id=‘header’> <h1> <a href=’/admin/’>ejabberd Web Admin</a> </h1> </div> <div id=‘navigation’> <ul> <li> <div id=‘navitem’> <a href=’/admin/acls/’>Access Control Lists</a> </div> </li> <li> <div id=‘navitem’> <a href=’/admin/access/’>Access Rules</a> </div> </li> <li> <div id=‘navitem’> <a href=’/admin/vhosts/’>Virtual Hosts</a> </div> </li> <li> <div id=‘navheadsub’> <a href=’/admin/server/sihua1.com/’>sihua1.com</a> </div> </li> <li> <div id=‘navitemsub’> <a href=’/admin/server/sihua1.com/acls/’>Access Control Lists</a> </div> </li> <li> <div id=‘navitemsub’> <a href=’/admin/server/sihua1.com/access/’>Access Rules</a> </div> </li> <li> <div id=‘navitemsub’> <a href=’/admin/server/sihua1.com/users/’>Users</a> </div> </li> <li> <div id=‘navitemsub’> <a href=’/admin/server/sihua1.com/online-users/’>Online Users</a> </div> </li> <li> <div id=‘navitemsub’> <a href=’/admin/server/sihua1.com/last-activity/’>Last Activity</a> </div> </li> <li> <div id=‘navitemsub’> <a href=’/admin/server/sihua1.com/nodes/’>Nodes</a> </div> </li> <li> <div id=‘navitemsub’> <a href=’/admin/server/sihua1.com/stats/’>Statistics</a> </div> </li> <li> <div id=‘navitemsub’> <a href=’/admin/server/sihua1.com/shared-roster/’>Shared Roster Groups</a> </div> </li> <li> <div id=‘navitemsub’> <a href=’/admin/server/sihua1.com/muc/’>Multi-User Chat</a> </div> </li> <li> <div id=‘navitem’> <a href=’/admin/nodes/’>Nodes</a> </div> </li> <li> <div id=‘navitem’> <a href=’/admin/stats/’>Statistics</a> </div> </li> <li> <div id=‘navitem’> <a href=’/admin/muc/’>Multi-User Chat</a> </div> </li> </ul> </div> <div id=‘content’> <h1>Online Users</h1> <a href=’../user/wxin/’>wxin@sihua1.com</a> <br/> </div> <div id=‘clearcopyright’></div> </div> <div id=‘copyrightouter’> <div id=‘copyright’> <p> <a href=‘https://www.ejabberd.im/'>ejabberd</a> (c) 2002-2017 <a href=‘https://www.process-one.net/'>ProcessOne, leader in messaging and push solutions</a> </p> </div> </div> </body></html>报文为页面html,分为导航栏、内容等部分,我们关注内容部分即可;下面a标签内为我们想要的在线用户账号(多个用户则为多个a标签) <div id=‘content’> <h1>Online Users</h1> <a href=’../user/wxin/’>wxin@sihua1.com</a> <br/> </div>截取返回报文,获得在线用户账号Jidpackage com.onewaveinc.utils;//省略了import部分…/** * 此类原为导出excel表格,其中获取在线用户账号为其中一步; * 从ejabberd返回报文进行解析截取用户账号 * @author wxin * /public class ExportXMPPUserInfo { private UserManager userManager; private UserChannelLoginManager userChannelLoginManager; private String path;// private final static String CONNECTED_USERS =“connected_users”; /* * 加载ejabbered集群的管理页面账号信息至内存 / private Map<String, String> ejabberedUserMap=new HashMap<String, String>(); /* * 加载ejabbered集群的url / private Map<String, String> ejabberedUrlMap=new HashMap<String, String>(); private String xmppManageListStr; private ChannelManager channelManager; private MemcachedFactory memcachedFactory; private static Logger logger = Logger.getInstance(ExportXMPPUserInfo.class); private String zipPath; /* * 定时导出XMPP每个机房(一个集群)的在线用户的信息 * 导出信息:用户账号,mac地址,登陆的IP,登陆域名,机顶盒的型号,版本,和以及登陆所在节点的ip, * 显示 登陆的时间,登陆的时长(现在的时间减去登陆的时间)。 / public void getEjabberdJid() { try { List<Channel> channelList = new ArrayList<Channel>(); try { channelList = channelManager.findChannelIdList(); } catch (Exception e) { e.printStackTrace(); } String serverName = “”; String serverDomain = “”; String urlAddress = “”; String userNameAndPwd = “”; String respStr = “”; String usersUrlAddress = “”; String usersRespStr = “”; for (Channel channel : channelList) { serverName = channel.getName(); serverDomain = channel.getHost(); logger.info(“此次处理的集群名称为:” + serverName); //获取所有在线用户账号 urlAddress = ejabberedUrlMap.get(serverName)+“server/"+ serverDomain +"/online-users”; userNameAndPwd = ejabberedUserMap.get(serverName); respStr = HttpUtil.sendPost(urlAddress, userNameAndPwd); List<String> jidList = new ArrayList<String>(); jidList = getJidList(respStr); } } catch (Exception e) { result = “failed”; e.printStackTrace(); } return result; } /* * 截取用户jidList * @param respStr * @return List<String> jidList / public List<String> getJidList(String respStr) { List<String> jidList = new ArrayList<String>(); int indexOne = respStr.indexOf("<h1>Online Users</h1>”)+"<h1>Online Users</h1>".length(); int indexTwo = respStr.indexOf("</a><br/></div><div id=‘clearcopyright’></div>”); if (-1 != indexOne && -1 != indexTwo && indexTwo > indexOne) { String usersStr = respStr.substring(indexOne, indexTwo); logger.debug(“在线用户列表为:"+ usersStr); String []infoArr = usersStr.split("</a><br/>”); logger.debug(“按照</a>进行拆分用户列表” + Arrays.toString(infoArr)); for (String userInfo : infoArr) { indexOne = userInfo.indexOf(”/’>")+"/’>".length(); String newUserInfo = userInfo.substring(indexOne); logger.debug(“正在处理用户JID为”+newUserInfo); jidList.add(newUserInfo); } } return jidList; } /* * 加载各个Ejabbered集群的Node节点 * 此方法在初始化时进行(init-method=“queryEjabberedNodes”) * xmppManageListStr数据格式为:xmpp,admin@sihua1.com,1=http://10.223.138.137:5280/admin/;… * 单个元素为 imoss系统中的集群名称,管理员账号,管理员密码=ejaader集群管理地址,多个采用英文分号分隔 */ public void queryEjabberedNodes(){ try { if(StringUtils.isNotBlank(xmppManageListStr)){ String[] strArrayOne=xmppManageListStr.split(";"); for(String strTempOne:strArrayOne){ String[] arr=strTempOne.split("="); String[] strArrayTwo=arr[0].split(","); logger.info(“strArrayTwo[0]="+strArrayTwo[0]+",strArrayTwo[1]=” +strArrayTwo[1]+",strArrayTwo[2]="+strArrayTwo[2] +",arr[1]="+arr[1]); ejabberedUrlMap.put(strArrayTwo[0], arr[1]); String encodeUserAndPwd=Base64Uitl.encode((strArrayTwo[1]+":"+strArrayTwo[2]).getBytes()); ejabberedUserMap.put(strArrayTwo[0], encodeUserAndPwd); } } } catch (Exception e) { logger.error(“处理配置项xmpp.management.url.list产生异常!",e); } }//省略了getter、setter部分 }补充说明ExportXMPPUserInfo.queryEjabberedNodes()在配置中配置为init-method,在初始化阶段就已经运行:将发送url、账号密码提前写入map中。配置<bean id=“contbiz.imoss.exportXMPPUserInfo” class=“com.onewaveinc.utils.ExportXMPPUserInfo” init-method=“queryEjabberedNodes”> …</bean>