增加所需依赖:
<!-- apache poi 操作Microsoft Document -->
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.17</version> </dependency> <!-- Apache POI - Java API To Access Microsoft Format Files --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.17</version> </dependency> <!-- Apache POI - Java API To Access Microsoft Format Files --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml-schemas</artifactId> <version>3.17</version> </dependency> <!-- poi eventmodel形式 依赖包 --> <dependency> <groupId>xerces</groupId> <artifactId>xercesImpl</artifactId> <version>2.11.0</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>3.14</version> </dependency> <!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml --> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>3.14</version> </dependency>
代码如下:
package com.cy.exceldata.util;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.cy.exceldata.util.SheetHandler;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.junit.jupiter.api.Test;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
/**
- 数据量比拟大(8万条以上)的excel文件解析,将excel文件解析为 行列坐标-值的模式存入map中,此形式速度快,内存耗损小 但只能读取excle文件
- 提供解决单个sheet办法 processOneSheet(String filename) 以及解决多个sheet办法 processAllSheets(String filename)
*
*/
public class LargeExcelFileReadUtil {
private LinkedHashMap<String, String>rowContents=new LinkedHashMap<String, String>();private SheetHandler sheetHandler;public LinkedHashMap<String, String> getRowContents() { return rowContents;}public void setRowContents(LinkedHashMap<String, String> rowContents) { this.rowContents = rowContents;}public SheetHandler getSheetHandler() { return sheetHandler;}public void setSheetHandler(SheetHandler sheetHandler) { this.sheetHandler = sheetHandler;}//解决一个sheetpublic void processOneSheet(String filename) throws Exception { InputStream sheet2=null; OPCPackage pkg =null; try { pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); sheet2 = r.getSheet("rId1"); InputSource sheetSource = new InputSource(sheet2); parser.parse(sheetSource); setRowContents(sheetHandler.getRowContents()); }catch (Exception e) { e.printStackTrace(); throw e; }finally{ if(pkg!=null){ pkg.close(); } if(sheet2!=null){ sheet2.close(); } }}//解决多个sheetpublic void processAllSheets(String filename) throws Exception { OPCPackage pkg =null; InputStream sheet=null; try{ pkg=OPCPackage.open(filename); XSSFReader r = new XSSFReader( pkg ); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); Iterator<InputStream> sheets = r.getSheetsData(); while(sheets.hasNext()) { System.out.println("Processing new sheet:\n"); sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); } }catch (Exception e) { e.printStackTrace(); throw e; }finally{ if(pkg!=null){ pkg.close(); } if(sheet!=null){ sheet.close(); } }}public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException { XMLReader parser = XMLReaderFactory.createXMLReader( "com.sun.org.apache.xerces.internal.parsers.SAXParser" ); setSheetHandler(new SheetHandler(sst)); ContentHandler handler = (ContentHandler) sheetHandler; parser.setContentHandler(handler); return parser;}/** * See org.xml.sax.helpers.DefaultHandler javadocs *///测试@Testpublic void test ()throws Exception { Long time=System.currentTimeMillis(); LargeExcelFileReadUtil example = new LargeExcelFileReadUtil(); example.processOneSheet("C:/Users/Administrator/Desktop/data/test/模仿数据406250.xlsx"); Long endtime=System.currentTimeMillis(); LinkedHashMap<String, String> map=example.getRowContents(); Iterator<Entry<String, String>> it= map.entrySet().iterator(); final Pattern CHINA_PATTERN = Pattern.compile("^((13[0-9])|(14[0,1,4-9])|(15[0-3,5-9])|(16[2,5,6,7])|(17[0-8])|(18[0-9])|(19[0-3,5-9]))\\d{8}$"); int count=0; int phoneNum = 0; String prePos=""; Set<String> numSet = new HashSet<String>(); while (it.hasNext()){ Map.Entry<String, String> entry=(Map.Entry<String, String>)it.next(); String pos=entry.getKey(); if(!pos.substring(1).equals(prePos)){ prePos=pos.substring(1); count++; } if(isMobile(entry.getValue())){ numSet.add(entry.getValue()); } } for (String s : numSet) { System.out.println(s); phoneNum = phoneNum + 1; } System.out.println("号码数量有: "+phoneNum); System.out.println("解析数据"+count+"条;耗时"+(endtime-time)/1000+"秒");}public static boolean isMobile(final String str) { Pattern p = null; Matcher m = null; boolean b = false; p = Pattern.compile("^[1][3,4,5,7,8][0-9]{9}$"); // 验证手机号 m = p.matcher(str); b = m.matches(); return b;}
}