如果用XSSFWorkbook 来读取excel,遇到文件太大时候,会导致oom, OutOfMemoryError: Java heap space
所以apache官网上有推荐
If memory footprint is an issue, then for XSSF, you can get at the underlying XML data, and process it yourself.
http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
通过xml的方式读取excel,一行一行的处理,也不需要将整个文件载入,导致内存不足情况
带注释的官网例子:
package com.datacenter.hbase.adapter; import java.io.InputStream; import java.util.Iterator; import java.util.concurrent.atomic.AtomicInteger; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.eventusermodel.XSSFReader.SheetIterator; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst; import org.apache.poi.openxml4j.opc.OPCPackage; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; public class ExcelFileHandler { /** * 解析一个sheet * @param filename * @throws Exception */ public void processOneSheet(String filename,String sheetName) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader( pkg ); SharedStringsTable sst = r.getSharedStringsTable(); //打开文件获取文件句柄 XMLReader parser = fetchSheetParser(sst); // To look up the Sheet Name / Sheet Order / rID, // you need to process the core Workbook stream. // Normally it's of the form rId# or rSheet# InputStream sheet2 = r.getSheet( sheetName ); InputSource sheetSource = new InputSource(sheet2); parser.parse(sheetSource); sheet2.close(); } /** * 解析多个sheet * @param filename * @throws Exception */ public void processAllSheets(String filename) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader( pkg ); SharedStringsTable sst = r.getSharedStringsTable(); System.out.println( "sheet.size="+sst.getCount() ); for( CTRst rst : sst.getItems() ){ //System.out.println( rst.get); } XMLReader parser = fetchSheetParser(sst); XSSFReader.SheetIterator sheets = (SheetIterator) r.getSheetsData(); while(sheets.hasNext()) { System.out.println("Processing new sheet:\n"); InputStream sheet = sheets.next(); System.out.println( "sheetname:"+sheets.getSheetName() );; InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); System.out.println(""); } } public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException { XMLReader parser = XMLReaderFactory.createXMLReader( "org.apache.xerces.parsers.SAXParser" ); //自定义处理 ContentHandler handler = new SheetHandler(sst); parser.setContentHandler(handler); return parser; } /** * See org.xml.sax.helpers.DefaultHandler javadocs * 将excel作为xml来处理 ,类似如下格式 * <row> * <cell/> * <cell/> * <cell/> * <cell/> * </row> * */ private static class SheetHandler extends DefaultHandler { private SharedStringsTable sst; private String lastContents; private boolean nextIsString; private AtomicInteger counter = new AtomicInteger( 0 ) ; private SheetHandler(SharedStringsTable sst) { this.sst = sst; } public void exist(){ if( counter .get() == 10 ){ System.exit( 0 ); } } /** * 处理<cell> */ public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { // c => cell if(name.equals("c")) { String rowNum = attributes.getValue("r") ; // Print the cell reference if( rowNum.startsWith("A") ){ exist() ; counter.incrementAndGet() ; System.out.print( rowNum + " - "); //坐标 } // Figure out if the value is an index in the SST // cellType == null ,if there is empty String cellType = attributes.getValue("t"); if(cellType != null && cellType.equals("s")) { //下面是否是字符串 nextIsString = true; } else { nextIsString = false; } } // Clear contents cache lastContents = ""; } /** * 处理结尾的 </cell> */ public void endElement(String uri, String localName, String name) throws SAXException { // Process the last contents as required. // Do now, as characters() may be called more than once if(nextIsString) { int idx = Integer.parseInt(lastContents); //获取单元格数据 lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString(); nextIsString = false; } // v => contents of a cell // Output after we've seen the string contents if(name.equals("v")) { //处理每个单元格数据 if( lastContents == null || "".equals( lastContents ) ){ System.out.print(" # null"); } System.out.print(" # "+lastContents); } if( "row".equals( name ) ){ //一行结束 System.out.println(); //change a line } } public void characters(char[] ch, int start, int length) throws SAXException { lastContents += new String(ch, start, length); } } public static void main(String[] args) throws Exception { String file = "F:\\excel\\IPAddressInfo.xlsx" ; ExcelFileHandler example = new ExcelFileHandler(); //example.processOneSheet( file , "rId1"); example.processAllSheets( file ); } }
网上的例子: 原文 http://www.cnblogs.com/lonelyxmas/archive/2013/01/29/2881958.html
上面的代码是没有处理空值的情况的,因为xml读取的时候会跳过该cell,如果要处理空值。
参考:http://www.bbsmax.com/A/MyJxWYr2zn/