poi 导入大数据

2019-03-19 14:57:48 wanglizheng825034277 阅读数 4859

xls和xlsx

  1. xls是旧版Excel格式文件,xlsx是新版Excel格式文件;而xlsx新版格式其实是一系列文件压缩包,
    如图:
    在这里插入图片描述
  2. xls是以二进制的方式存储,这种格式不易被其他软件读取使用;而xlsx采用了基于XML的ooxml开放文档标准,ooxml使用XML和ZIP技术结合进行文件存储,XML是一个基于文本的格式,而且ZIP容器支持内容的压缩,所以其一大优势是可以大大减小文件的尺寸;
  3. 使用POI来读写Excel文件有两种方式,一种用户模式(UserModel),读取时消耗大量内存,造成OOM问题;一种事件模式(SAX模式),仅仅关注文件内部数据,内存消耗很低;
  4. 导出文件同样如此,使用Workbook普通导出,数据量小的时候可以正常使用,但时间等待仍然很长,这时推荐使用POI提供的SXXFWorkbook处理,其使用时间窗口原理(具体可以查询)限制访问,刷出内存,降低内存消耗,提升效率。
    另外还需要注意,根据你使用的功能,仍然可能消耗大量内存,例如合并区域,超链接,注释……,这些内容只存储在内存中。
    在这里插入图片描述

导入数据(大量)

大量数据导入在网络上搜寻到的相关代码大部分通过集成POI原生的DefaultHandler重写其startElement, endElement, characters方法进行相关的解析,而POI已经将相关逻辑封装在XSSFSheetXMLHandler,只要实现暴露的接口SheetContentsHandler即可。
使用SheetContentsHandler的例子可以参考官方的XLSX2CVS
本例实现该接口:

package cn.skio.venus.api;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

import javax.xml.parsers.ParserConfigurationException;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

/**
 * @autor jasmine
 */
public class ExcelEventParser {
    private String fileName;
    private SimpleSheetContentsHandler handler;
    // 测试使用对比使用SAX和UserModel模式选择(实际使用不需要)
    private Integer saxInterupt;
	private void setHandler(SimpleSheetContentsHandler handler) {
		this.handler = handler;
	}

	// 放置读取数据
    protected List<List<String>> table = new ArrayList<>();

    public ExcelEventParser(String filename, Integer saxInterupt){
        this.fileName = filename;
        this.saxInterupt = saxInterupt;
    }

    public List<List<String>> parse() {
        OPCPackage opcPackage = null;
        InputStream inputStream = null;

        try {
            FileInputStream fileStream = new FileInputStream(fileName);
            opcPackage = OPCPackage.open(fileStream);
            XSSFReader xssfReader = new XSSFReader(opcPackage);

            StylesTable styles = xssfReader.getStylesTable();
            ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(opcPackage);
            inputStream = xssfReader.getSheetsData().next();

            processSheet(styles, strings, inputStream);
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (inputStream != null) {
                try {
                    inputStream.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (opcPackage != null) {
                try {
                    opcPackage.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return table;
    }

	// 确定XMLReader解析器,使用SAX模式解析xml文件
    private void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, InputStream sheetInputStream) throws SAXException, ParserConfigurationException, IOException {
        XMLReader sheetParser = SAXHelper.newXMLReader();

        if (handler == null) {
            setHandler(new SimpleSheetContentsHandler());
        }
        sheetParser.setContentHandler(new XSSFSheetXMLHandler(styles, strings, handler, false));

        try {
            sheetParser.parse(new InputSource(sheetInputStream));
        } catch (RuntimeException e) {
            System.out.println("---> 遇到空行读取文件结束!");
        }
    }

	// 实现SheetContentsHandler
    public class SimpleSheetContentsHandler implements SheetContentsHandler{
        protected List<String> row;
        @Override
        public void startRow(int rowNum) {
            row = new LinkedList<>();
        }

        @Override
        public void endRow(int rowNum) {
        	// 判断是否使用异常作为文件读取结束(有些Excel文件格式特殊,导致很多空行,浪费内存)
            if (saxInterupt == 1) {
                if (row.isEmpty()) {
                    throw new RuntimeException("Excel文件读取完毕");
                }
            }
			// 添加数据到list集合
            table.add(row);
        }

        /**
         * 所有单元格数据转换为string类型,需要自己做数据类型处理
         * @param cellReference 单元格索引
         * @param formattedValue 单元格内容(全部被POI格式化为字符串)
         * @param comment
         */
        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            row.add(formattedValue);
        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
        }
    }
}

经测试结果,发现使用SAX模式(抛弃了样式等,只关注数据)仅仅消耗很少内存,效率高;而普通Workbook读取数据(测试文件为5.2MB的有大量空行文件)内存消耗 > 1GB(此时线上系统OOM概率非常大);
在这里插入图片描述

导出数据(大量)

导出数据的话瓶颈主要在于数据写入Excel文件,代码(同样的74273条数据导出)如下:

	// 使用SXSSFwrokbook,大量数据处理快速
	@GetMapping("/outExcel")
    public void outPutExcel(HttpServletResponse response) throws Exception {
        // 每次写100行数据,就刷新数据出缓存
        SXSSFWorkbook wb = new SXSSFWorkbook(100); // keep 100 rows in memory, exceeding rows will be flushed to disk
        Sheet sh = wb.createSheet();
        List<Tmp> tmps = tmpDao.findAll();
        log.info("---> 数据量:{}", tmps.size());

        for(int rowNum = 0; rowNum < tmps.size(); rowNum++){
            Row row = sh.createRow(rowNum);
            Tmp tmp = tmps.get(rowNum);
            Cell cell1 = row.createCell(0);
            cell1.setCellValue(tmp.getSource());

            Cell cell2 = row.createCell(1);
            cell2.setCellValue(tmp.getName());
            Cell cell3 = row.createCell(2);
            cell3.setCellValue(tmp.getPhone());
            Cell cell4 = row.createCell(3);
            cell4.setCellValue(tmp.getCity());
        }

        String fileName = "sxssf.xlsx";
        response.setHeader("Content-Disposition", "attachment;filename=" + URLEncoder.encode(fileName, "UTF-8"));
        wb.write(response.getOutputStream());
        wb.close();
    }
	// XSSFWorkbook, 效率低下
	@GetMapping("/outExcel2")
    public void outPutExcel2(HttpServletResponse response) throws Exception {
        XSSFWorkbook wb = new XSSFWorkbook();
        Sheet sh = wb.createSheet();
        List<Tmp> tmps = tmpDao.findAll();
        log.info("---> 数据量:{}", tmps.size());

        for(int rowNum = 0; rowNum < tmps.size(); rowNum++){
            Row row = sh.createRow(rowNum);
            Tmp tmp = tmps.get(rowNum);
            Cell cell1 = row.createCell(0);
            cell1.setCellValue(tmp.getSource());

            Cell cell2 = row.createCell(1);
            cell2.setCellValue(tmp.getName());
            Cell cell3 = row.createCell(2);
            cell3.setCellValue(tmp.getPhone());
            Cell cell4 = row.createCell(3);
            cell4.setCellValue(tmp.getCity());
        }

        String fileName = "sxssf.xlsx";
        response.setHeader("Content-Disposition", "attachment;filename=" + URLEncoder.encode(fileName, "GBK"));
        wb.write(response.getOutputStream());
        wb.close();
    }

效率对比:

对象 耗时
SXSSFWorkbook 在这里插入图片描述
XSSFWorkbook 在这里插入图片描述

CPU和内存消耗对比:
在这里插入图片描述

总结

  1. 大文件读取使用SAX
  2. 大文件写入使用SXSSFWorkbook

参考链接:
[1]: https://blog.csdn.net/Holmofy/article/details/82532311
[2]: https://blog.csdn.net/daiyutage/article/details/53010491
[3]: https://www.cnblogs.com/yfrs/p/5689347.html
[4]: easyexcel

2017-04-28 21:05:49 llppyy777 阅读数 577
package com.jeeframe.cms.updata.service.impl;


import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Types;
import java.util.List;


import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;


import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;


public class ExcelReader {


    /**
     * 解析并显示一个表的内容和使用指定的样式
     * 
     * @param styles
     * @param strings
     * @param sheetInputStream
     */
    public static List<String[]> processSheet(StylesTable styles, ReadOnlySharedStringsTable strings,
            InputStream sheetInputStream, int minColumns)
            throws IOException, ParserConfigurationException, SAXException {


                InputSource sheetSource = new InputSource(sheetInputStream);
        SAXParserFactory saxFactory = SAXParserFactory.newInstance();
        SAXParser saxParser = saxFactory.newSAXParser();
        XMLReader sheetParser = saxParser.getXMLReader();
        ExcelReaderHandler handler = new ExcelReaderHandler(styles, strings, minColumns, System.out);


        sheetParser.setContentHandler(handler);
        sheetParser.parse(sheetSource);
        return handler.getRows();
    }


    /**
     * 解析第一个sheet
     * 
     * @param path
     * @param minColumns
     * @return List<String[]>
     * @throws IOException
     * @throws OpenXML4JException
     * @throws ParserConfigurationException
     * @throws SAXException
     */
    public static List<String[]> processOneSheet(String path, int minColumns)
            throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
        OPCPackage p = OPCPackage.open(path, PackageAccess.READ);
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(p);
        XSSFReader xssfReader = new XSSFReader(p);
        StylesTable styles = xssfReader.getStylesTable();
        InputStream stream = xssfReader.getSheet("rId1");
        List<String[]> list = processSheet(styles, strings, stream, minColumns);
        stream.close();
        return list;
    }


    public static void main(String[] args) throws Exception {
        /*
         * long begin = System.currentTimeMillis() ; List<String[]> list =
         * ExcelReader.processOneSheet("d:\\201401-ds.xlsx" , 18);
         * //List<String[]> list = ExcelReader.processOneSheet("d:\\out.xlsx" ,
         * 10); for (String cell : list.get(1)) { System.out.print(cell + "  ");
         * System.out.println(cell == null); } long end =
         * System.currentTimeMillis() ; System.out.println("用时:" + (end - begin)
         * /1000 + "秒");
         */
        long begin = System.currentTimeMillis();
        System.out.println(begin);
        List<String[]> list = ExcelReader.processOneSheet("d:\\test.xlsx", 18);
        long end = System.currentTimeMillis();
        System.out.println("读取用时:" + (end - begin) / 1000 + "秒,总量:" + list.size());
        Connection conn = getNew_Conn();
        conn.setAutoCommit(false);
        PreparedStatement pstmt = conn.prepareStatement("insert into temp_table values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)");
        for (int i = 0; i < list.size(); i++) {
            String[] row = list.get(i);
            for (int index = 1; index <= 18; index++) {
                if (row[index - 1] == null) {
                    pstmt.setNull(index, Types.NULL);
                } else {
                    pstmt.setObject(index, row[index - 1]);
                }
            }


            pstmt.addBatch();
            if (i > 0 && i % 10000 == 0) {
                pstmt.executeBatch();
                System.out.println("提交:" + i);
            }
        }
        pstmt.executeBatch();
        conn.commit();
        pstmt.close();
        conn.close();
        end = System.currentTimeMillis();
        System.out.println("插入用时:" + (end - begin) / 1000 + "秒");
    }


    private static Connection getNew_Conn() {
        Connection conn = null;
        try {
            Class.forName("com.mysql.jdbc.Driver");
            conn = DriverManager.getConnection(
                    "jdbc:mysql://localhost:3306/jeeframe_cms?useUnicode=true&amp;characterEncoding=UTF-8", "root",
                    "1392010");
        } catch (ClassNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (SQLException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }


        return conn;
    }


}
2016-03-03 15:59:00 weixin_34210740 阅读数 54

This one comes up quite a lot, but often the reason isn't what you might initially think. So, the first thing to check is - what's the source of the problem? Your file? Your code? Your environment? Or Apache POI?

(If you're here, you probably think it's Apache POI. However, it often isn't! A moderate laptop, with a decent but not excessive heap size, from a standing start, can normally read or write a file with 100 columns and 100,000 rows in under a couple of seconds, including the time to start the JVM).

Apache POI ships with a few programs and a few example programs, which can be used to do some basic performance checks. For testing file generation, the class to use is in the examples package, SSPerformanceTest. Run SSPerformanceTest with arguments of the writing type (HSSF, XSSF or SXSSF), the number rows, the number of columns, and if the file should be saved. If you can't run that with 50,000 rows and 50 columns in HSSF and SXSSF in under 3 seconds, and XSSF in under 10 seconds (and ideally all 3 in less than that!), then the problem is with your environment.

Next, use the example program ToCSV to try reading the a file in with HSSF or XSSF. Related is XLSX2CSV, which uses SAX parsing for .xlsx. Run this against both your problem file, and a simple one generated by SSPerformanceTest of the same size. If this is slow, then there could be an Apache POI problem with how the file is being processed (POI makes some assumptions that might not always be right on all files). If these tests are fast, then any performance problems are in your code!

2018-06-07 16:44:16 jiabin30 阅读数 289
使用POI能够导出大数据保证内存不溢出的一个重要原因是SXSSFWorkbook生成的EXCEL为2007版本,修改EXCEL2007文件后缀为ZIP打开可以看到,每一个Sheet都是一个xml文件,单元格格式和单元格坐标均用标签表示。直接使用SXSSFWorkbook来到导出EXCEL本身就是POI为了大数据量导出而量身定制的,所以导出可以直接使用SXSSFWorkbook方式。

  为了保险起见可以采用多Sheet的方式保证内存不溢出。需要注意的是Sheet名称不能重复;下载的时候需要定义好返回头。

response.setContentType("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");

  导出EXCEL较为简单,创建Workbook对象和Sheet对象往里塞值就行了。但是导入读取EXCEL的时候SXSSFWorkbook没有读取文件流的方法,只能使用XSSFWorkbook来读取,几千条数据可能就内存溢出了。

  这时候就要使用OPCPackage


public static OPCPackage open(java.io.InputStream in)
                       throws InvalidFormatException,
                              java.io.IOException

Open a package. Note - uses quite a bit more memory than open(String), which doesn't need to hold the whole zip 
file in memory, and can take advantage of native methods

Parameters:
    in - The InputStream to read the package from
Returns:
    A PackageBase object
Throws:
    InvalidFormatException
    java.io.IOException

  POI给出的API表示使用OPCPackage不需要将文件完全读取到内存中。

  调用方法

File file = uploadFile.getFile();
InputStream is = new FileInputStream(file);
excelReader.readInputStream(is);
excelReader.process();

  ExcelReader.java


/**
 * 抽象Excel2007读取器,excel2007的底层数据结构是xml文件,采用SAX的事件驱动的方法解析
 * xml,需要继承DefaultHandler,在遇到文件内容时,事件会触发,这种做法可以大大降低
 * 内存的耗费,特别使用于大数据量的文件。
 *
 */
public class Excel2007Reader extends DefaultHandler {
    //共享字符串表
    private SharedStringsTable sst;
    //上一次的内容
    private String lastContents;
    private boolean nextIsString;

    private int sheetIndex = -1;
    private List<String> rowlist = new ArrayList<String>();
    //当前行
    private int curRow = 0;
    //当前列
    private int curCol = 0;
    //日期标志
    private boolean dateFlag;
    //数字标志
    private boolean numberFlag;
    
    private boolean isTElement;
    
    private IRowReader rowReader;
    
    public void setRowReader(IRowReader rowReader){
        this.rowReader = rowReader;
    }
    
    /**只遍历一个电子表格,其中sheetId为要遍历的sheet索引,从1开始,1-3
     * @param filename
     * @param sheetId
     * @throws Exception
     */
    public void processOneSheet(String filename,int sheetId) throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();
        XMLReader parser = fetchSheetParser(sst);
        
        // 根据 rId# 或 rSheet# 查找sheet
        InputStream sheet2 = r.getSheet("rId"+sheetId);
        sheetIndex++;
        InputSource sheetSource = new InputSource(sheet2);
        parser.parse(sheetSource);
        sheet2.close();
    }

    /**
     * 遍历工作簿中所有的电子表格
     * @param filename
     * @throws Exception
     */
    public void process(String filename) throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();
        XMLReader parser = fetchSheetParser(sst);
        Iterator<InputStream> sheets = r.getSheetsData();
        while (sheets.hasNext()) {
            curRow = 0;
            sheetIndex++;
            InputStream sheet = sheets.next();
            InputSource sheetSource = new InputSource(sheet);
            parser.parse(sheetSource);
            sheet.close();
        }
    }

    public XMLReader fetchSheetParser(SharedStringsTable sst)
            throws SAXException {
        XMLReader parser = XMLReaderFactory
                .createXMLReader("org.apache.xerces.parsers.SAXParser");
        this.sst = sst;
        parser.setContentHandler(this);
        return parser;
    }

    public void startElement(String uri, String localName, String name,
            Attributes attributes) throws SAXException {
        
        // c => 单元格
        if ("c".equals(name)) {
            // 如果下一个元素是 SST 的索引,则将nextIsString标记为true
            String cellType = attributes.getValue("t");
            if ("s".equals(cellType)) {
                nextIsString = true;
            } else {
                nextIsString = false;
            }
            //日期格式
            String cellDateType = attributes.getValue("s");
            if ("1".equals(cellDateType)){
                dateFlag = true;
            } else {
                dateFlag = false;
            }
            String cellNumberType = attributes.getValue("s");
            if("2".equals(cellNumberType)){
                numberFlag = true;
            } else {
                numberFlag = false;
            }
            
        }
        //当元素为t时
        if("t".equals(name)){
            isTElement = true;
        } else {
            isTElement = false;
        }
        
        // 置空
        lastContents = "";
    }

    public void endElement(String uri, String localName, String name)
            throws SAXException {
        
        // 根据SST的索引值的到单元格的真正要存储的字符串
        // 这时characters()方法可能会被调用多次
        if (nextIsString) {
            try {
                int idx = Integer.parseInt(lastContents);
                lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
                        .toString();
            } catch (Exception e) {

            }
        } 
        //t元素也包含字符串
        if(isTElement){
            String value = lastContents.trim();
            rowlist.add(curCol, value);
            curCol++;
            isTElement = false;
            // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
            // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
        } else if ("v".equals(name)) {
            String value = lastContents.trim();
            value = value.equals("")?" ":value;
            //日期格式处理
            if(dateFlag){
                 Date date = HSSFDateUtil.getJavaDate(Double.valueOf(value));
                 SimpleDateFormat dateFormat = new SimpleDateFormat(
                 "dd/MM/yyyy");
                 value = dateFormat.format(date);
            } 
            //数字类型处理
            if(numberFlag){
                BigDecimal bd = new BigDecimal(value);
                value = bd.setScale(3,BigDecimal.ROUND_UP).toString();
            }
            rowlist.add(curCol, value);
            curCol++;
        }else {
            //如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
            if (name.equals("row")) {
                rowReader.getRows(sheetIndex,curRow,rowlist);
                rowlist.clear();
                curRow++;
                curCol = 0;
            }
        }
        
    }

    public void characters(char[] ch, int start, int length)
            throws SAXException {
        //得到单元格内容的值
        lastContents += new String(ch, start, length);
    }
}
2018-05-22 00:33:37 sinat_30314715 阅读数 15040

java代码使用poi的API解决在读取大数据量的Excel数据时候内存溢出的问题:首先我需要声明下面的工具类是在老袁博客(https://laoyuan.me/posts/java-read-big-excel-with-poi.html)基础上做了稍微的改造,我将老袁的的工具类需要2个参数改成只需要一个参数就可以完成调用,当然你可以根据你自己的情况使用。
下面是一个工具类,复制到自己的项目中直接调用即可:

1、工具类

package com.xxx.xxx.xxx;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
/**
 * 解析大数据量Excel工具类
 * @author RobinTime
 *
 */
@Component
public class ExcelParser {
    private static final Logger logger = LoggerFactory.getLogger(ExcelParser.class);
    /**
     * 表格默认处理器
     */
    private ISheetContentHandler contentHandler = new DefaultSheetHandler();
    /**
     * 读取数据
     */
    private List<String[]> datas = new ArrayList<String[]>();

    /**
     * 转换表格,默认为转换第一个表格
     * @param stream
     * @return
     * @throws InvalidFormatException
     * @throws IOException
     * @throws ParseException
     */
    public ExcelParser parse(InputStream stream)
            throws InvalidFormatException, IOException, ParseException {
        return parse(stream, 1);
    }


    /**
     * 
     * @param stream
     * @param sheetId:为要遍历的sheet索引,从1开始
     * @return
     * @throws InvalidFormatException
     * @throws IOException
     * @throws ParseException
     */
    public synchronized ExcelParser parse(InputStream stream, int sheetId)
            throws InvalidFormatException, IOException, ParseException {
        // 每次转换前都清空数据
        datas.clear();
        // 打开表格文件输入流
        OPCPackage pkg = OPCPackage.open(stream);
        try {
            // 创建表阅读器
            XSSFReader reader;
            try {
                reader = new XSSFReader(pkg);
            } catch (OpenXML4JException e) {
                logger.error("读取表格出错");
                throw new ParseException(e.fillInStackTrace());
            }

            // 转换指定单元表
            InputStream shellStream = reader.getSheet("rId" + sheetId);
            try {
                InputSource sheetSource = new InputSource(shellStream);
                StylesTable styles = reader.getStylesTable();
                ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(pkg);
                getContentHandler().init(datas);// 设置读取出的数据
                // 获取转换器
                XMLReader parser = getSheetParser(styles, strings);
                parser.parse(sheetSource);
            } catch (SAXException e) {
                logger.error("读取表格出错");
                throw new ParseException(e.fillInStackTrace());
            } finally {
                shellStream.close();
            }
        } finally {
            pkg.close();

        }
        return this;

    }

    /**
     * 获取表格读取数据,获取数据前,需要先转换数据<br>
     * 此方法不会获取第一行数据
     * 
     * @return 表格读取数据
     */
    public List<String[]> getDatas() {
        return getDatas(true);

    }

    /**
     * 获取表格读取数据,获取数据前,需要先转换数据
     * 
     * @param dropFirstRow
     *            删除第一行表头记录
     * @return 表格读取数据
     */
    public List<String[]> getDatas(boolean dropFirstRow) {
        if (dropFirstRow && datas.size() > 0) {
            datas.remove(0);// 删除表头
        }
        return datas;

    }

    /**
     * 获取读取表格的转换器
     * 
     * @return 读取表格的转换器
     * @throws SAXException
     *             SAX错误
     */
    protected XMLReader getSheetParser(StylesTable styles, ReadOnlySharedStringsTable strings) throws SAXException {
        XMLReader parser = XMLReaderFactory.createXMLReader();
        parser.setContentHandler(new XSSFSheetXMLHandler(styles, strings, getContentHandler(), false));
        return parser;
    }

    public ISheetContentHandler getContentHandler() {
        return contentHandler;
    }

    public void setContentHandler(ISheetContentHandler contentHandler) {
        this.contentHandler = contentHandler;
    }

    /**
     * 表格转换错误
     */
    public class ParseException extends Exception {
        private static final long serialVersionUID = -2451526411018517607L;

        public ParseException(Throwable t) {
            super("表格转换错误", t);
        }

    }

    public interface ISheetContentHandler extends SheetContentsHandler {

        /**
         * 设置转换后的数据集,用于存放转换结果
         * 
         * @param datas
         *            转换结果
         */
        void init(List<String[]> datas);
    }

    /**
     * 默认表格解析handder
     */
    class DefaultSheetHandler implements ISheetContentHandler {
        /**
         * 读取数据
         */
        private List<String[]> datas;
        private int columsLength;
        // 读取行信息
        private String[] readRow;
        private ArrayList<String> fristRow = new ArrayList<String>();

        @Override
        public void init(List<String[]> datas) {
            this.datas = datas;
//          this.columsLength = columsLength;
        }

        @Override
        public void startRow(int rowNum) {
            if (rowNum != 0) {
                readRow = new String[columsLength];
            }
        }

        @Override
        public void endRow(int rowNum) {
        //将Excel第一行表头的列数当做数组的长度,要保证后续的行的列数不能超过这个长度,这是个约定。
            if (rowNum == 0) {
                columsLength = fristRow.size();
                readRow = fristRow.toArray(new String[fristRow.size()]);
            }else {
                readRow = fristRow.toArray(new String[columsLength]);
            }
            datas.add(readRow.clone());
            readRow = null;
            fristRow.clear();
        }

        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            int index = getCellIndex(cellReference);//转换A1,B1,C1等表格位置为真实索引位置
            try {
                fristRow.set(index, formattedValue);
            } catch (IndexOutOfBoundsException e) {
                int size = fristRow.size();
                for (int i = index - size+1;i>0;i--){
                    fristRow.add(null);
                }
                fristRow.set(index,formattedValue);
            }
        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
        }

        /**
         * 转换表格引用为列编号
         * 
         * @param cellReference
         *            列引用
         * @return 表格列位置,从0开始算
         */
        public int getCellIndex(String cellReference) {
            String ref = cellReference.replaceAll("\\d+", "");
            int num = 0;
            int result = 0;
            for (int i = 0; i < ref.length(); i++) {
                char ch = cellReference.charAt(ref.length() - i - 1);
                num = (int) (ch - 'A' + 1);
                num *= Math.pow(26, i);
                result += num;
            }
            return result - 1;
        }
    }
}

2、调用

File tempFile = new File(this.getClass().getClassLoader().getResource("").getPath() + "tempFile\\" + (new Date()).getTime() + ".xlsx");
//传入一个路径产生流再将流传入工具类,返回解析对象,Excel的所有数据就被解析到List<String[]> 里面,遍历list任由你处置。
FileInputStream inputStream = new FileInputStream(tempFile);
ExcelParser parse = excelParser.parse(inputStream);
List<String[]> datas = parse.getDatas();