超大excel文件读,避免内存溢出
excel40M+,但是用传统的读取excel方法,会报内存溢出的错误。
所以采用了下面的方式,能解决此问题:
maven依赖
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>4.0.0</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>4.0.0</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>4.0.0</version> </dependency> <!-- 读取大量excel数据时使用 --> <dependency> <groupId>com.monitorjbl</groupId> <artifactId>xlsx-streamer</artifactId> <version>2.1.0</version> </dependency>
代码:
import com.monitorjbl.xlsx.StreamingReader;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
public class Main {
public static void main(String[] args){
try{
FileInputStream fis = new FileInputStream("D:\\d\\2023财年\\新疆\\a.xlsx");
Workbook wk = StreamingReader.builder()
.rowCacheSize(100) //缓存到内存中的行数,默认是10
.bufferSize(4096) //读取资源时,缓存到内存的字节大小,默认是1024
.open(fis); //打开资源,必须,可以是InputStream或者是File,注意:只能打开XLSX格式的文件
int sheetNums = wk.getNumberOfSheets();
System.out.println(sheetNums);
List<String> sheets = new ArrayList<>();
for(int i = 0 ; i < sheetNums;i ++){
Sheet sheet = wk.getSheetAt(i);
String sheetName = wk.getSheetName(i);
//遍历所有的行
for (Row row : sheet) {
StringBuilder sb = new StringBuilder();
//遍历所有的列
for (Cell cell : row) {
sb.append(cell.getStringCellValue().replaceAll("\\s+", "").toUpperCase(Locale.ROOT) + ",");
}
if(sb.toString().contains("模型名称")){
sheets.add(sb.toString().replaceAll("模型名称:", "").
replaceAll("模型名称:", "").replaceAll("表名:", ","));
}
}
}
for(String data: sheets){
System.out.println(data);
}
}catch (Exception e){
e.printStackTrace();
}
}
}