EasyExcel实现100w数据导入导出
他山之石,可以攻玉
参考文章链接:百万数据的导入导出解决方案
个人功能实现github代码地址:batchExcelData-import-export
前置准备
Java17 SpringBoot3
100w数据生成SQL
CREATE TABLE `student_info` (`id` INT ( 11 ) NOT NULL AUTO_INCREMENT,`student_id` INT NOT NULL,`name` VARCHAR ( 20 ) DEFAULT NULL,`course_id` INT NOT NULL,`class_id` INT ( 11 ) DEFAULT NULL,`create_time` DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,PRIMARY KEY ( `id` )
) ENGINE = INNODB AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8;
CREATE TABLE `course` (`id` INT ( 11 ) NOT NULL AUTO_INCREMENT,`course_id` INT NOT NULL,`course_name` VARCHAR ( 40 ) DEFAULT NULL,PRIMARY KEY ( `id` )
) ENGINE = INNODB AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8;DELIMITER //
CREATE FUNCTION rand_string ( n INT ) RETURNS VARCHAR (255) #该函数会返回一个字符串
BEGINDECLAREchars_str VARCHAR ( 100 ) DEFAULT 'abcdefghijklmnopqrstuvwxyzABCDEFJHIJKLMNOPQRSTUVWXYZ';DECLAREreturn_str VARCHAR ( 255 ) DEFAULT '';DECLAREi INT DEFAULT 0;WHILE i < n DOSET return_str = CONCAT(return_str,SUBSTRING( chars_str, FLOOR(1+RAND()* 52), 1 ));SET i = i + 1;END WHILE;RETURN return_str;
END //
DELIMITER;#函数2:创建随机数函数
DELIMITER //
CREATE FUNCTION rand_num (from_num INT ,to_num INT) RETURNS INT(11)
BEGIN
DECLARE i INT DEFAULT 0;
SET i = FLOOR(from_num +RAND()*(to_num - from_num+1)) ;
RETURN i;
END //
DELIMITER ;show variables like 'log_bin_trust_function_creators';set global log_bin_trust_function_creators=1;# 存储过程1:创建插入课程表存储过程
DELIMITER //
CREATE PROCEDURE insert_course( max_num INT )
BEGIN
DECLARE i INT DEFAULT 0;
SET autocommit = 0; #设置手动提交事务
REPEAT #循环
SET i = i + 1; #赋值
INSERT INTO course (course_id, course_name ) VALUES
(rand_num(10000,10100),rand_string(6));
UNTIL i = max_num END REPEAT;
COMMIT; #提交事务
END //
DELIMITER ;# 存储过程2:创建插入学生信息表存储过程
DELIMITER //
CREATE PROCEDURE insert_stu( max_num INT )
BEGIN
DECLARE i INT DEFAULT 0;
SET autocommit = 0; #设置手动提交事务
REPEAT #循环
SET i = i + 1; #赋值
INSERT INTO student_info (course_id, class_id ,student_id ,NAME ) VALUES
(rand_num(10000,10100),rand_num(10000,10200),rand_num(1,200000),rand_string(6));
UNTIL i = max_num
END REPEAT;
COMMIT; #提交事务
END //
DELIMITER ;CALL insert_course(100); -- 0.038
CALL insert_stu(1000000); -- 341.03s
pom
<dependencies><dependency><groupId>org.springframework.boot</groupId><artifactId>spring-boot-starter-web</artifactId></dependency><dependency><groupId>com.baomidou</groupId><artifactId>mybatis-plus-spring-boot3-starter</artifactId><version>3.5.7</version></dependency><dependency><groupId>com.alibaba.fastjson2</groupId><artifactId>fastjson2</artifactId><version>2.0.47</version></dependency><dependency><groupId>com.alibaba</groupId><artifactId>easyexcel</artifactId><version>3.1.0</version></dependency>
<!-- <dependency>-->
<!-- <groupId>org.mybatis.spring.boot</groupId>-->
<!-- <artifactId>mybatis-spring-boot-starter</artifactId>-->
<!-- <version>3.0.0</version>-->
<!-- </dependency>--><dependency><groupId>mysql</groupId><artifactId>mysql-connector-java</artifactId></dependency><dependency><groupId>org.projectlombok</groupId><artifactId>lombok</artifactId></dependency></dependencies>
yml
spring:datasource:url: jdbc:mysql://localhost:3306/mysql_study_db?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=trueusername: rootpassword: root123driver-class-name: com.mysql.cj.jdbc.Drivertype: com.alibaba.druid.pool.DruidDataSourcedruid:# 初始连接数initialSize: 5# 最小连接池数量minIdle: 10# 最大连接池数量maxActive: 20# 配置获取连接等待超时的时间maxWait: 60000# 配置连接超时时间connectTimeout: 30000# 配置网络超时时间socketTimeout: 60000# 配置间隔多久才进行一次检测,检测需要关闭的空闲连接,单位是毫秒timeBetweenEvictionRunsMillis: 60000# 配置一个连接在池中最小生存的时间,单位是毫秒minEvictableIdleTimeMillis: 300000# 配置一个连接在池中最大生存的时间,单位是毫秒maxEvictableIdleTimeMillis: 900000# 配置检测连接是否有效validationQuery: SELECT 1 FROM DUAL# 增加上传文件的大小限制servlet:multipart:max-file-size: 50MBmax-request-size: 50MBmybatis-plus:configuration:
# log-impl: org.apache.ibatis.logging.stdout.StdOutImpl # 配置MyBatis日志mapper-locations: classpath:mapper/*.xml
server:port: 7777
导出:
思路:不能写到一个Sheet就写多个,不能一次性读取100w就分批次读取,不能一行行写Excel就达到一定数量就写
写100w数据到excel中 导出所用时间:41秒
写10个sheet 每个10w 每次写2w
业务代码
@GetMapping("/export")public String exportExcel(HttpServletResponse response) {{OutputStream outputStream = null;try {long startTime = System.currentTimeMillis();System.out.println("导出开始时间:" + startTime);outputStream = response.getOutputStream();String fileName = new String(("excel100w.xlsx").getBytes(), "UTF-8");// 创建ExcelWriter
// ExcelWriter excelWriter = EasyExcel.write(fileName, StudentInfo.class).build();//本地文件ExcelWriter excelWriter = EasyExcel.write(outputStream, StudentInfo.class).build();//浏览器//模拟统计查询的数据数量这里模拟100w//记录总数:实际中需要根据查询条件进行统计即可Integer totalCount = Math.toIntExact(service.findCount());//每一个Sheet存放10w条数据
// Integer sheetDataRows = ExcelConstants.PER_SHEET_ROW_COUNT;Integer sheetDataRows = 100000;//每次写入的数据量2w
// Integer writeDataRows = ExcelConstants.PER_WRITE_ROW_COUNT;Integer writeDataRows = 20000;//计算需要的Sheet数量Integer sheetNum = totalCount % sheetDataRows == 0 ? (totalCount / sheetDataRows) : (totalCount / sheetDataRows + 1);//计算一般情况下每一个Sheet需要写入的次数(一般情况不包含最后一个sheet,因为最后一个sheet不确定会写入多少条数据)Integer oneSheetWriteCount = sheetDataRows / writeDataRows;//计算最后一个sheet需要写入的次数Integer lastSheetWriteCount = totalCount % sheetDataRows == 0 ? oneSheetWriteCount : (totalCount % sheetDataRows % writeDataRows == 0 ? (totalCount / sheetDataRows / writeDataRows) : (totalCount / sheetDataRows / writeDataRows + 1));//开始分批查询分次写入//注意这次的循环就需要进行嵌套循环了,外层循环是Sheet数目,内层循环是写入次数for (int sheetIndex = 0; sheetIndex < sheetNum; sheetIndex++) {long s1 = System.currentTimeMillis();log.info("sheet:{} start", sheetIndex + 1);// 创建新的 sheetWriteSheet writeSheet = EasyExcel.writerSheet("Sheet" + (sheetIndex + 1)).build();//循环写入次数: j的自增条件是当不是最后一个Sheet的时候写入次数为正常的每个Sheet写入的次数,如果是最后一个就需要使用计算的次数lastSheetWriteCountfor (int j = 0; j < (sheetIndex != sheetNum - 1 ? oneSheetWriteCount : lastSheetWriteCount); j++) {//分页查询一次2wList<StudentInfo> stuList = service.findByPage(j + 1 + oneSheetWriteCount * sheetIndex, writeDataRows);if (!CollectionUtils.isEmpty(stuList)) {//写数据excelWriter.write(stuList, writeSheet);}}long s2 = System.currentTimeMillis();log.info("sheet:{} end {} ms", sheetIndex + 1, s2 - s1);}// 下载EXCELresponse.setContentType("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");response.setCharacterEncoding("utf-8");response.setHeader("Content-Disposition", "attachment;filename=" + fileName);excelWriter.finish();outputStream.flush();//导出时间结束long endTime = System.currentTimeMillis();System.out.println("导出结束时间:" + endTime + "ms");System.out.println("导出所用时间:" + (endTime - startTime) / 1000 + "秒");} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();} finally {if (outputStream != null) {try {outputStream.close();} catch (Exception e) {e.printStackTrace();}}}}return "success";}
结果
导出开始时间:1723191448429
2024-08-09T16:17:28.744+08:00 INFO 18552 --- [nio-7777-exec-1] com.alibaba.druid.pool.DruidDataSource : {dataSource-1} inited
2024-08-09T16:17:29.114+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:1 start
2024-08-09T16:17:32.155+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:1 end 3041 ms
2024-08-09T16:17:32.155+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:2 start
2024-08-09T16:17:34.787+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:2 end 2632 ms
2024-08-09T16:17:34.787+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:3 start
2024-08-09T16:17:37.695+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:3 end 2908 ms
2024-08-09T16:17:37.695+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:4 start
2024-08-09T16:17:40.722+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:4 end 3027 ms
2024-08-09T16:17:40.722+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:5 start
2024-08-09T16:17:44.782+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:5 end 4060 ms
2024-08-09T16:17:44.782+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:6 start
2024-08-09T16:17:50.179+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:6 end 5397 ms
2024-08-09T16:17:50.179+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:7 start
2024-08-09T16:17:56.353+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:7 end 6174 ms
2024-08-09T16:17:56.354+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:8 start
2024-08-09T16:18:02.744+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:8 end 6391 ms
2024-08-09T16:18:02.744+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:9 start
2024-08-09T16:18:09.716+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:9 end 6972 ms
2024-08-09T16:18:09.716+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:10 start
2024-08-09T16:18:16.214+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:10 end 6498 ms
导出结束时间:1723191507222ms
导出所用时间:58秒
ps:使用分页查询的时候要注意引入MyBatisPlus的分页插件
导入:
思路:分批读取+JDBC分批插入+手动事务控制
ps:JDBC连接MySQL时,如果要使用批处理功能,需要在url 中加入参数?rewriteBatchedStatements=true
业务代码
/*** the request was rejected because its size (45665808) exceeds the configured maximum (10485760)* <p>* spring.servlet.multipart.max-file-size=50MB* spring.servlet.multipart.max-request-size=50MB*** ΔJDBC连接MySQL时,如果要使用批处理功能,需要在url 中加入参数?rewriteBatchedStatements=true* 1 没加 ------总耗时:159188ms------* 2 加了 43533ms** 3.65倍** @param file* @return*/@PostMapping("/import")public String importExcel(@RequestParam("file") MultipartFile file) {//记录开始读取Excel时间,也是导入程序开始时间long startReadTime = System.currentTimeMillis();System.out.println("------开始读取Excel的Sheet时间(包括导入数据过程):" + startReadTime + "ms------");//读取所有Sheet的数据.每次读完一个Sheet就会调用这个方法try {EasyExcel.read(file.getInputStream(), StudentInfo.class, new EasyExceGeneralDatalListener(service)).doReadAll();} catch (IOException e) {throw new RuntimeException(e);}long endReadTime = System.currentTimeMillis();System.out.println("------结束读取Excel的Sheet时间(包括导入数据过程):" + endReadTime + "ms------");System.out.println("------总耗时:" + (endReadTime-startReadTime) + "ms------");return "success";}
EasyExcel监听器
package com.coding.entity.listener;import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
import com.coding.entity.StudentInfo;
import com.coding.service.StudentInfoService;import java.util.ArrayList;
import java.util.List;
import java.util.Map;// 事件监听
public class EasyExceGeneralDatalListener extends AnalysisEventListener<StudentInfo> {/*** 处理业务逻辑的Service,也可以是Mapper*/private StudentInfoService service;/*** 用于存储读取的数据*/private List<StudentInfo> dataList = new ArrayList<StudentInfo>();public EasyExceGeneralDatalListener() {}public EasyExceGeneralDatalListener(StudentInfoService service) {this.service = service;}/*** easyexcel invoke 报错 class java.util.LinkedHashMap cannot be cast to class com.coding.entity.StudentInfo (java.util.LinkedHashMap is in module java.base of loader 'bootstrap'; com.coding.entity.StudentInfo is in unnamed module of loader 'app')EasyExcel.read(fileName, StudentInfo.class, listener).sheet().doRead();*/@Overridepublic void invoke(StudentInfo data, AnalysisContext context) {//数据add进入集合dataList.add(data);//size是否为100000条:这里其实就是分批.当数据等于10w的时候执行一次插入if (dataList.size() >= 100000) {//存入数据库:数据小于1w条使用Mybatis的批量插入即可;saveData();//清理集合便于GC回收dataList.clear();}}/*** 保存数据到DB** @param* @MethodName: saveData* @return: void*/private void saveData() {service.import2DBFromExcel10w(dataList);dataList.clear();}/*** Excel中所有数据解析完毕会调用此方法** @param: context* @MethodName: doAfterAllAnalysed* @return: void*/@Overridepublic void doAfterAllAnalysed(AnalysisContext context) {saveData();dataList.clear();}
}
JDBC批处理 事务
@Overridepublic void import2DBFromExcel10w(List<StudentInfo> dataList) {//结果集中数据为0时,结束方法.进行下一次调用if (dataList.size() == 0) {return ;}//JDBC分批插入+事务操作完成对10w数据的插入Connection conn = null;PreparedStatement ps = null;try {long startTime = System.currentTimeMillis();System.out.println(dataList.size() + "条,开始导入到数据库时间:" + startTime + "ms");conn = JDBCDruidUtils.getConnection();//控制事务:默认不提交conn.setAutoCommit(false);String sql = "insert into student_info (student_id,name,course_id,class_id,create_time) values(?,?,?,?,?)";ps = conn.prepareStatement(sql);//循环结果集:这里循环不支持"烂布袋"表达式for (int i = 0; i < dataList.size(); i++) {StudentInfo item = dataList.get(i);ps.setInt(1, item.getStudentId());ps.setString(2, item.getName());ps.setInt(3, item.getCourseId());ps.setInt(4, item.getClassId());ps.setDate(5, new Date(System.currentTimeMillis()));//将一组参数添加到此 PreparedStatement 对象的批处理命令中。ps.addBatch();}//执行批处理ps.executeBatch();//手动提交事务conn.commit();long endTime = System.currentTimeMillis();System.out.println(dataList.size() + "条,结束导入到数据库时间:" + endTime + "ms");System.out.println(dataList.size() + "条,导入用时:" + (endTime - startTime) + "ms");} catch (Exception e) {e.printStackTrace();} finally {//关连接JDBCDruidUtils.close(conn, ps);}}
JDBCDruidUtils
package com.coding.util;import com.alibaba.druid.pool.DruidDataSourceFactory;import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Properties;/*** @description <JDBC工具类>* @author: zhouchaoyu* @Date: 2024-08-08*/
//JDBC工具类
public class JDBCDruidUtils {private static DataSource dataSource;/*创建数据Properties集合对象加载加载配置文件*/static {Properties pro = new Properties();//加载数据库连接池对象try {//获取数据库连接池对象pro.load(JDBCDruidUtils.class.getClassLoader().getResourceAsStream("druid.properties"));
// pro.forEach((key, value) -> System.out.println(key + ": " + value));dataSource = DruidDataSourceFactory.createDataSource(pro);} catch (Exception e) {e.printStackTrace();}}/*获取连接*/public static Connection getConnection() throws SQLException {return dataSource.getConnection();}/*** 关闭conn,和 statement独对象资源** @param connection* @param statement* @MethodName: close* @return: void*/public static void close(Connection connection, Statement statement) {if (connection != null) {try {connection.close();} catch (SQLException e) {e.printStackTrace();}}if (statement != null) {try {statement.close();} catch (SQLException e) {e.printStackTrace();}}}/*** 关闭 conn , statement 和resultset三个对象资源** @param connection* @param statement* @param resultSet* @MethodName: close* @return: void*/public static void close(Connection connection, Statement statement, ResultSet resultSet) {close(connection, statement);if (resultSet != null) {try {resultSet.close();} catch (SQLException e) {e.printStackTrace();}}}/*获取连接池对象*/public static DataSource getDataSource() {return dataSource;}}
druid.properties
# Database connection settings
driverClassName=com.mysql.cj.jdbc.Driver
url=jdbc:mysql://localhost:3306/mysql_study_db?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true&rewriteBatchedStatements=true
username=root
password=root123# Druid connection pool settings
initialSize=5
minIdle=5
maxActive=20
maxWait=60000
timeBetweenEvictionRunsMillis=60000
minEvictableIdleTimeMillis=300000
validationQuery=SELECT 1
testWhileIdle=true
testOnBorrow=false
testOnReturn=false
poolPreparedStatements=true
maxPoolPreparedStatementPerConnectionSize=20
结果
------开始读取Excel的Sheet时间(包括导入数据过程):1723192568422ms------
100000条,开始导入到数据库时间:1723192572571ms
2024-08-09T16:36:12.611+08:00 INFO 18552 --- [nio-7777-exec-3] com.alibaba.druid.pool.DruidDataSource : {dataSource-2} inited
100000条,结束导入到数据库时间:1723192575060ms
100000条,导入用时:2489ms
100000条,开始导入到数据库时间:1723192577309ms
100000条,结束导入到数据库时间:1723192579305ms
100000条,导入用时:1996ms
100000条,开始导入到数据库时间:1723192582523ms
100000条,结束导入到数据库时间:1723192584428ms
100000条,导入用时:1905ms
100000条,开始导入到数据库时间:1723192586686ms
100000条,结束导入到数据库时间:1723192588795ms
100000条,导入用时:2109ms
100000条,开始导入到数据库时间:1723192590817ms
100000条,结束导入到数据库时间:1723192592851ms
100000条,导入用时:2034ms
100000条,开始导入到数据库时间:1723192594977ms
100000条,结束导入到数据库时间:1723192597116ms
100000条,导入用时:2139ms
100000条,开始导入到数据库时间:1723192599090ms
100000条,结束导入到数据库时间:1723192601174ms
100000条,导入用时:2084ms
100000条,开始导入到数据库时间:1723192603465ms
100000条,结束导入到数据库时间:1723192605340ms
100000条,导入用时:1875ms
100000条,开始导入到数据库时间:1723192607375ms
100000条,结束导入到数据库时间:1723192609421ms
100000条,导入用时:2046ms
100000条,开始导入到数据库时间:1723192611420ms
100000条,结束导入到数据库时间:1723192613705ms
100000条,导入用时:2285ms
2024-08-09T16:36:53.875+08:00 INFO 18552 --- [nio-7777-exec-3] org.ehcache.core.EhcacheManager : Cache 'a1eb40dd-8471-4d69-b6c5-5b7cca3a84b6' removed from EhcacheManager.
2024-08-09T16:36:53.875+08:00 INFO 18552 --- [nio-7777-exec-3] org.ehcache.core.EhcacheManager : Cache 'a1eb40dd-8471-4d69-b6c5-5b7cca3a84b6' removed from EhcacheManager.
------结束读取Excel的Sheet时间(包括导入数据过程):1723192613880ms------
------总耗时:45458ms------