需求背景:解析一个CSV文件(大约15万条数据),获取CSV文件的表格数据,并保存到数据库中(这里选择sqlite数据库,当然也可以选择其他数据库)
思路:1、查看CSV文件的表头,方便解析
2、使用代码打开这个csv文件,循环解析文件内容
3、 批量分次保存到数据库中
具体实现如下:
- 添加对应的依赖
<!--1-sqlite相关--><!-- https://mvnrepository.com/artifact/org.xerial/sqlite-jdbc --><dependency><groupId>org.xerial</groupId><artifactId>sqlite-jdbc</artifactId><version>3.42.0.0</version></dependency><!--2-sqlite 方言--><dependency><groupId>com.zsoltfabok</groupId><artifactId>sqlite-dialect</artifactId><version>1.0</version></dependency><!-- 对数据库操作的统计 --><dependency><groupId>p6spy</groupId><artifactId>p6spy</artifactId><version>3.9.1</version></dependency><!-- Apache Commons CSV (可选) --><dependency><groupId>org.apache.commons</groupId><artifactId>commons-csv</artifactId><version>1.8</version> <!-- 根据需要调整版本 --></dependency><!-- io常用工具类 --><dependency><groupId>commons-io</groupId><artifactId>commons-io</artifactId><version>2.16.1</version></dependency><!-- 文件上传工具类 --><dependency><groupId>commons-fileupload</groupId><artifactId>commons-fileupload</artifactId><version>1.5</version></dependency>
- 业务代码逻辑实现
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.sql.*;/*** CSV 到 SQLite 实用程序** @author xjs*/
public class CsvToSQLiteUtils {public static void main(String[] args) {// CSV文件路径 CSV 文件的表头为:"雨量站", "降雨量", "经度", "纬度", "时间"String csvFilePath = "D:\\home\\files\\rainGaugeStationContainsCoordinates.csv";// SQLite数据库文件路径String DATABASE_URL = "jdbc:sqlite:D:\\home\\db\\rainfall.db";// 创建表名Timestamp timestamp = new Timestamp(System.currentTimeMillis());String tableName = "rainfall_" + timestamp.getTime();System.out.println("tableName = " + tableName);// 读取 CSV 文件并保存到 SQLite 数据库中readCsvSaveLocal(csvFilePath, tableName, DATABASE_URL);System.out.println("CSV文件读取完成并保存到SQLite数据库中。");}/*** 读取 CSV 保存本地** @param stationRainFile Station Rain 文件* @param tableName 表名称*/public static void readCsvSaveLocal(String stationRainFile, String tableName, String databaseUrl) {// 1. 创建SQLite数据库连接try (Connection conn = DriverManager.getConnection(databaseUrl)) {if (conn != null) {// 2. 创建表(如果不存在)createTableIfNotExists(conn, tableName);// 3. 设置SQLite连接的字符编码为UTF-8try (Statement stmt = conn.createStatement()) {// 设置SQLite编码为UTF-8stmt.execute("PRAGMA encoding = 'UTF-8';");}// 4. 开始事务conn.setAutoCommit(false);// 5. 读取CSV文件并插入数据readCsvAndInsertDatas(stationRainFile, conn, tableName);// 6. 提交事务conn.commit();System.out.println("数据成功插入到SQLite数据库!");}} catch (SQLException e) {System.err.println("SQLite连接失败: " + e.getMessage());}}/*** 创建表,如果不存在的话*/private static void createTableIfNotExists(Connection conn, String tableName) throws SQLException {try (Statement stmt = conn.createStatement()) {String CREATE_TABLE_SQL ="CREATE TABLE IF NOT EXISTS " + tableName + " (" +" id INTEGER PRIMARY KEY AUTOINCREMENT," +" station_name TEXT NOT NULL," +" rainfall REAL NOT NULL," +" longitude REAL NOT NULL," +" latitude REAL NOT NULL," +" datetime TEXT NOT NULL" +");";stmt.execute(CREATE_TABLE_SQL);}}/*** 读取 CSV 并插入数据** @param csvFilePath CSV 文件路径* @param conn 控制室* @param tableName 表名称*/private static void readCsvAndInsertDatas(String csvFilePath, Connection conn, String tableName) {// 使用 Apache Commons CSV 读取CSV文件try (Reader reader = new InputStreamReader(Files.newInputStream(Paths.get(csvFilePath)), "GBK")) {Iterable<CSVRecord> records = CSVFormat.DEFAULT.withHeader("雨量站", "降雨量", "经度", "纬度", "时间").withSkipHeaderRecord() // 跳过表头.parse(reader);String insertDataSql = "INSERT INTO " + tableName + " (station_name, rainfall, longitude, latitude, datetime) VALUES (?, ?, ?, ?, ?);";try (PreparedStatement pstmt = conn.prepareStatement(insertDataSql)) {// 批量大小int batchSize = 1000;int count = 0;for (CSVRecord record : records) {// 获取每一列的数据String stationName = record.get("雨量站");double rainfall = Double.parseDouble(record.get("降雨量"));double longitude = Double.parseDouble(record.get("经度"));double latitude = Double.parseDouble(record.get("纬度"));String datetime = record.get("时间");// 设置插入数据的参数pstmt.setString(1, stationName);pstmt.setDouble(2, rainfall);pstmt.setDouble(3, longitude);pstmt.setDouble(4, latitude);pstmt.setString(5, datetime);// 添加到批处理pstmt.addBatch();count++;// 每batchSize条数据执行一次批量插入if (count % batchSize == 0) {pstmt.executeBatch();}}// 执行剩余的批量插入pstmt.executeBatch();} catch (SQLException e) {System.err.println("批量插入数据时出错: " + e.getMessage());}} catch (IOException e) {System.err.println("读取CSV或插入数据时出错: " + e.getMessage());}}/*** 计算总降雨量*/public static Double getTotal(String tableName) {// 1. 创建SQLite数据库连接try (Connection conn = DriverManager.getConnection("jdbc:sqlite:rainfall.db")) {if (conn != null) {// 2. 执行SQL查询String queryDataSql = "select sum(rainfall_difference) as total from(SELECT ABS( (SELECT rainfall FROM " + tableName + " WHERE station_name = s.station_name ORDER BY datetime ASC LIMIT 1) - (SELECT rainfall FROM " + tableName + " WHERE station_name = s.station_name ORDER BY datetime DESC LIMIT 1)) AS rainfall_difference FROM " + tableName + " s GROUP BY station_name)";// 3. 处理查询结果try (PreparedStatement pstmt = conn.prepareStatement(queryDataSql)) {ResultSet rs = pstmt.executeQuery();while (rs.next()) {// 获取总和return rs.getDouble("total");}} catch (SQLException e) {System.err.println("查询数据时出错: " + e.getMessage());}}} catch (SQLException e) {System.err.println("SQLite连接失败: " + e.getMessage());}return null;}
至此,就可以把CSV文件的内容保存到sqlite数据库中了!