逗号分隔值(Comma-Separated Values,CSV,有时也称为字符分隔值,因为分隔字符也可以不是逗号),其文件以纯文本形式存储表格数据(数字和文本)。纯文本意味着该文件是一个字符序列,不含必须像二进制数字那样被解读的数据。CSV文件由任意数目的记录组成,记录间以某种换行符分隔;每条记录由字段组成,字段间的分隔符是其它字符或字符串。
如果不想看过程分析,想直接看代码请跳到最后!!!
分隔符: 最常见的分隔符是逗号或制表符,但是由于项目中使用的是︴
,所以以下案例以此字符演示。
思路:
传入.csv文件集所在的目录,例如new File("C:\\Users\\brbai\\Desktop\\files")
递归读取此目录下所有层级的.csv文件
调用insertData(File file,int type)使用字符缓冲流读取首行的字段名,并将字符缓冲流传给readCSV使用
按需求对读取的字符做转换,使其与数据库类型匹配,方便插入数据库。例如 : 首行为字段名,用,
连接,其他行数据,普通字符串用''
包裹,时间转换为相应格式,其他同理
readCSV(BufferedReader reader)每次读取1000行数据返回,批量插入数据库,插入成功后继续读取,每个.csv文件读取完后,关闭缓冲流
依次读取每个文件,直至所有文件读取写入结束遍历文件夹,读取所有.csv文件,插入数据库
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 public void getFileName (File f) { File[] file = f.listFiles(); for (File file2 : file) { String fileName = file2.getName(); if (!file2.isDirectory()){ long start = System.currentTimeMillis(); if (fileName.indexOf("soa_bank" ) != -1 ){ insertData(file2,SOA_BANK); }else if (fileName.indexOf("soa_account" ) != -1 ){ insertData(file2,SOA_ACCOUNT); } log.info("文件{}导入完成,用时{}ms" ,file2.getPath(),(System.currentTimeMillis()-start)); } if (file2.isDirectory() && !"today" .equals(fileName)) { getFileName(file2); } } }
读取csv文件 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 public List readCSV (BufferedReader reader) { try { ArrayList<String> list = new ArrayList<String>(); String line = null ; for (int i=0 ; i < 1000 ; i++) { line = reader.readLine(); if (line == null ){ break ; } line = "'" +line.replaceAll("︴" ,"','" )+"'" ; String regex1 = "['][0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[']" ; Pattern pattern1 = Pattern.compile(regex1); Matcher m1 = pattern1.matcher(line); line = line.replaceAll(regex1 ,"︴" ); while (m1.find()){ line = line.replaceFirst("︴" ," to_date ( " +m1.group()+", 'YYYY-MM-DD HH24:MI:SS' )" ); } String regex2 = "['][0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[ ][0-9]{1,2}[:][0-9]{1,2}[:][0-9]{1,2}[']" ; Pattern pattern2 = Pattern.compile(regex2); Matcher m2 = pattern2.matcher(line); line = line.replaceAll(regex2 ,"︴" ); while (m2.find()){ line = line.replaceFirst("︴" ," to_date ( " +m2.group()+", 'YYYY-MM-DD HH24:MI:SS' )" ); } list.add(line); } return list; } catch (Exception e) { log.error("文件:{}读取信息出错" ); try { reader.close(); } catch (IOException ex) { ex.printStackTrace(); } e.printStackTrace(); } return null ; }
批量插入数据库 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 public void insertData (File file,int type) { String head = null ; List<String> list = null ; BufferedReader reader = null ; try { reader = new BufferedReader(new FileReader(file)); head = reader.readLine(); if (head == null ){ log.info("文件{}内容为空!!!" ,file.getPath()); return ; } else { head = head.replaceAll("︴" ,"," ); } while ((list = readCSV(reader)).size() != 0 ){ switch (type){ case 1 :{ bankInfoMapper.insertListData(head,list); } break ; case 2 :{ accountMapper.insertListData(head,list); } break ; } log.info("文件{}导入中--- +{}行" ,file.getPath(),list.size()); } } catch (IOException e) { log.info("***文件{}导入数据库出现异常***" ,file.getPath()); e.printStackTrace(); }finally { try { reader.close(); } catch (IOException e) { e.printStackTrace(); } } }
Mapper接口(AccountMapper为例) 1 2 3 public interface AccountMapper extends BaseMapper <Account > { void insertListData (@Param("head" ) String head,@Param ("list" ) List<String> list) ; }
批量插入(Oracle数据库)(对应Mapper接口的xml文件,例) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd"> <mapper namespace ="com.xxx.spmacore.soa.mapper.AccountMapper" > <insert id ="insertListData" useGeneratedKeys ="false" > insert into T_ACCOUNT ( ${head} ) <foreach collection ="list" separator ="UNION ALL" item ="item" > ( SELECT ${item} FROM DUAL ) </foreach > </insert > </mapper >
读取数据库表结构
当csv文件中时间为null,或数据字段不含全部的表字段时,可能需要查询数据库各字段属性与其对应做相关处理,此处只提供数据库表结构的读取,相关处理按实际业务情况。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 @Autowired private SqlSessionFactory sqlSessionFactory; public Map<String,Map<String, String>> getDatabaseAttribute(String tableName) { SqlSession sqlSession = sqlSessionFactory.openSession(); Connection connection = sqlSession.getConnection(); Map<String,Map<String, String>> database = new HashMap<>(); DatabaseMetaData metaData = null ; try { metaData = connection.getMetaData(); ResultSet columns = metaData .getColumns(null , "DEVTEST" , tableName, "%" ); Map<String,String> attributes = new HashMap<>(); while (columns.next()){ attributes.put(columns.getString("COLUMN_NAME" ),columns.getString("TYPE_NAME" )); } database.put(tableName,attributes); } catch (SQLException e) { e.printStackTrace(); }finally { try { connection.close(); } catch (SQLException e) { e.printStackTrace(); } } return database; }
完整工具类 CSVUtils 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 package com.xxx.spmacore.common.utils;import com.xxx.spmacore.soa.mapper.AccountMapper;import com.xxx.spmacore.soa.mapper.BankInfoMapper;import java.io.BufferedReader;import java.io.File;import java.io.FileReader;import java.io.IOException;import java.util.ArrayList;import java.util.List;import java.util.regex.Matcher;import java.util.regex.Pattern;import lombok.extern.slf4j.Slf4j;import org.springframework.beans.factory.annotation.Autowired;import org.springframework.stereotype.Component;@Component @Slf 4jpublic class CSVUtils { @Autowired private BankInfoMapper bankInfoMapper; @Autowired private AccountMapper accountMapper; private static final int SOA_BANK = 1 ; private static final int SOA_ACCOUNT = 2 ; public void getFileName (File f) { File[] file = f.listFiles(); for (File file2 : file) { String fileName = file2.getName(); if (!file2.isDirectory()){ long start = System.currentTimeMillis(); if (fileName.indexOf("soa_bank" ) != -1 ){ insertData(file2,SOA_BANK); }else if (fileName.indexOf("soa_account" ) != -1 ){ insertData(file2,SOA_ACCOUNT); } log.info("文件{}导入完成,用时{}ms" ,file2.getPath(),(System.currentTimeMillis()-start)); } if (file2.isDirectory()) { getFileName(file2); } } } public List readCSV (BufferedReader reader) { try { ArrayList<String> list = new ArrayList<String>(); String line = null ; for (int i=0 ; i < 1000 ; i++) { line = reader.readLine(); if (line == null ){ break ; } line = "'" +line.replaceAll("︴" ,"','" )+"'" ; String regex1 = "['][0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[']" ; Pattern pattern1 = Pattern.compile(regex1); Matcher m1 = pattern1.matcher(line); line = line.replaceAll(regex1 ,"︴" ); while (m1.find()){ line = line.replaceFirst("︴" ," to_date ( " +m1.group()+", 'YYYY-MM-DD HH24:MI:SS' )" ); } String regex2 = "['][0-9]{4}[-][0-9]{1,2}[-][0-9]{1,2}[ ][0-9]{1,2}[:][0-9]{1,2}[:][0-9]{1,2}[']" ; Pattern pattern2 = Pattern.compile(regex2); Matcher m2 = pattern2.matcher(line); line = line.replaceAll(regex2 ,"︴" ); while (m2.find()){ line = line.replaceFirst("︴" ," to_date ( " +m2.group()+", 'YYYY-MM-DD HH24:MI:SS' )" ); } list.add(line); } return list; } catch (Exception e) { log.error("文件:{}读取信息出错" ); try { reader.close(); } catch (IOException ex) { ex.printStackTrace(); } e.printStackTrace(); } return null ; } public void insertData (File file,int type) { String head = null ; List<String> list = null ; BufferedReader reader = null ; try { reader = new BufferedReader(new FileReader(file)); head = reader.readLine(); if (head == null ){ log.info("文件{}内容为空!!!" ,file.getPath()); return ; } else { head = head.replaceAll("︴" ,"," ); } while ((list = readCSV(reader)).size() != 0 ){ switch (type){ case 1 :{ bankInfoMapper.insertListData(head,list); } break ; case 2 :{ accountMapper.insertListData(head,list); } break ; } log.info("文件{}导入中--- +{}行" ,file.getPath(),list.size()); } } catch (IOException e) { log.info("***文件{}导入数据库出现异常***" ,file.getPath()); e.printStackTrace(); }finally { try { reader.close(); } catch (IOException e) { e.printStackTrace(); } } } }