通过查阅网上资料,发现解析 CSV 文件主要有两种思路:a. 通过 Pattern(正则表达式)分割各字段;b. 逐字符读取并判断。此外,也可以借助第三方 Jar 包来解析。
1. 通过 Pattern 准确分割字段(Reference:csv文件读取)
package xufei;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/*
 * File format rules
 *
 * Microsoft's format is the simplest one. A comma-separated value is either
 * "plain" (everything up to the next comma) or enclosed in double quotes
 * (in which case a double quote inside the data is written as a pair of
 * double quotes).
 *
 *   Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K
 *
 * This line contains seven fields:
 *   Ten Thousand
 *   10000
 *    2710
 *   (empty field)
 *   10,000
 *   It's "10 Grand", baby
 *   10K
 *
 * - Each record occupies one line.
 * - The comma is the delimiter.
 * - Spaces before and after a comma are ignored by the format.
 *   NOTE: this parser does not trim; such spaces stay part of the field.
 * - A field that contains a comma must be enclosed in double quotes.
 *   A full-width comma is not a delimiter and needs no quoting.
 * - A field that contains a line break must be enclosed in double quotes.
 *   NOTE: this parser works line by line and does not join such records.
 * - A field with leading or trailing spaces must be enclosed in double quotes.
 * - A double quote inside a field is written as two double quotes.
 * - A field that contains a double quote must be enclosed in double quotes.
 * - The first record may hold the field names.
 */

/**
 * Title: xufei.CSVAnalysis.java
 * Description: reads a CSV file into a list of rows using a regular expression.
 * Copyright: Copyright (c) 2006
 * Company: technodia
 *
 * @author Xu Fei
 * @version 1.0
 * createDate Aug 11, 2008
 * Revision history
 * Date  Author  Reason
 */
public class CSVAnalysis {

    /**
     * Matches one field together with the comma that terminates it.
     * Group 1 holds the content of a quoted field (quote pairs still doubled);
     * group 2 holds the text of an unquoted field. Anything between the
     * closing quote and the comma is matched but not captured.
     * Compiled once because it is the same for every line.
     */
    private static final Pattern CELL_PATTERN = Pattern.compile(
            "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"[^,]*|([^,]*)),");

    /** Reader over the file passed to the constructor (platform default charset). */
    private final InputStreamReader fr;

    /**
     * Opens the given file for reading.
     *
     * @param f path of the CSV file
     * @throws IOException if the file cannot be opened
     */
    public CSVAnalysis(String f) throws IOException {
        fr = new InputStreamReader(new FileInputStream(f));
    }

    /**
     * Parses the CSV file into a list of rows.
     * Each cell is a String, each line becomes one list of cells, and all
     * lines are collected into the returned list. An empty line yields an
     * empty row. The underlying reader is closed before this method returns,
     * so it can be called only once per instance.
     *
     * @return one list of cell values per line of the file
     * @throws IOException if reading or closing the file fails
     */
    public List<List<String>> readCSVFile() throws IOException {
        BufferedReader br = new BufferedReader(fr);
        List<List<String>> listFile = new ArrayList<List<String>>();
        try {
            String rec; // one line of the file
            while ((rec = br.readLine()) != null) {
                listFile.add(parseRecord(rec));
            }
        } finally {
            // Closing the BufferedReader also closes the wrapped reader.
            br.close();
        }
        return listFile;
    }

    /** Splits one line into its cell values, unescaping doubled quotes in quoted cells. */
    private static List<String> parseRecord(String rec) {
        List<String> cells = new ArrayList<String>();
        if (rec.length() == 0) {
            return cells;
        }
        // Append a comma so the last field is terminated like all the others;
        // without it the final field of the line would never match.
        Matcher mCells = CELL_PATTERN.matcher(rec + ",");
        while (mCells.find()) {
            String quoted = mCells.group(1);
            if (quoted != null) {
                cells.add(quoted.replace("\"\"", "\""));
            } else {
                cells.add(mCells.group(2));
            }
        }
        return cells;
    }

    /**
     * Demo entry point: parses the hard-coded file {@code c:/test2.csv}.
     *
     * @param args unused
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] args) throws IOException {
        CSVAnalysis parser = new CSVAnalysis("c:/test2.csv");
        parser.readCSVFile();
    }
}
// 2. Read the string character by character and classify each character (Reference: "csv文件读取" / "Reading CSV files")
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

/**
 * A very simple CSV reader released under a commercial-friendly license.
 *
 * <p>Records are read line by line from the wrapped reader. A quoted field
 * may span several lines; the line breaks are re-inserted as {@code "\n"}.
 *
 * @author Glen Smith
 */
public class CSVReader implements Closeable {

    /** The default separator to use if none is supplied to the constructor. */
    public static final char DEFAULT_SEPARATOR = ',';

    /** Initial capacity of the buffer used to assemble each token. */
    public static final int INITIAL_READ_SIZE = 64;

    /** The default quote character to use if none is supplied to the constructor. */
    public static final char DEFAULT_QUOTE_CHARACTER = '"';

    /** The default escape character to use if none is supplied to the constructor. */
    public static final char DEFAULT_ESCAPE_CHARACTER = '\\';

    /** The default number of leading lines to skip. */
    public static final int DEFAULT_SKIP_LINES = 0;

    private BufferedReader br;

    /** Becomes false once the underlying reader has reported end of input. */
    private boolean hasNext = true;

    private final char separator;

    private final char quotechar;

    private final char escape;

    private int skipLines;

    /** True once the leading {@link #skipLines} lines have been consumed. */
    private boolean linesSkipped;

    /**
     * Constructs CSVReader using a comma for the separator.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     */
    public CSVReader(Reader reader) {
        this(reader, DEFAULT_SEPARATOR);
    }

    /**
     * Constructs CSVReader with supplied separator.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries.
     */
    public CSVReader(Reader reader, char separator) {
        this(reader, separator, DEFAULT_QUOTE_CHARACTER, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Constructs CSVReader with supplied separator and quote char.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     */
    public CSVReader(Reader reader, char separator, char quotechar) {
        this(reader, separator, quotechar, DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES);
    }

    /**
     * Constructs CSVReader with supplied separator, quote char and escape char.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     * @param escape
     *            the character to use for escaping a quote or the escape
     *            character itself inside a quoted element
     */
    public CSVReader(Reader reader, char separator, char quotechar, char escape) {
        this(reader, separator, quotechar, escape, DEFAULT_SKIP_LINES);
    }

    /**
     * Constructs CSVReader with supplied separator, quote char and number of
     * lines to skip.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     * @param line
     *            the number of lines to skip before reading starts
     */
    public CSVReader(Reader reader, char separator, char quotechar, int line) {
        this(reader, separator, quotechar, DEFAULT_ESCAPE_CHARACTER, line);
    }

    /**
     * Constructs CSVReader with supplied separator, quote char, escape char
     * and number of lines to skip.
     *
     * @param reader
     *            the reader to an underlying CSV source.
     * @param separator
     *            the delimiter to use for separating entries
     * @param quotechar
     *            the character to use for quoted elements
     * @param escape
     *            the character to use for escaping a quote or the escape
     *            character itself inside a quoted element
     * @param line
     *            the number of lines to skip before reading starts
     */
    public CSVReader(Reader reader, char separator, char quotechar, char escape, int line) {
        this.br = new BufferedReader(reader);
        this.separator = separator;
        this.quotechar = quotechar;
        this.escape = escape;
        this.skipLines = line;
    }

    /**
     * Reads the entire file into a List with each element being a String[] of
     * tokens.
     *
     * @return a List of String[], with each String[] representing a line of the
     *         file.
     * @throws IOException
     *             if bad things happen during the read
     */
    public List<String[]> readAll() throws IOException {
        List<String[]> allElements = new ArrayList<String[]>();
        while (hasNext) {
            String[] nextLineAsTokens = readNext();
            if (nextLineAsTokens != null) {
                allElements.add(nextLineAsTokens);
            }
        }
        return allElements;
    }

    /**
     * Reads the next line from the buffer and converts to a string array.
     *
     * @return a string array with each separator-delimited element as a
     *         separate entry, or null when the input is exhausted.
     * @throws IOException
     *             if bad things happen during the read
     */
    public String[] readNext() throws IOException {
        String nextLine = getNextLine();
        return hasNext ? parseLine(nextLine) : null;
    }

    /**
     * Reads the next line from the file, first consuming the configured number
     * of leading lines on the very first call.
     *
     * @return the next line from the file without trailing newline, or null at
     *         end of input
     * @throws IOException
     *             if bad things happen during the read
     */
    private String getNextLine() throws IOException {
        if (!this.linesSkipped) {
            for (int i = 0; i < skipLines; i++) {
                br.readLine();
            }
            this.linesSkipped = true;
        }
        String nextLine = br.readLine();
        if (nextLine == null) {
            hasNext = false;
        }
        return hasNext ? nextLine : null;
    }

    /**
     * Parses an incoming String and returns an array of elements. While a
     * quoted section is still open at the end of the line, further lines are
     * pulled from the reader and joined with {@code "\n"}. If the input ends
     * inside a quoted section, the partial element collected so far is
     * returned as the last entry.
     *
     * @param nextLine
     *            the string to parse
     * @return the separator-tokenized list of elements, or null if nextLine is null
     * @throws IOException
     *             if bad things happen during the read
     */
    private String[] parseLine(String nextLine) throws IOException {
        if (nextLine == null) {
            return null;
        }

        List<String> tokensOnThisLine = new ArrayList<String>();
        StringBuilder sb = new StringBuilder(INITIAL_READ_SIZE);
        boolean inQuotes = false;
        do {
            if (inQuotes) {
                // continuing a quoted section, reappend newline
                sb.append("\n");
                nextLine = getNextLine();
                if (nextLine == null) {
                    break;
                }
            }
            for (int i = 0; i < nextLine.length(); i++) {
                char c = nextLine.charAt(i);
                if (c == this.escape) {
                    if (isEscapable(nextLine, inQuotes, i)) {
                        sb.append(nextLine.charAt(i + 1));
                        i++;
                    }
                    // Otherwise ignore the escape character only. The index
                    // must NOT be advanced here, or the character following
                    // the escape would be silently discarded as well.
                } else if (c == quotechar) {
                    if (isEscapedQuote(nextLine, inQuotes, i)) {
                        sb.append(nextLine.charAt(i + 1));
                        i++;
                    } else {
                        inQuotes = !inQuotes;
                        // the tricky case of an embedded quote in the middle: a,bc"d"ef,g
                        if (i > 2 // not on the beginning of the line
                                && nextLine.charAt(i - 1) != this.separator // not at the beginning of an escape sequence
                                && nextLine.length() > (i + 1)
                                && nextLine.charAt(i + 1) != this.separator // not at the end of an escape sequence
                        ) {
                            sb.append(c);
                        }
                    }
                } else if (c == separator && !inQuotes) {
                    tokensOnThisLine.add(sb.toString());
                    sb = new StringBuilder(INITIAL_READ_SIZE); // start work on next token
                } else {
                    sb.append(c);
                }
            }
        } while (inQuotes);
        tokensOnThisLine.add(sb.toString());
        return tokensOnThisLine.toArray(new String[0]);
    }

    /**
     * precondition: the current character is a quote or an escape
     *
     * @param nextLine the current line
     * @param inQuotes true if the current context is quoted
     * @param i current index in line
     * @return true if the context is quoted and the following character is a quote
     */
    private boolean isEscapedQuote(String nextLine, boolean inQuotes, int i) {
        return inQuotes // we are in quotes, therefore there can be escaped quotes in here.
                && nextLine.length() > (i + 1) // there is indeed another character to check.
                && nextLine.charAt(i + 1) == quotechar;
    }

    /**
     * precondition: the current character is an escape
     *
     * @param nextLine the current line
     * @param inQuotes true if the current context is quoted
     * @param i current index in line
     * @return true if the context is quoted and the following character is a
     *         quote or another escape character
     */
    private boolean isEscapable(String nextLine, boolean inQuotes, int i) {
        return inQuotes // we are in quotes, therefore there can be escaped quotes in here.
                && nextLine.length() > (i + 1) // there is indeed another character to check.
                && (nextLine.charAt(i + 1) == quotechar || nextLine.charAt(i + 1) == this.escape);
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if the close fails
     */
    @Override
    public void close() throws IOException {
        br.close();
    }
}