lucene索引并搜索mysql数据库

    xiaoxiao2021-03-25  178

     由于对lucene比较感兴趣,本人在网上找了点资料,终于成功地用lucene对MySQL数据库进行索引创建并成功搜索,现总结如下:

        首先介绍一个jdbc工具类,用于得到Connection对象:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.SQLException;

    /**
     * JDBC helper that opens connections to the MySQL database used by this example.
     *
     * <p>The connection settings (URL, driver class, user and password) are fixed
     * constants of this class.
     */
    public class JdbcUtil {
        private static final String URL = "jdbc:mysql://127.0.0.1/project?autoReconnect=true&characterEncoding=utf8";
        private static final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
        private static final String USER_NAME = "root";
        private static final String PASSWORD = "";

        /**
         * Opens a new connection to the configured database.
         *
         * <p>Every call returns a freshly opened connection; nothing is cached, so a
         * failed attempt can never hand back a connection from an earlier call that
         * the caller may already have closed.
         *
         * @return a new open connection, or {@code null} if the driver class cannot
         *         be loaded or the connection attempt fails (the failure is printed
         *         to standard error)
         */
        public static Connection getConnection() {
            try {
                Class.forName(JDBC_DRIVER);
                return DriverManager.getConnection(URL, USER_NAME, PASSWORD);
            } catch (ClassNotFoundException e) {
                // The MySQL driver jar is not on the classpath.
                e.printStackTrace();
            } catch (SQLException e) {
                // The server is unreachable or rejected the credentials.
                e.printStackTrace();
            }
            // Callers test for null to detect a failed connection attempt.
            return null;
        }
    }

    // Article text: next comes the main topic of this post - indexing the database
    // rows and searching that index.

    [java] view plain copy print ? import java.io.File;     import java.sql.Connection;     import java.sql.ResultSet;     import java.sql.Statement;     import java.util.ArrayList;     import java.util.List;     import org.apache.lucene.analysis.Analyzer;     import org.apache.lucene.document.Document;     import org.apache.lucene.document.Field;     import org.apache.lucene.document.Field.TermVector;     import org.apache.lucene.index.IndexWriter;     import org.apache.lucene.queryParser.QueryParser;  import org.apache.lucene.search.*;  import org.apache.lucene.store.Directory;     import org.apache.lucene.store.FSDirectory;  import org.apache.lucene.util.Version;  import org.wltea.analyzer.lucene.IKAnalyzer;  import org.wltea.analyzer.lucene.IKSimilarity;         /**     * SearchLogic.java   * @version 1.0   * @createTime Lucene数据库检索   */    public class SearchLogic {         private static Connection conn = null;         private static Statement stmt = null;         private static  ResultSet rs = null;         private String searchDir = "E:\\Test\\Index";         private static File indexFile = null;         private static Searcher searcher = null;         private static Analyzer analyzer = null;         /** 索引页面缓冲 */        private int maxBufferedDocs = 500;         /**       * 获取数据库数据       * @return ResultSet       * @throws Exception       */        public List<SearchBean> getResult(String queryStr) throws Exception {             List<SearchBean> result = null;             conn = JdbcUtil.getConnection();             if(conn == null) {                 throw new Exception("数据库连接失败!");             }             String sql = "select id, username, password, type from account";             try {                 stmt = conn.createStatement();                 rs = stmt.executeQuery(sql);                 this.createIndex(rs);   //给数据库创建索引,此处执行一次,不要每次运行都创建索引,以后数据有更新可以后台调用更新索引                 TopDocs topDocs = this.search(queryStr);                 ScoreDoc[] 
scoreDocs = topDocs.scoreDocs;                 result = this.addHits2List(scoreDocs);             } catch(Exception e) {                 e.printStackTrace();                 throw new Exception("数据库查询sql出错! sql : " + sql);             } finally {                 if(rs != null) rs.close();                 if(stmt != null) stmt.close();                 if(conn != null) conn.close();             }                      return result;         }       /**   * 为数据库检索数据创建索引   * @param rs   * @throws Exception   */        private void createIndex(ResultSet rs) throws Exception {             Directory directory = null;             IndexWriter indexWriter = null;                      try {                 indexFile = new File(searchDir);                 if(!indexFile.exists()) {                     indexFile.mkdir();                 }                 directory = FSDirectory.open(indexFile);                 analyzer = new IKAnalyzer();                               indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);                 indexWriter.setMaxBufferedDocs(maxBufferedDocs);                 Document doc = null;                 while(rs.next()) {                     doc = new Document();                     Field id = new Field("id", String.valueOf(rs.getInt("id")), Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.NO);                     Field username = new Field("username", rs.getString("username") == null ? 
"" : rs.getString("username"), Field.Store.YES,Field.Index.ANALYZED, TermVector.NO);                     doc.add(id);                     doc.add(username);                     indexWriter.addDocument(doc);                 }                                          indexWriter.optimize();                 indexWriter.close();             } catch(Exception e) {                 e.printStackTrace();             }          }            /**       * 搜索索引       * @param queryStr       * @return       * @throws Exception       */        private TopDocs search(String queryStr) throws Exception {                    if(searcher == null) {                 indexFile = new File(searchDir);                 searcher = new IndexSearcher(FSDirectory.open(indexFile));               }             searcher.setSimilarity(new IKSimilarity());             QueryParser parser = new QueryParser(Version.LUCENE_30,"username",new IKAnalyzer());             Query query = parser.parse(queryStr);                    TopDocs topDocs = searcher.search(query, searcher.maxDoc());             return topDocs;         }            /**       * 返回结果并添加到List中       * @param scoreDocs       * @return       * @throws Exception       */        private List<SearchBean> addHits2List(ScoreDoc[] scoreDocs ) throws Exception {             List<SearchBean> listBean = new ArrayList<SearchBean>();             SearchBean bean = null;             for(int i=0 ; i<scoreDocs.length; i++) {                 int docId = scoreDocs[i].doc;                 Document doc = searcher.doc(docId);                 bean = new SearchBean();                 bean.setId(doc.get("id"));                 bean.setUsername(doc.get("username"));                 listBean.add(bean);             }             return listBean;         }            public static void main(String[] args) {             SearchLogic logic = new SearchLogic();             try {                 Long startTime = System.currentTimeMillis();                 List<SearchBean> 
result = logic.getResult("商家");                 int i = 0;                 for(SearchBean bean : result) {                     if(i == 10)                       break;                     System.out.println("bean.name " + bean.getClass().getName() + " : bean.id " + bean.getId()+ " : bean.username " + bean.getUsername());                   i++;                 }                            System.out.println("searchBean.result.size : " + result.size());                 Long endTime = System.currentTimeMillis();                 System.out.println("查询所花费的时间为:" + (endTime-startTime)/1000);             } catch (Exception e) {               e.printStackTrace();                 System.out.println(e.getMessage());             }         }     }     import java.io.File; import java.sql.Connection; import java.sql.ResultSet; import java.sql.Statement; import java.util.ArrayList; import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.Field.TermVector; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.wltea.analyzer.lucene.IKAnalyzer; import org.wltea.analyzer.lucene.IKSimilarity; /** * SearchLogic.java * @version 1.0 * @createTime Lucene数据库检索 */ public class SearchLogic { private static Connection conn = null; private static Statement stmt = null; private static ResultSet rs = null; private String searchDir = "E:\\Test\\Index"; private static File indexFile = null; private static Searcher searcher = null; private static Analyzer analyzer = null; /** 索引页面缓冲 */ private int maxBufferedDocs = 500; /** * 获取数据库数据 * @return ResultSet * @throws Exception */ public List<SearchBean> getResult(String queryStr) throws Exception { List<SearchBean> 
result = null; conn = JdbcUtil.getConnection(); if(conn == null) { throw new Exception("数据库连接失败!"); } String sql = "select id, username, password, type from account"; try { stmt = conn.createStatement(); rs = stmt.executeQuery(sql); this.createIndex(rs); //给数据库创建索引,此处执行一次,不要每次运行都创建索引,以后数据有更新可以后台调用更新索引 TopDocs topDocs = this.search(queryStr); ScoreDoc[] scoreDocs = topDocs.scoreDocs; result = this.addHits2List(scoreDocs); } catch(Exception e) { e.printStackTrace(); throw new Exception("数据库查询sql出错! sql : " + sql); } finally { if(rs != null) rs.close(); if(stmt != null) stmt.close(); if(conn != null) conn.close(); } return result; } /** * 为数据库检索数据创建索引 * @param rs * @throws Exception */ private void createIndex(ResultSet rs) throws Exception { Directory directory = null; IndexWriter indexWriter = null; try { indexFile = new File(searchDir); if(!indexFile.exists()) { indexFile.mkdir(); } directory = FSDirectory.open(indexFile); analyzer = new IKAnalyzer(); indexWriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); indexWriter.setMaxBufferedDocs(maxBufferedDocs); Document doc = null; while(rs.next()) { doc = new Document(); Field id = new Field("id", String.valueOf(rs.getInt("id")), Field.Store.YES, Field.Index.NOT_ANALYZED, TermVector.NO); Field username = new Field("username", rs.getString("username") == null ? 
"" : rs.getString("username"), Field.Store.YES,Field.Index.ANALYZED, TermVector.NO); doc.add(id); doc.add(username); indexWriter.addDocument(doc); } indexWriter.optimize(); indexWriter.close(); } catch(Exception e) { e.printStackTrace(); } } /** * 搜索索引 * @param queryStr * @return * @throws Exception */ private TopDocs search(String queryStr) throws Exception { if(searcher == null) { indexFile = new File(searchDir); searcher = new IndexSearcher(FSDirectory.open(indexFile)); } searcher.setSimilarity(new IKSimilarity()); QueryParser parser = new QueryParser(Version.LUCENE_30,"username",new IKAnalyzer()); Query query = parser.parse(queryStr); TopDocs topDocs = searcher.search(query, searcher.maxDoc()); return topDocs; } /** * 返回结果并添加到List中 * @param scoreDocs * @return * @throws Exception */ private List<SearchBean> addHits2List(ScoreDoc[] scoreDocs ) throws Exception { List<SearchBean> listBean = new ArrayList<SearchBean>(); SearchBean bean = null; for(int i=0 ; i<scoreDocs.length; i++) { int docId = scoreDocs[i].doc; Document doc = searcher.doc(docId); bean = new SearchBean(); bean.setId(doc.get("id")); bean.setUsername(doc.get("username")); listBean.add(bean); } return listBean; } public static void main(String[] args) { SearchLogic logic = new SearchLogic(); try { Long startTime = System.currentTimeMillis(); List<SearchBean> result = logic.getResult("商家"); int i = 0; for(SearchBean bean : result) { if(i == 10) break; System.out.println("bean.name " + bean.getClass().getName() + " : bean.id " + bean.getId()+ " : bean.username " + bean.getUsername()); i++; } System.out.println("searchBean.result.size : " + result.size()); Long endTime = System.currentTimeMillis(); System.out.println("查询所花费的时间为:" + (endTime-startTime)/1000); } catch (Exception e) { e.printStackTrace(); System.out.println(e.getMessage()); } } }     对了上面的类还用到了一个javabean类,如下:

    /**
     * Plain data holder for one search hit: the account id and the user name,
     * both kept as strings.
     */
    public class SearchBean {

        /** Account id; {@code null} until set. */
        private String id;

        /** Account user name; {@code null} until set. */
        private String username;

        /** @return the account id, or {@code null} if none has been set */
        public String getId() {
            return this.id;
        }

        /** @return the user name, or {@code null} if none has been set */
        public String getUsername() {
            return this.username;
        }

        /** @param id the account id to store */
        public void setId(String id) {
            this.id = id;
        }

        /** @param username the user name to store */
        public void setUsername(String username) {
            this.username = username;
        }
    }

    // Article text: most of this code was copied from a doc file I found online. In
    // the spirit of "borrowing what works" I changed very little, and testing shows
    // that the code runs correctly.

        写了几篇博客,对lucene的使用方式也越来越清楚,在这里也很有必要总结一下:

        使用lucene包括两个步骤,分别是索引和搜索。

        •索引过程如下:
           ◦ 创建一个IndexWriter用来写索引文件,它有几个参数,INDEX_DIR(即上面代码中的searchDir)就是索引文件所存放的位置,Analyzer便是用来对文档进行词法分析和语言处理的。
           ◦ 创建一个Document代表我们要索引的文档。
           ◦ 将不同的Field加入到文档中。我们知道,一篇文档有多种信息,如题目,作者,修改时间,内容等。不同类型的信息用不同的Field来表示。
           ◦ IndexWriter调用函数addDocument将索引写到索引文件夹中。
        •搜索过程如下:
           ◦ IndexReader将磁盘上的索引信息读入到内存,INDEX_DIR就是索引文件存放的位置。
           ◦ 创建IndexSearcher准备进行搜索。
           ◦ 创建Analyzer用来对查询语句进行词法分析和语言处理。
           ◦ 创建QueryParser用来对查询语句进行语法分析。
           ◦ QueryParser调用parse进行语法分析,形成查询语法树,放到Query中。
           ◦ IndexSearcher调用search对查询语法树Query进行搜索,得到结果TopDocs。

        对了,必须说一下,上面的例子还用到了一个新的jar包IKAnalyzer.jar,它是一个开源的中文分词器。如果不使用中文分词器,就无法对中文进行合理的分词,比如说我的第一篇关于Lucene的博客中的例子就无法解析中文字符串!

    转载请注明原文地址: https://ju.6miu.com/read-1959.html

    最新回复(0)