2021-05-30 更新
整个流程
1、读取数据库数据
2、分词(我在这里用的是盘古分词)
3、单独存在硬盘上
4、在Lucene单独存储的索引文件上查找,而不是在数据库中查找
实例源码下载地址: https://pan.baidu.com/s/1cl88QsYp_zhY-yclORZHlg 提取码: s39x
相关的视频教程 https://www.bilibili.com/video/BV1K44y1k7Vq?share_source=copy_web

1、读取数据库数据
2、分词(我在这里用的是盘古分词)
3、单独存在硬盘上
4、在Lucene单独存储的索引文件上查找,而不是在数据库中查找
实例源码下载地址: https://pan.baidu.com/s/1cl88QsYp_zhY-yclORZHlg 提取码: s39x
相关的视频教程 https://www.bilibili.com/video/BV1K44y1k7Vq?share_source=copy_web
// 引用的第三方库有以下5个:
// PanGu.Lucene.Analyzer
// PanGu
// PanGu.HighLight
// Lucene.Net
// ICSharpCode.SharpZipLib

盘古分词类
/// <summary>
/// Splits a search keyword into individual terms by parsing it with a
/// <c>QueryParser</c> backed by the PanGu analyzer.
/// </summary>
public class LuceneAnalyze
{
    #region AnalyzerKey

    /// <summary>
    /// Tokenizes the search keyword and returns the resulting terms.
    /// </summary>
    /// <param name="keyword">Raw keyword text typed by the user.</param>
    /// <returns>
    /// The term texts found in the parsed query. An empty array is returned when
    /// <paramref name="keyword"/> is null or blank, or when the parsed query holds no
    /// term/phrase clauses. If the parser yields a query type other than term, phrase
    /// or boolean, the original keyword is returned as the only element.
    /// </returns>
    public string[] AnalyzerKey(string keyword)
    {
        // Nothing to tokenize; avoid handing an empty string to the query parser.
        if (string.IsNullOrWhiteSpace(keyword))
        {
            return new string[0];
        }

        using (Analyzer analyzer = new PanGuAnalyzer())
        {
            QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", analyzer);
            Query query = parser.Parse(this.CleanKeyword(keyword));

            if (query is TermQuery || query is PhraseQuery || query is BooleanQuery)
            {
                List<string> analyzerWords = new List<string>();
                CollectTerms(query, analyzerWords);
                return analyzerWords.ToArray();
            }

            // Unknown query shape: fall back to the keyword exactly as it was passed in.
            return new string[] { keyword };
        }
    }

    /// <summary>
    /// Appends the term texts contained in <paramref name="query"/> to
    /// <paramref name="words"/>. Boolean queries are walked recursively so that
    /// clauses nested inside another boolean query are not lost; query types other
    /// than term, phrase and boolean contribute nothing.
    /// </summary>
    /// <param name="query">Query to inspect.</param>
    /// <param name="words">Accumulator that receives the term texts in clause order.</param>
    private static void CollectTerms(Query query, List<string> words)
    {
        TermQuery termQuery = query as TermQuery;
        if (termQuery != null)
        {
            words.Add(termQuery.Term.Text);
            return;
        }

        PhraseQuery phraseQuery = query as PhraseQuery;
        if (phraseQuery != null)
        {
            words.AddRange(phraseQuery.GetTerms().Select(t => t.Text));
            return;
        }

        BooleanQuery booleanQuery = query as BooleanQuery;
        if (booleanQuery != null)
        {
            foreach (BooleanClause clause in booleanQuery.GetClauses())
            {
                CollectTerms(clause.Query, words);
            }
        }
    }

    /// <summary>
    /// Neutralises the upper-case AND / OR words in the keyword and escapes the
    /// query-syntax special characters, so the whole text is parsed as plain terms.
    /// </summary>
    /// <param name="keyword">Raw keyword text.</param>
    /// <returns>
    /// The trimmed keyword with every standalone <c>AND</c>/<c>OR</c> token lower-cased
    /// and then passed through <c>QueryParser.Escape</c>; an empty string when the
    /// keyword is null or blank.
    /// </returns>
    private string CleanKeyword(string keyword)
    {
        if (string.IsNullOrWhiteSpace(keyword))
        {
            return string.Empty;
        }

        // A token counts as standalone when it is delimited by whitespace or by the
        // start/end of the string; this covers leading, trailing, inner and lone tokens.
        string neutralized = System.Text.RegularExpressions.Regex.Replace(
            keyword.Trim(),
            @"(?<!\S)(?:AND|OR)(?!\S)",
            match => match.Value.ToLowerInvariant());

        return QueryParser.Escape(neutralized);
    }

    #endregion AnalyzerKey
}
Lucene初始化和搜索的方法
/// <summary>
/// Runs the keyword through PanGu word segmentation and builds a Lucene query
/// string that targets the "title" field.
/// </summary>
/// <param name="keyword">Raw keyword text typed by the user.</param>
/// <returns>
/// <c>title:word*</c> when segmentation yields a single word, otherwise
/// space-separated <c>title:word</c> clauses. Returns an empty string when the
/// keyword is null/blank or segmentation yields no words.
/// </returns>
private string AnalyzerKeyword(string keyword)
{
    if (string.IsNullOrWhiteSpace(keyword))
    {
        return string.Empty;
    }

    LuceneAnalyze analyzer = new LuceneAnalyze();
    string[] words = analyzer.AnalyzerKey(keyword);

    StringBuilder queryStringBuilder = new StringBuilder();
    if (words.Length == 1)
    {
        // A single word is turned into a prefix query. The word is escaped because it
        // is raw term text and may contain query-syntax characters.
        queryStringBuilder.AppendFormat("{0}:{1}* ", "title", QueryParser.Escape(words[0]));
    }
    else
    {
        foreach (string word in words)
        {
            queryStringBuilder.AppendFormat("{0}:{1} ", "title", QueryParser.Escape(word));
        }
    }

    return queryStringBuilder.ToString().TrimEnd();
}

/// <summary>
/// Opens the on-disk index directory "LuceneData" located under the application
/// base directory, creating the folder first when it does not exist.
/// </summary>
/// <returns>An open <c>FSDirectory</c>; the caller is responsible for disposing it.</returns>
private FSDirectory CreateFSDirectory()
{
    // Path.Combine avoids the hard-coded "\\" separator and a doubled separator when
    // BaseDirectory already ends with one.
    string dirPath = System.IO.Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "LuceneData");

    // CreateDirectory does nothing when the directory already exists.
    System.IO.Directory.CreateDirectory(dirPath);
    return FSDirectory.Open(dirPath);
}

/// <summary>
/// Builds the index: reads the rows returned by <c>GetList</c> and writes one
/// document per row (fields "id" and "title") into the on-disk directory.
/// Does nothing when no rows are returned.
/// </summary>
public void InitIndex()
{
    DataTable dt = GetList();
    if (dt == null || dt.Rows.Count < 1)
    {
        return;
    }

    using (FSDirectory directory = CreateFSDirectory())
    using (PanGuAnalyzer analyzer = new PanGuAnalyzer())
    // Third argument (create) is true, so the index is created anew on each call.
    using (IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED))
    {
        foreach (DataRow dr in dt.Rows)
        {
            // Only id and title are indexed here; further columns (creation time,
            // article content, ...) could be added the same way.
            Document doc = new Document();
            // id: stored, indexed as a single un-analyzed token.
            doc.Add(new Field("id", dr[0].ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            // title: stored and run through the analyzer.
            doc.Add(new Field("title", dr[1].ToString(), Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
        }

        writer.Optimize();
    }
}

/// <summary>
/// Loads the ID and Title columns of every row in the Article table.
/// </summary>
/// <returns>The first table of the query result, or null when the result has no table.</returns>
private DataTable GetList()
{
    // The whole table is read in one go; for large tables this could be batched.
    DataSet ds = SQLiteHelper.ExecuteQuery("select ID,Title from Article");
    if (ds == null || ds.Tables.Count == 0)
    {
        return null;
    }

    return ds.Tables[0];
}

/// <summary>
/// Writes a separator line followed by one "name=value" line per requested stored
/// field for every hit in <paramref name="docs"/>.
/// </summary>
/// <param name="searcher">Searcher used to load each hit's document.</param>
/// <param name="docs">Hits to print.</param>
/// <param name="fieldNames">Stored field names to print, in output order.</param>
private static void WriteHits(IndexSearcher searcher, TopDocs docs, params string[] fieldNames)
{
    foreach (ScoreDoc sd in docs.ScoreDocs)
    {
        Document doc = searcher.Doc(sd.Doc);
        Console.WriteLine("***************************************");
        foreach (string fieldName in fieldNames)
        {
            Console.WriteLine("{0}={1}", fieldName, doc.Get(fieldName));
        }
    }
}

/// <summary>
/// Multi-word search: segments the keyword, searches the "title" field and prints
/// the id and title of at most 1000 hits followed by the total hit count.
/// </summary>
/// <param name="keyword">Raw keyword text typed by the user.</param>
public void SearchData(string keyword)
{
    // Only the first 1000 hits are printed, so no more than that are requested.
    // Paging could be implemented on top of this.
    const int maxHits = 1000;

    int totalHits = 0;
    string queryText = AnalyzerKeyword(keyword);

    // An empty query string (blank keyword / no words) is reported as zero hits
    // instead of being handed to the parser.
    if (queryText.Length > 0)
    {
        using (FSDirectory dir = CreateFSDirectory())
        using (IndexSearcher searcher = new IndexSearcher(dir))
        using (PanGuAnalyzer analyzer = new PanGuAnalyzer())
        {
            QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", analyzer);
            Query query = parser.Parse(queryText);
            TopDocs docs = searcher.Search(query, null, maxHits);

            WriteHits(searcher, docs, "id", "title");
            totalHits = docs.TotalHits;
        }
    }

    Console.WriteLine($"一共命中{totalHits}");
}

/// <summary>
/// Single-term search: looks up the fixed term "图书馆" in the "title" field and
/// prints id, title, time, price and content of up to 10000 hits, followed by the
/// total hit count.
/// </summary>
public void SingelSearchData()
{
    // NOTE(review): InitIndex in this file only stores "id" and "title"; with an index
    // built by it, the time/price/content values printed below will be empty.
    using (FSDirectory dir = CreateFSDirectory())
    using (IndexSearcher searcher = new IndexSearcher(dir))
    {
        TermQuery query = new TermQuery(new Term("title", "图书馆"));
        TopDocs docs = searcher.Search(query, null, 10000);

        WriteHits(searcher, docs, "id", "title", "time", "price", "content");
        Console.WriteLine("1一共命中了{0}个", docs.TotalHits);
    }
}

/// <summary>
/// Sorted search: parses the keyword as a Lucene query against the "title" field,
/// filters by a numeric "time" range, sorts by time and then price, and prints id,
/// title, time and price of at most 1000 hits followed by the total hit count.
/// </summary>
/// <param name="keyword">
/// Query text handed to the query parser as-is (unlike <c>SearchData</c>, it is not
/// segmented or escaped first).
/// </param>
public void OrderSearchData(string keyword)
{
    // Only the first 1000 hits are printed, so no more than that are requested.
    // Paging could be implemented on top of this.
    const int maxHits = 1000;

    using (FSDirectory dir = CreateFSDirectory())
    using (IndexSearcher searcher = new IndexSearcher(dir))
    using (PanGuAnalyzer analyzer = new PanGuAnalyzer())
    {
        QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", analyzer);
        Query query = parser.Parse(keyword);

        // Inclusive range filter on the numeric "time" field.
        // NOTE(review): the bounds look like yyyyMMdd values, but 20181822 is not a valid
        // date — confirm the intended upper bound. InitIndex in this file does not index
        // "time" or "price", so this needs an index that contains those fields.
        NumericRangeFilter<int> timeFilter = NumericRangeFilter.NewIntRange("time", 20180000, 20181822, true, true);

        // The third SortField argument is "reverse": false sorts ascending, true descending.
        SortField sortPrice = new SortField("price", SortField.DOUBLE, false); // price ascending
        SortField sortTime = new SortField("time", SortField.INT, true);       // time descending
        Sort sort = new Sort(sortTime, sortPrice);                              // time first, then price

        TopDocs docs = searcher.Search(query, timeFilter, maxHits, sort);

        WriteHits(searcher, docs, "id", "title", "time", "price");
        Console.WriteLine("3一共命中了{0}个", docs.TotalHits);
    }
}
