C#,Lucene,PanGu全文检索,减少数据压力并实现口语化非标准查询

整个流程
1、读取数据库数据
2、分词(我在这里用的是盘古分词)
3、将分词后的索引单独存储在硬盘上
4、基于Lucene在单独存储的索引文件上查找,而不是在数据库中查找

转载请保留http://www.luofenming.com/show.aspx?id=ART2019112800001
核心代码如下(我在这里直接调用的是Lucene和PanGu动态库)
/// <summary>
/// Segments <paramref name="keyword"/> with the PanGu-based <c>LuceneAnalyze</c> helper
/// and builds a Lucene query string whose clauses all target the "title" field.
/// </summary>
/// <param name="keyword">Raw search text to segment.</param>
/// <returns>
/// Space-separated <c>title:word</c> clauses. When segmentation yields exactly one word,
/// the clause gets a trailing <c>*</c> (Lucene wildcard syntax). Returns an empty string
/// when there is nothing to search for.
/// </returns>
private string AnalyzerKeyword(string keyword)
{
    // Nothing to segment: do not hand null/blank text to the analyzer.
    if (string.IsNullOrWhiteSpace(keyword))
    {
        return string.Empty;
    }

    LuceneAnalyze analyzer = new LuceneAnalyze();
    string[] words = analyzer.AnalyzerKey(keyword);
    if (words == null || words.Length == 0)
    {
        return string.Empty;
    }

    // The wildcard form is used only when segmentation produced a single word.
    string clauseFormat = words.Length == 1 ? "{0}:{1}* " : "{0}:{1} ";

    StringBuilder queryStringBuilder = new StringBuilder();
    foreach (string word in words)
    {
        // A blank token would leave a dangling "title:" clause in the query string.
        if (string.IsNullOrWhiteSpace(word))
        {
            continue;
        }
        queryStringBuilder.AppendFormat(clauseFormat, "title", word);
    }

    // Drop the separator left behind by the last clause.
    return queryStringBuilder.ToString().TrimEnd();
}

/// <summary>
/// Builds the Lucene index on disk from the rows returned by <c>GetList()</c>.
/// </summary>
/// <remarks>
/// The index is written to <c>D:\data\</c>. The writer is opened with <c>create = true</c>,
/// so every call that finds data rewrites the index. When no rows are read, the method
/// returns without touching the index directory.
/// </remarks>
public void InitIndex()
{
    DataTable dt = GetList(); // rows loaded from the database
    if (dt == null || dt.Rows.Count < 1)
    {
        return; // nothing was read, so there is nothing to index
    }

    // Both the directory and the writer hold file handles; dispose them deterministically.
    // (The index lives on disk here; an in-memory directory would also work.)
    using (FSDirectory directory = FSDirectory.Open(@"D:\data\"))
    using (IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED))
    {
        foreach (DataRow dr in dt.Rows)
        {
            // Only id and title are indexed here; creation time, article content, etc.
            // could be added the same way.
            Document doc = new Document(); // one document per row
            // Field arguments: name, value, whether the value is stored, whether it is analyzed.
            doc.Add(new Field("id", dr[0].ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.Add(new Field("title", dr[1].ToString(), Field.Store.YES, Field.Index.ANALYZED));
            writer.AddDocument(doc);
        }
        writer.Optimize(); // merge index segments
    }
}
/// <summary>
/// Loads the ID and Title columns of every row in the Article table.
/// </summary>
/// <returns>
/// The first table of the query result, or <c>null</c> when the query returned
/// no data set or a data set without tables.
/// </returns>
private DataTable GetList()
{
    // The whole table is read in one go because it is small; a large table
    // could be processed in batches instead.
    DataSet articles = SQLiteHelper.ExecuteQuery("select ID,Title  from Article");
    bool hasTable = articles != null && articles.Tables.Count > 0;
    return hasTable ? articles.Tables[0] : null;
}

/// <summary>
/// Multi-word search: segments <paramref name="keyword"/>, runs the resulting query
/// against the "title" field of the index in <c>D:\data\</c>, and prints the hits
/// to the console.
/// </summary>
/// <param name="keyword">Raw search text.</param>
/// <remarks>
/// At most 1000 documents are printed (paging could be added instead); the final
/// line reports the total number of matches.
/// </remarks>
public void SearchData(string keyword)
{
    const int maxResults = 1000;

    string queryText = AnalyzerKeyword(keyword);
    if (string.IsNullOrWhiteSpace(queryText))
    {
        // There is no query to parse, so report zero hits instead of
        // passing an empty string to the query parser.
        Console.WriteLine($"一共命中{0}");
        return;
    }

    // The directory and the searcher hold index file handles; dispose them when done.
    using (FSDirectory dir = FSDirectory.Open(@"D:\data\"))
    using (IndexSearcher searcher = new IndexSearcher(dir))
    {
        QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", new PanGuAnalyzer());
        Query query = parser.Parse(queryText);

        // Ask only for as many hits as will be printed; TotalHits still counts every match.
        TopDocs docs = searcher.Search(query, null, maxResults);
        foreach (ScoreDoc sd in docs.ScoreDocs)
        {
            Document doc = searcher.Doc(sd.Doc);
            Console.WriteLine("***************************************");
            Console.WriteLine(string.Format("id={0}", doc.Get("id")));
            Console.WriteLine(string.Format("title={0}", doc.Get("title")));
            // Only id and title are printed here; creation time, article content, etc.
            // could be added the same way.
        }
        Console.WriteLine($"一共命中{docs.TotalHits}");
    }
}
/// <summary>
/// Single-term search: looks up the fixed term "图书馆" in the "title" field of the
/// index in <c>D:\data\</c> and prints up to 10000 hits to the console.
/// </summary>
/// <remarks>
/// NOTE(review): besides id and title this prints time, price and content, while
/// <c>InitIndex</c> in this file stores only id and title — presumably those extra
/// values print empty unless the index was built with them; confirm.
/// </remarks>
public void SingelSearchData()
{
    // The directory and the searcher hold index file handles; dispose them when done.
    using (FSDirectory dir = FSDirectory.Open(@"D:\data\"))
    using (IndexSearcher searcher = new IndexSearcher(dir))
    {
        // Matches documents whose title contains this exact indexed term.
        TermQuery query = new TermQuery(new Term("title", "图书馆"));
        TopDocs docs = searcher.Search(query, null, 10000);
        foreach (ScoreDoc sd in docs.ScoreDocs)
        {
            Document doc = searcher.Doc(sd.Doc);
            Console.WriteLine("***************************************");
            Console.WriteLine(string.Format("id={0}", doc.Get("id")));
            Console.WriteLine(string.Format("title={0}", doc.Get("title")));
            Console.WriteLine(string.Format("time={0}", doc.Get("time")));
            Console.WriteLine(string.Format("price={0}", doc.Get("price")));
            Console.WriteLine(string.Format("content={0}", doc.Get("content")));
        }
        Console.WriteLine("1一共命中了{0}个", docs.TotalHits);
    }
}
/// <summary>
/// Sorted multi-word search: parses <paramref name="keyword"/> against the "title"
/// field, filters the hits by a numeric "time" range, sorts them by time and then
/// price, and prints the results to the console.
/// </summary>
/// <param name="keyword">Query text, passed to the Lucene query parser as-is.</param>
/// <remarks>
/// At most 1000 documents are printed (paging could be added instead); the final
/// line reports the total number of matches.
/// NOTE(review): <c>InitIndex</c> in this file indexes only id and title, so the
/// "time" and "price" fields used here must come from an index built elsewhere — confirm.
/// </remarks>
public void OrderSearchData(string keyword)
{
    const int maxResults = 1000;

    // The directory and the searcher hold index file handles; dispose them when done.
    using (FSDirectory dir = FSDirectory.Open(@"D:\data\"))
    using (IndexSearcher searcher = new IndexSearcher(dir))
    {
        QueryParser parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "title", new PanGuAnalyzer());
        Query query = parser.Parse(keyword);

        // Inclusive range filter on "time".
        // NOTE(review): values look like yyyyMMdd-encoded dates, but the upper bound
        // 20181822 is not a valid date — confirm the intended limit.
        NumericRangeFilter<int> timeFilter = NumericRangeFilter.NewIntRange("time", 20180000, 20181822, true, true);

        // The third SortField argument is "reverse": false sorts ascending, true descending.
        // NOTE(review): the earlier comments labelled these the other way round;
        // confirm which directions are actually wanted.
        SortField sortPrice = new SortField("price", SortField.DOUBLE, false); // ascending
        SortField sortTime = new SortField("time", SortField.INT, true);       // descending
        Sort sort = new Sort(sortTime, sortPrice); // time is the primary key, price breaks ties

        // Ask only for as many hits as will be printed; TotalHits still counts every match.
        TopDocs docs = searcher.Search(query, timeFilter, maxResults, sort);
        foreach (ScoreDoc sd in docs.ScoreDocs)
        {
            Document doc = searcher.Doc(sd.Doc);
            Console.WriteLine("***************************************");
            Console.WriteLine(string.Format("id={0}", doc.Get("id")));
            Console.WriteLine(string.Format("title={0}", doc.Get("title")));
            Console.WriteLine(string.Format("time={0}", doc.Get("time")));
            Console.WriteLine(string.Format("price={0}", doc.Get("price")));
        }
        Console.WriteLine("3一共命中了{0}个", docs.TotalHits);
    }
}

实例源码下载地址链接:https://pan.baidu.com/s/1zTVEilKp3o2GshfM6O5dZA 提取码:wl76
Lucene开源库下载地址链接:https://pan.baidu.com/s/1mLYcX5lkXcYzNkjGKH6Ehw 提取码:6ztu

评论