.NET Core + Lucene.Net + jieba.NET 实现站内搜索

2021-12-05 更新
核心代码
/// <summary>
/// Splits a search phrase into tokens with the jieba.NET segmenter and keeps
/// only the tokens that are longer than one character.
/// </summary>
/// <param name="key">The text to segment. May be null or empty.</param>
/// <returns>
/// The retained tokens, in the order the segmenter produced them. The list is
/// empty when <paramref name="key"/> is null or empty, or when no token
/// qualifies. Never null.
/// </returns>
public List<string> CutKeyWord(string key)
{
    var rs = new List<string>();

    // Nothing to segment; avoid handing null/empty text to the segmenter.
    if (string.IsNullOrEmpty(key))
    {
        return rs;
    }

    var segmenter = new JiebaSegmenter();
    var tokens = segmenter.Cut(key);
    if (tokens == null)
    {
        return rs;
    }

    // Enumerate the segmenter output exactly once; an empty sequence simply
    // skips the loop, so no separate Count() check is needed.
    foreach (var item in tokens)
    {
        // Drop empty and single-character tokens.
        if (string.IsNullOrEmpty(item) || item.Length <= 1)
        {
            continue;
        }

        rs.Add(item);
    }

    return rs;
}
/// <summary>
/// Rebuilds the on-disk Lucene index under "temp/lucene/merchs" (relative to
/// the current working directory) from <c>MovieDatabase.Database</c>.
/// </summary>
/// <remarks>
/// Every existing document is deleted and the "id", "title" and "Summary"
/// fields of each record are re-added. The delete and the additions are
/// committed together; if anything fails before the commit the writer is
/// rolled back, so the previously committed index stays intact. Exceptions are
/// logged to the console and not propagated.
/// </remarks>
public void UpdateMerchIndex()
{
    try
    {
        Console.WriteLine($"[{DateTime.Now}] UpdateMerchIndex job begin...");

        var indexDir = Path.Combine(System.IO.Directory.GetCurrentDirectory(), "temp", "lucene", "merchs");

        // CreateDirectory does nothing when the directory already exists.
        System.IO.Directory.CreateDirectory(indexDir);

        var version = Lucene.Net.Util.LuceneVersion.LUCENE_48;

        // The directory, analyzer and writer are all disposable; nested usings
        // release them in reverse order (writer first, directory last).
        using (var director = FSDirectory.Open(new DirectoryInfo(indexDir)))
        using (var analyzer = new JieBaAnalyzer(TokenizerMode.Search))
        using (var indexWriter = new IndexWriter(director, new IndexWriterConfig(version, analyzer)))
        {
            try
            {
                // Clear whatever is currently indexed. This is harmless on a
                // brand-new index, so it does not depend on probing for
                // particular segment files on disk.
                indexWriter.DeleteAll();

                var addDocs = new List<Document>();
                foreach (var q in MovieDatabase.Database)
                {
                    var doc = new Document();
                    doc.Add(new Int32Field("id", q.Id, Field.Store.YES));
                    // TextField rejects null values; index missing text as empty.
                    doc.Add(new TextField("title", q.Title ?? string.Empty, Field.Store.YES));
                    doc.Add(new TextField("Summary", q.Summary ?? string.Empty, Field.Store.YES));
                    addDocs.Add(doc);
                }

                if (addDocs.Count > 0)
                {
                    indexWriter.AddDocuments(addDocs);
                }

                // Publish the delete and the new documents in one commit.
                indexWriter.Commit();
            }
            catch
            {
                // Disposing the writer would otherwise commit the pending
                // DeleteAll and leave an empty or partial index behind.
                indexWriter.Rollback();
                throw;
            }
        }

        Console.WriteLine($"[{DateTime.Now}] UpdateMerchIndex job end!");
    }
    catch (Exception ex)
    {
        Console.WriteLine($"UpdateMerchIndex ex={ex}");
    }
}


/// <summary>
/// Searches the Lucene index under "temp/lucene/merchs" for movies whose
/// "Summary" field contains any token of the given keyword phrase.
/// </summary>
/// <param name="key">
/// The search phrase. It is trimmed, lower-cased and segmented with
/// <c>JiebaHelper.CutKeyWord</c>; each resulting token becomes an optional
/// (SHOULD) term clause.
/// </param>
/// <returns>
/// <c>null</c> when <paramref name="key"/> is null or empty. Otherwise a list
/// of at most 1000 matches with <c>Id</c> and <c>Title</c> populated; the list
/// is empty when the index directory does not exist, the phrase yields no
/// tokens, nothing matches, or an error occurs (errors are logged to the
/// console and not propagated).
/// </returns>
public List<Movie> SearchMerchs(string key)
{
    if (string.IsNullOrEmpty(key))
    {
        return null;
    }

    // Culture-invariant lower-casing so the result does not vary with the
    // machine's current culture.
    key = key.Trim().ToLowerInvariant();

    var rs = new List<Movie>();
    try
    {
        var indexDir = Path.Combine(System.IO.Directory.GetCurrentDirectory(), "temp", "lucene", "merchs");
        if (!System.IO.Directory.Exists(indexDir))
        {
            return rs;
        }

        var words = new JiebaHelper().CutKeyWord(key);
        if (words == null || words.Count == 0)
        {
            // A query without clauses matches nothing; skip opening the index.
            return rs;
        }

        var booleanQuery = new BooleanQuery();
        foreach (var word in words)
        {
            booleanQuery.Add(new TermQuery(new Term("Summary", word)), Occur.SHOULD);
        }

        // One directory and one reader, both disposed on exit. The same
        // searcher runs the query and loads the stored documents, so every
        // doc id is resolved against the reader that produced it.
        using (var directory = FSDirectory.Open(new DirectoryInfo(indexDir)))
        using (var reader = DirectoryReader.Open(directory))
        {
            var searcher = new IndexSearcher(reader);

            // Return at most the 1000 best-scoring hits.
            var docs = searcher.Search(booleanQuery, 1000).ScoreDocs;

            foreach (var d in docs)
            {
                var document = searcher.Doc(d.Doc);
                rs.Add(new Movie
                {
                    Id = int.Parse(document.Get("id"), System.Globalization.CultureInfo.InvariantCulture),
                    Title = document.Get("title")
                });
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"SearchMerchs ex={ex}");
    }
    return rs;
}
转载请保留http://www.luofenming.com/show.aspx?id=ART2019121600001

源码 Demo 链接: https://pan.baidu.com/s/1cVX3lJiuf6KUcdtstvVKHg 提取码: 3822

注意:在我上传到百度云的源码中,ConsoleApp1 引用 luceneCore 程序集时,还必须同时引用 Lucene.Net.Analysis.Common.dll,否则运行时会抛出异常。