I will show how to implement, in a few minutes, a search engine for your website, Windows application, or any other type of application.
It's pretty easy, and I have seen very good performance as well.
I will be using the open-source Lucene.NET library: https://lucenenet.apache.org/
High-level architecture diagram:
So, let's get started with the indexing process. The code is extremely simple; I am just using a console application:
First, a Page model, used to build the list of pages to index. In phase II, you can replace this list with a function that recursively crawls the pages of your website; then you no longer need the hard-coded list.
/// <summary>
/// A single page to be indexed: its title, its URL, and the body text
/// that is filled in before the page is added to the index.
/// </summary>
public class Page
{
    /// <summary>Title of the page.</summary>
    public string PageTitle { get; set; }

    /// <summary>Body text of the page.</summary>
    public string PageBody { get; set; }

    /// <summary>URL of the page.</summary>
    public string PageUrl { get; set; }
}
/// <summary>
/// Console entry point: opens the index directory, creates the analyzer and
/// the index writer, rebuilds the index via <c>CreateIndex</c>, then waits
/// for a key press before exiting.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const string directoryPath = @"C:\GitSource\Search Web Client\App_Data\LuceneIndexes";
    _directory = FSDirectory.Open(directoryPath);
    Analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

    // create: true tells the writer to start a fresh index, replacing any
    // previous one. This replaces the manual ListAll()/DeleteFile() loop,
    // which fails when the directory does not exist yet (first run) and
    // when another process still has index files open.
    Writer = new IndexWriter(_directory, Analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);

    CreateIndex();
    Console.WriteLine("Indexing DONE");
    Console.ReadKey();
}
/// <summary>
/// Fetches the body of each hard-coded page, adds one Lucene document per
/// page (fields <c>postUrl</c>, <c>postTitle</c>, <c>postBody</c>) to
/// <c>Writer</c>, then optimizes, commits and disposes the writer.
/// </summary>
private static void CreateIndex()
{
    var pages = new List<Page>
    {
        new Page {PageTitle = "Index", PageUrl = "http://localhost:63461/"},
        new Page {PageTitle = "About", PageUrl = "http://localhost:63461/Home/About"},
        new Page {PageTitle = "Contact", PageUrl = "http://localhost:63461/Home/Contact"}
    };

    try
    {
        foreach (var page in pages)
        {
            page.PageBody = GetPageBody(page.PageUrl);

            var doc = new Document();
            // URL and title are stored verbatim (not tokenized); only the
            // body is analyzed.
            doc.Add(new Field("postUrl", page.PageUrl, Field.Store.YES,
                Field.Index.NOT_ANALYZED));
            doc.Add(new Field("postTitle", page.PageTitle, Field.Store.YES,
                Field.Index.NOT_ANALYZED));
            doc.Add(new Field("postBody", page.PageBody, Field.Store.YES,
                Field.Index.ANALYZED));
            Writer.AddDocument(doc);

            Console.WriteLine("Indexing Page {0}", page.PageTitle);
        }

        Writer.Optimize();
        // Commit() flushes buffered documents itself, so no explicit
        // Flush(...) call is needed before it.
        Writer.Commit();
    }
    finally
    {
        // Always dispose the writer so its write lock on the index
        // directory is released even when fetching or indexing a page fails.
        Writer.Dispose();
    }
}
Indexing Application Ready.
After running this process, you will find the Lucene index files under:
const string directoryPath = @"C:\GitSource\Search Web Client\App_Data\LuceneIndexes";
Now let's see how we can search these index files using the Lucene.NET search engine.
For instance, I will create an ASP.NET MVC application. I will add one new page, "Search", which searches the 3 pages defined previously in the indexing application above:
//Home
new Page {PageTitle = "Index", PageUrl = "http://localhost:63461/"},
//About
new Page {PageTitle = "About", PageUrl = "http://localhost:63461/Home/About"},
//Contact
new Page {PageTitle = "Contact", PageUrl = "http://localhost:63461/Home/Contact"}
Let's add a new action to HomeController that serves the Search page and handles the search POST from the cshtml view:
/// <summary>
/// Runs the submitted query against the <c>postBody</c> field of the Lucene
/// index under <c>~/App_Data/LuceneIndexes</c> and renders the matching pages.
/// </summary>
/// <param name="searchingQuery">Posted search form; its <c>Query</c> text is parsed with the Lucene query parser.</param>
/// <returns>
/// The view without a model when the query text is empty; otherwise the view
/// with up to 200 results (an empty list when the query text cannot be parsed).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="searchingQuery"/> is null.</exception>
public ActionResult Search(SearchingQuery searchingQuery)
{
    if (searchingQuery == null)
    {
        // ArgumentNullException derives from ArgumentException, so callers
        // catching the previous exception type keep working.
        throw new ArgumentNullException("searchingQuery", "Searching Query Can't be NULL");
    }
    if (string.IsNullOrEmpty(searchingQuery.Query))
    {
        return View();
    }

    var searchResults = new List<SearchResults>();
    string indexDirectory = Server.MapPath("~/App_Data/LuceneIndexes");

    // using blocks release the directory, analyzer and searcher on every
    // path, including when parsing or searching throws.
    using (var directory = FSDirectory.Open(indexDirectory))
    using (var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
    using (var searcher = new IndexSearcher(directory))
    {
        var parser = new QueryParser(Lucene.Net.Util.Version.LUCENE_30, "postBody", analyzer);

        Lucene.Net.Search.Query searchQuery;
        try
        {
            searchQuery = parser.Parse(searchingQuery.Query);
        }
        catch (Lucene.Net.QueryParsers.ParseException)
        {
            // The text comes straight from the user; malformed query syntax
            // (e.g. an unbalanced quote) is reported as "no results" rather
            // than surfacing as an unhandled server error.
            return View(searchResults);
        }

        TopDocs hits = searcher.Search(searchQuery, 200);

        // Iterate the returned ScoreDocs, not TotalHits: TotalHits counts
        // every match in the index, while ScoreDocs holds at most the 200
        // requested, so indexing up to TotalHits can run past the array.
        foreach (var hit in hits.ScoreDocs)
        {
            Document doc = searcher.Doc(hit.Doc);
            searchResults.Add(new SearchResults
            {
                PageUrl = doc.Get("postUrl"),
                PageTitle = doc.Get("postTitle"),
                PageBody = doc.Get("postBody")
            });
        }
    }

    return View(searchResults);
}
and the Model for SearchResults:
/// <summary>
/// One search hit: the URL, title and body values read back from a matching
/// index document.
/// </summary>
public class SearchResults
{
/// <summary>Value of the document's "postUrl" field.</summary>
public string PageUrl { get; set; }
/// <summary>Value of the document's "postTitle" field.</summary>
public string PageTitle { get; set; }
/// <summary>Value of the document's "postBody" field.</summary>
public string PageBody { get; set; }
}
Search.cshtml
@* Search page: a query form that posts to Home/Search, followed by the result list. *@
<div class="container">
<div class="row">
@* The form posts back to the Search action on the Home controller. *@
@using (Html.BeginForm("Search", "Home", FormMethod.Post))
{
<div class="col-lg-4 col-lg-offset-4">
@* Field is named "Query"; presumably bound to SearchingQuery.Query by MVC model binding. *@
<input type="text" id="Query" name="Query" placeholder="Please Search Something..." />
<button type="submit">Search</button>
</div>
}
</div>
<div class="row">
<h2>Results</h2>
</div>
<div class="row">
@* A null model or an empty list both render the "no results" message. *@
@if (Model == null || Model.Count == 0)
{
<div class="row">
Sorry, No Results Found
</div>
}
else
{
<ul>
@* One link per result: the href is the page URL, the link text is the page title. *@
@foreach (var result in Model)
{
<li>
<a href="@result.PageUrl">@result.PageTitle</a>
</li>
}
</ul>
}
</div>
</div>
Web Site Structure
That's it :)
Thanks!
No comments:
Post a Comment