I wrote the following code to test the SetBoost method when adding a field to a document.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using Lucene;
using Lucene.Net;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Directory = Lucene.Net.Store.Directory;
using Version = Lucene.Net.Util.Version;
namespace LuceneTest
{
public class LuceneTest
{
static void Main(string[] args)
{
var product1 = new Document();
product1.Add(new Field("Id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
var title1 = new Field("title", "Special One", Field.Store.YES, Field.Index.ANALYZED);
title1.SetBoost(2f);
product1.Add(title1);
product1.Add(new Field("synopsis", "special synopsis", Field.Store.YES, Field.Index.ANALYZED));
var product2 = new Document();
product2.Add(new Field("Id", "2", Field.Store.YES, Field.Index.NOT_ANALYZED));
var title2 = new Field("title", "Special Two", Field.Store.YES, Field.Index.ANALYZED);
title2.SetBoost(2f);
product2.Add(title2);
product2.Add(new Field("synopsis", "special synopsis", Field.Store.YES, Field.Index.ANALYZED));
var product3 = new Document();
product3.Add(new Field("Id", "3", Field.Store.YES, Field.Index.NOT_ANALYZED));
var title3 = new Field("title", "Normal One", Field.Store.YES, Field.Index.ANALYZED);
title3.SetBoost(2f);
product3.Add(title3);
product3.Add(new Field("synopsis", "special synopsis", Field.Store.YES, Field.Index.ANALYZED));
var product4 = new Document();
product4.Add(new Field("Id", "4", Field.Store.YES, Field.Index.NOT_ANALYZED));
var title4 = new Field("title", "Normal Two", Field.Store.YES, Field.Index.ANALYZED);
title4.SetBoost(2f);
product4.Add(title4);
product4.Add(new Field("synopsis", "special synopsis", Field.Store.YES, Field.Index.ANALYZED));
var product5 = new Document();
product5.Add(new Field("Id", "5", Field.Store.YES, Field.Index.NOT_ANALYZED));
var title5 = new Field("title", "Special Three", Field.Store.YES, Field.Index.ANALYZED);
title5.SetBoost(2f);
product5.Add(title5);
product5.Add(new Field("synopsis", "normal synopsis", Field.Store.YES, Field.Index.ANALYZED));
Directory directory = FSDirectory.Open(new DirectoryInfo(Environment.CurrentDirectory + "\\Lucene"));
Analyzer analyzer = new StandardAnalyzer();
var writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
writer.AddDocument(product1);
writer.AddDocument(product2);
writer.AddDocument(product3);
writer.AddDocument(product4);
writer.AddDocument(product5);
writer.Optimize();
writer.Close();
Console.WriteLine("searching...");
var indexReader = IndexReader.Open(directory, true);
var indexSearcher = new IndexSearcher(indexReader);
var booleanQuery1 = new BooleanQuery();
booleanQuery1.Add(new BooleanClause(new PrefixQuery(new Term("title", "special")), BooleanClause.Occur.SHOULD));
booleanQuery1.Add(new BooleanClause(new PrefixQuery(new Term("synopsis", "special")), BooleanClause.Occur.SHOULD));
var booleanQuery2 = new BooleanQuery();
booleanQuery2.Add(new BooleanClause((Query)booleanQuery1, BooleanClause.Occur.MUST));
TopDocs results = indexSearcher.Search(booleanQuery2, (Filter)null, 200);
var hits = results.ScoreDocs;
foreach (var hit in hits)
{
var document = indexSearcher.Doc(hit.doc);
Console.WriteLine(document.Get("Id") + " " + document.Get("title") + " " + hit.score);
}
Console.WriteLine("done...");
Console.ReadLine();
}
}
}
I am using Lucene version 2.9.4.1. I set boost on the title field. I would expect products 1, 2, and 5 to be at the top when I search for the term "special" in the title and synopsis fields, but instead I get the following:
searching...
1 Special One 1.414214
2 Special Two 1.414214
3 Normal One 0.3535534
4 Normal Two 0.3535534
5 Special Three 0.3535534
done...
Product 5 has the same score as products 3 and 4 even though it has the term "special" in its title, just not the synopsis.
Any help or thoughts would be appreciated. Thanks
In the book Lucene In Action, second edition, page 49, section 2.5.2 Boosting fields "But remember when you want to change boost on field or document you'll have to fully remove and then read the entire document, or use the updateDocument method, which does the same thing".
As you are testing with the same index file, I think you need to call writer.updateDocument before open the index.
I believe the issue is that you are using PrefixQueries. Prefix Query gets rewritten to a constant scoring query. You can set the rewrite method yourself, like:
Or you could try just using TermQuery instead of PrefixQuery. Either way you should see the field level boost take effect.
Oh, one note, if you wish to understand why results are being scored the way they are, you should take a look at Searcher.explain. Scoring gets complicated, and this is a very handy tool for understanding and tuning it.