I am trying to set up a Solr index for searching against a database of product information. For this purpose, I have populated a database of product details and used Solr 6.0.0. For a given product detail (title, brand, other keywords), I would like to know if there is a product in the database that closely matches the given details. I have started dataimport and created the index. However, when I search, the scores of the matching products are all the same in spite of the products listed being different. I have tried with different combinations of search keywords, but the result is similar in every case. I have also tried using different Tokenizers and Filters.
Here is a sample of the schema.xml I have tried:
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Solr schema for the product index.
  Each product attribute is indexed as analysed text, and every attribute is
  additionally copied into the multi-valued "catchall" field, which is the
  declared default search field.
-->
<schema name="example" version="1.5">

  <!-- Product attributes: all indexed and stored. -->
  <field name="id"          type="Int"          indexed="true" stored="true"/>
  <field name="name"        type="text_general" indexed="true" stored="true" />
  <field name="brand"       type="text_general" indexed="true" stored="true"/>
  <field name="category"    type="text_general" indexed="true" stored="true"/>
  <field name="description" type="text_general" indexed="true" stored="true" />

  <!-- Aggregate field: receives a copy of every attribute through the copyField rules below. -->
  <field name="catchall"    type="text_general" indexed="true" stored="true" multiValued="true" />

  <copyField source="id"          dest="catchall" />
  <copyField source="name"        dest="catchall" />
  <copyField source="brand"       dest="catchall" />
  <copyField source="category"    dest="catchall" />
  <copyField source="description" dest="catchall" />

  <!-- Document key and the field used when a query names no field. -->
  <uniqueKey>id</uniqueKey>
  <defaultSearchField>catchall</defaultSearchField>

  <types>
    <!-- Untokenised string type. -->
    <fieldtype name="string" class="solr.StrField" sortMissingLast="true" />

    <!-- Integer type used by "id". -->
    <fieldtype name="Int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>

    <!--
      General-purpose analysed text.
      NOTE(review): ICUFoldingFilterFactory runs after ASCIIFoldingFilterFactory and
      before LowerCaseFilterFactory in both chains; the three look like they overlap.
      Confirm with the Analysis screen before removing any of them.
    -->
    <fieldtype name="text_general" class="solr.TextField" positionIncrementGap="100">

      <!-- Index-time chain: strip HTML, split on whitespace, split words at delimiters, fold, lower-case. -->
      <analyzer type="index">
        <charFilter class="solr.HTMLStripCharFilterFactory"/>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <!-- Sub-word parts are generated, nothing is catenated, and the original token is kept. -->
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                splitOnNumerics="1"
                splitOnCaseChange="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                preserveOriginal="1"
        />
        <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true"/>
        <filter class="solr.ICUFoldingFilterFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>

      <!-- Query-time chain: same steps as index time, plus synonym expansion from synonyms.txt before lower-casing. -->
      <analyzer type="query">
        <charFilter class="solr.HTMLStripCharFilterFactory"/>
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.WordDelimiterFilterFactory"
                generateWordParts="1"
                splitOnNumerics="1"
                splitOnCaseChange="1"
                generateNumberParts="1"
                catenateWords="0"
                catenateNumbers="0"
                catenateAll="0"
                preserveOriginal="1"
        />
        <filter class="solr.ASCIIFoldingFilterFactory" preserveOriginal="true"/>
        <filter class="solr.ICUFoldingFilterFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.LowerCaseFilterFactory"/>
      </analyzer>
    </fieldtype>

    <!-- Neither indexed nor stored. -->
    <fieldtype name="ignored" stored="false" indexed="false" multiValued="true" class="solr.StrField" />
  </types>

</schema>
Edit: The entity definition from data-config.xml is as below:
<!--
  Root entity: one Solr document per row of master_products joined to its brand.
  NOTE(review): a deltaImportQuery is declared but no deltaQuery is visible in this
  excerpt, and the delta variable is keyed on product_name although pk is "id".
  Confirm how delta imports are meant to identify changed rows.
-->
<entity name="master_products"
        pk="id"
        query="select p.* ,b.* from master_products p ,master_brands b where b.id=p.brand_id"
        deltaImportQuery="SELECT * FROM master_products WHERE product_name='${dataimporter.delta.product_name}' "
>
  <!-- or b.brand='${dataimporter.delta.brand}' -->

  <!-- Column to Solr field mapping for the root row. -->
  <field column="product_name" name="name"/>
  <field column="product_description" name="description"/>
  <field column="id" name="id"/>
  <field column="mrp" name="mrp"/>
  <field column="brand" name="brand"/>

  <!--
    Cached lookup entities.
    The delta queries reference the parent row with the ${entity.column} syntax.
    Without the leading "$" the text is not substituted and reaches the database verbatim.
    NOTE(review): neither cached entity declares a cacheKey/cacheLookup (or "where")
    pair, so nothing here ties a cached row to its parent product. Confirm the
    intended join columns.
  -->
  <entity name="master_brands"
          query="select * from master_brands"
          deltaImportQuery="select * from master_brands where id =${master_products.brand_id}"
          processor="SqlEntityProcessor"
          cacheImpl="SortedMapBackedCache" >
  </entity>

  <entity name="master_product_categories"
          query="select * from master_product_categories"
          deltaImportQuery="select * from master_product_categories where id =${master_products.product_category_id}"
          processor="SqlEntityProcessor"
          cacheImpl="SortedMapBackedCache" >
    <field column="category" name="category" />
  </entity>
</entity>
Edit: The query is as below.
http://localhost:8983/solr/myproducts/select?fl=* score&fq=brand:Nikon&fq=mrp:28950*&indent=on&q=name:*"Nikon D3200 (Black) DSLR with AF-S 18-55mm VR Kit Lens"*&wt=json
I would like help in achieving my goal. Can you please direct me to creating the proper configuration that would meet my purpose? Thanks in advance.