Manipulating a query using a Custom Query Parser in Solr

Published 2019-08-26 17:21

Question:

I have tried to create a custom query parser (a QParserPlugin/QParser pair) that also makes use of the OpenNLP libraries.

My objective is that, given a query such as "How many defective rims are causing failure in ABC tyres in China",

I want the final query to be reduced to something like "defective rims failure tyres China", which would then go to the analyzer for further processing.

This is my code for QueryParserPlugin -

package com.mycompany.lucene.search;

import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QParserPlugin;

public class QueryParserPlugin extends QParserPlugin {

  @Override
  public QParser createParser(String qstr, SolrParams localParams,
      SolrParams params, SolrQueryRequest req) {
    // Hand the raw query string over to the custom QParser,
    // using "body_txt_str" as the default field.
    return new QueryParser(qstr, localParams, params, req, "body_txt_str");
  }
}

And the code for my QueryParser -

package com.mycompany.lucene.search;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SyntaxError;

import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;

public class QueryParser extends QParser {

  private String fieldName;

  public QueryParser(String qstr, SolrParams localParams, SolrParams params,
      SolrQueryRequest req, String defaultFieldName) {
    super(qstr, localParams, params, req);

    // Resolve the target field: local "field" param, then "df",
    // then the default passed in by the plugin.
    if (localParams != null) {
      fieldName = localParams.get("field");
    }
    if (fieldName == null) {
      fieldName = params.get("df");
    }
    if (fieldName == null) {
      fieldName = defaultFieldName;
    }
  }

  @Override
  public Query parse() throws SyntaxError {
    String[] tokens;
    String[] tags;

    // Load the OpenNLP tokenizer and part-of-speech models and run them
    // over the raw query string.
    try (InputStream tokenModelIn = new FileInputStream("/Files/en-token.bin");
         InputStream posModelIn = new FileInputStream("/Files/en-pos-maxent.bin")) {

      Tokenizer tokenizer = new TokenizerME(new TokenizerModel(tokenModelIn));
      tokens = tokenizer.tokenize(qstr);

      POSTaggerME posTagger = new POSTaggerME(new POSModel(posModelIn));
      tags = posTagger.tag(tokens);
    } catch (IOException e) {
      throw new RuntimeException("Could not load OpenNLP models", e);
    }

    // Keep only adjectives (JJ) and nouns (NN, NNS).
    StringBuilder finalQuery = new StringBuilder();
    for (int i = 0; i < tokens.length; i++) {
      if ("JJ".equals(tags[i]) || "NNS".equals(tags[i]) || "NN".equals(tags[i])) {
        if (finalQuery.length() > 0) {
          finalQuery.append(' ');
        }
        finalQuery.append(tokens[i]);
      }
    }

    return new TermQuery(new Term(fieldName, finalQuery.toString()));
  }
}

I then exported this as a jar and added these <lib> directives to my solrconfig.xml -

<lib dir="${solr.install.dir:../../../..}/contrib/customparser/lib"
     regex=".*\.JAR" />
<lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lib"
     regex="opennlp-.*\.jar" />

But I am getting the error below:

Caused by:

java.lang.NoClassDefFoundError: opennlp/tools/tokenize/Tokenizer
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:348)
    at org.apache.solr.core.SolrResourceLoader.findClass(SolrResourceLoader.java:541)
    at org.apache.solr.core.SolrResourceLoader.findClass(SolrResourceLoader.java:488)
    at org.apache.solr.core.SolrCore.createInstance(SolrCore.java:786)
    at org.apache.solr.core.PluginBag.createPlugin(PluginBag.java:135)
    at org.apache.solr.core.PluginBag.init(PluginBag.java:271)
    at org.apache.solr.core.PluginBag.init(PluginBag.java:260)
    at org.apache.solr.core.SolrCore.<init>(SolrCore.java:957)
    ... 9 more

This is my first time creating a custom query parser; could you please help me out?

Thanks

Answer 1:

Most probably your path

${solr.install.dir:../../../..}/contrib/analysis-extras/lib

doesn't contain the relevant OpenNLP jars, or the regex does not match them. That's the first thing to check.
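
For example, assuming the OpenNLP jars really sit in those directories under names such as opennlp-tools-1.9.1.jar (the exact file name is just an illustration), directives along these lines should pick them up; note that the regex is an ordinary, case-sensitive Java regex, so ".*\.JAR" will not match files ending in lowercase ".jar":

<lib dir="${solr.install.dir:../../../..}/contrib/customparser/lib"
     regex=".*\.jar" />
<lib dir="${solr.install.dir:../../../..}/contrib/analysis-extras/lib"
     regex="opennlp-.*\.jar" />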

You have to either bundle the OpenNLP dependencies into your custom query parser jar (e.g., if you use Maven to build your project, with the maven-assembly-plugin, maven-shade-plugin, etc.), or make sure the OpenNLP jars are actually matched by the relevant <lib> directive in your solrconfig.xml.
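
If you go the bundling route instead, a minimal maven-shade-plugin sketch for the <build><plugins> section of the parser's pom.xml could look like the following (the version is just an example); the resulting "uber" jar then already contains the OpenNLP classes, so only the directive loading your own jar is needed:

<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-shade-plugin</artifactId>
  <version>3.2.4</version>
  <executions>
    <execution>
      <phase>package</phase>
      <goals>
        <goal>shade</goal>
      </goals>
    </execution>
  </executions>
</plugin>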