How would I recreate the following index using Elasticsearch Nest API?
Here is the json for the index including the mapping:
{
"settings": {
"analysis": {
"filter": {
"trigrams_filter": {
"type": "ngram",
"min_gram": 3,
"max_gram": 3
}
},
"analyzer": {
"trigrams": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"trigrams_filter"
]
}
}
}
},
"mappings": {
"data": {
"_all" : {"enabled" : true},
"properties": {
"text": {
"type": "string",
"analyzer": "trigrams"
}
}
}
}
}
Here is my attempt:
var newIndex = client.CreateIndexAsync(indexName, index => index
.NumberOfReplicas(replicas)
.NumberOfShards(shards)
.Settings(settings => settings
.Add("merge.policy.merge_factor", "10")
.Add("search.slowlog.threshold.fetch.warn", "1s")
.Add("mapping.allow_type_wrapper", true))
.AddMapping<Object>(mapping => mapping
.IndexAnalyzer("trigram")
.Type("string"))
);
The documentation does not mention anything about this?
UPDATE:
Found this post that uses
var index = new IndexSettings()
and then adds Analysis with the string literal json.
index.Add("analysis", @"{json});
Where can one find more examples like this one and does this work?
There are two main ways that you can accomplish this as outlined in the Nest Create Index Documentation:
Here is the way where you directly declare the index settings as Fluent Dictionary entries. Just like you are doing in your example above. I tested this locally and it produces the index settings that match your JSON above.
var response = client.CreateIndex(indexName, s => s
.NumberOfReplicas(replicas)
.NumberOfShards(shards)
.Settings(settings => settings
.Add("merge.policy.merge_factor", "10")
.Add("search.slowlog.threshold.fetch.warn", "1s")
.Add("mapping.allow_type_wrapper", true)
.Add("analysis.filter.trigrams_filter.type", "nGram")
.Add("analysis.filter.trigrams_filter.min_gram", "3")
.Add("analysis.filter.trigrams_filter.max_gram", "3")
.Add("analysis.analyzer.trigrams.type", "custom")
.Add("analysis.analyzer.trigrams.tokenizer", "standard")
.Add("analysis.analyzer.trigrams.filter.0", "lowercase")
.Add("analysis.analyzer.trigrams.filter.1", "trigrams_filter")
)
.AddMapping<Object>(mapping => mapping
.Type("data")
.AllField(af => af.Enabled())
.Properties(prop => prop
.String(sprop => sprop
.Name("text")
.IndexAnalyzer("trigrams")
)
)
)
);
Please note that NEST also includes the ability to create index settings using strongly typed classes as well. I will post an example of that later, if I have time to work through it.
Hope this helps.
In case people have NEST 2.0, the .NumberOfReplicas(x).NumberOfShards(y) are in the Settings area now so specify within the lamba expression under Settings.
EsClient.CreateIndex("indexname", c => c
.Settings(s => s
.NumberOfReplicas(replicasNr)
.NumberOfShards(shardsNr)
)
NEST 2.0 has a lot of changes and moved things around a bit so these answers are a great starting point for sure. You may need to adjust a little for the NEST 2.0 update.
Small example :
EsClient.CreateIndex("indexname", c => c
.NumberOfReplicas(replicasNr)
.NumberOfShards(shardsNr)
.Settings(s => s
.Add("merge.policy.merge_factor", "10")
.Add("search.slowlog.threshold.fetch.warn", "15s")
)
#region Analysis
.Analysis(descriptor => descriptor
.Analyzers(bases => bases
.Add("folded_word", new CustomAnalyzer()
{
Filter = new List<string> { "icu_folding", "trim" },
Tokenizer = "standard"
}
)
.TokenFilters(i => i
.Add("engram", new EdgeNGramTokenFilter
{
MinGram = 1,
MaxGram = 20
}
)
)
.CharFilters(cf => cf
.Add("drop_chars", new PatternReplaceCharFilter
{
Pattern = @"[^0-9]",
Replacement = ""
}
)
#endregion
#region Mapping Categories
.AddMapping<Categories>(m => m
.Properties(props => props
.MultiField(mf => mf
.Name(n => n.Label_en)
.Fields(fs => fs
.String(s => s.Name(t => t.Label_en).Analyzer("folded_word"))
)
)
)
#endregion
);
In case anyone has migrated to NEST 2.4 and has the same question - you would need to define your custom filters and analyzers in the index settings like this:
elasticClient.CreateIndex(_indexName, i => i
.Settings(s => s
.Analysis(a => a
.TokenFilters(tf => tf
.EdgeNGram("edge_ngrams", e => e
.MinGram(1)
.MaxGram(50)
.Side(EdgeNGramSide.Front)))
.Analyzers(analyzer => analyzer
.Custom("partial_text", ca => ca
.Filters(new string[] { "lowercase", "edge_ngrams" })
.Tokenizer("standard"))
.Custom("full_text", ca => ca
.Filters(new string[] { "standard", "lowercase" } )
.Tokenizer("standard"))))));