[Fixed]-Partial search not working on Elasticsearch+Haystack in spite of using Ngram and Edgengram for building index

1👍

Haystack is not very good with ElasticSearch, you cannot use proper indexing values so you have to provide custom ElasticSearchBackEnd to enable it:

#in a search_backends.py file
from django.conf import settings
from haystack.backends.elasticsearch_backend import (
    ElasticsearchSearchBackend,
    ElasticsearchSearchEngine
)
from haystack.fields import EdgeNgramField as BaseEdgeNgramField, NgramField as BaseNgramField
from haystack.indexes import CharField

#just an example of which degree of configuration could be possible
CUSTOM_FIELD_TYPE = {
    'completion': {
        'type': 'completion',
        'payloads': True,
        'analyzer': 'suggest_analyzer',
        'preserve_separators': True,
        'preserve_position_increments': False
    },
}

# Custom Backend
class CustomElasticBackend(ElasticsearchSearchBackend):

    DEFAULT_ANALYZER = None

    def __init__(self, connection_alias, **connection_options):
        super(CustomElasticBackend, self).__init__(
                                connection_alias, **connection_options)
        user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS', None)
        self.DEFAULT_ANALYZER = getattr(settings, 'ELASTICSEARCH_DEFAULT_ANALYZER', "snowball")
        if user_settings:
            setattr(self, 'DEFAULT_SETTINGS', user_settings)

    def build_schema(self, fields):
        content_field_name, mapping = super(CustomElasticBackend,
                                              self).build_schema(fields)

        for field_name, field_class in fields.items():
            field_mapping = mapping[field_class.index_fieldname]

            index_analyzer = getattr(field_class, 'index_analyzer', None)
            search_analyzer = getattr(field_class, 'search_analyzer', None)
            field_analyzer = getattr(field_class, 'analyzer', self.DEFAULT_ANALYZER)

            if field_mapping['type'] == 'string' and field_class.indexed:
                field_mapping["term_vector"] = "with_positions_offsets"
                if not hasattr(field_class, 'facet_for') and not field_class.field_type in('ngram', 'edge_ngram'):
                    field_mapping['analyzer'] = field_analyzer

            if field_class.field_type in CUSTOM_FIELD_TYPE:
                field_mapping = CUSTOM_FIELD_TYPE.get(field_class.field_type).copy()

            if index_analyzer and search_analyzer:
                field_mapping['index_analyzer'] = index_analyzer
                field_mapping['search_analyzer'] = search_analyzer
                if 'analyzer' in field_mapping:
                    del(field_mapping['analyzer'])

            mapping.update({field_class.index_fieldname: field_mapping})
        return (content_field_name, mapping)


class CustomElasticSearchEngine(ElasticsearchSearchEngine):
    backend = CustomElasticBackend


# Custom fields, just use the ones you need or create yours
class CustomFieldMixin(object):

    def __init__(self, **kwargs):
        self.analyzer = kwargs.pop('analyzer', None)
        self.index_analyzer = kwargs.pop('index_analyzer', None)
        self.search_analyzer = kwargs.pop('search_analyzer', None)
        super(CustomFieldMixin, self).__init__(**kwargs)

class CustomCharField(CustomFieldMixin, CharField):
    pass


class CustomCompletionField(CustomFieldMixin, CharField):
    field_type = 'completion'


class CustomEdgeNgramField(CustomFieldMixin, BaseEdgeNgramField):
    pass


class CustomNgramField(CustomFieldMixin, BaseNgramField):
    pass




#settings.py
ELASTICSEARCH_INDEX_SETTINGS = {
    'settings': {
        "analysis": {
            "analyzer": {
                "custom_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter":  [ "lowercase", "asciifolding" ]
                },
                "str_index_analyzer" : {
                    "type": "custom",
                    "tokenizer" : "haystack_ngram_tokenizer",
                    "filter" : ["stopwords", "asciifolding", "lowercase", "snowball", "elision", "worddelimiter"]
                },
                "str_search_analyzer" : {
                    "type": "custom",
                    "tokenizer" : "standard",
                    "filter" : ["stopwords", "asciifolding", "lowercase", "snowball", "elision", "worddelimiter"]
                },
                "suggest_analyzer": {
                    "type":"custom",
                    "tokenizer":"standard",
                    "filter":[
                        "stopwords",
                        "standard",
                        "lowercase",
                        "asciifolding"
                    ]
                },
            },
            "tokenizer": {
                "haystack_ngram_tokenizer": {
                    "type": "nGram",
                    "min_gram": 2,
                    "max_gram": 20,
                },
            },
            "filter": {
                "elision": {
                    "type": "elision",
                    "articles": ["l", "m", "t", "qu", "n", "s", "j", "d"]
                },
                "stopwords": {
                    "type": "stop",
                    "stopwords": ["_french_", "_english_"],
                    "ignore_case": True
                },
                "worddelimiter": {
                    "type": "word_delimiter"
                }
            }
        }
    }
}

#Haystack settings
HAYSTACK_CONNECTIONS = {
    'default': {
        ...
        'ENGINE': 'path.to.search_backends.CustomElasticSearchEngine',
        ...
    },
}

👤Pierre Criulanscy

Trying to understand FormSet ManagementForm data (because error = "it's missing")

0👍

Using elasticsearch-2.x with django-haystack versions <2.5 causes this issue. Check if your versions match these.

elasticsearch-2.x onwards, boost is no longer a support meta-data which haystack passes to it. (Please refer to the answer https://stackoverflow.com/a/36847352/5108155)
This issue was fixed in 2.5 verison of haystack.

While building (or updating) your index, elasticsearch never got the ngram analyzer you intended to apply to the field. You can validate this by manually running-
curl 'http://<elasticsearch_address>/<index_name>/?pretty'
This will show only the types on the fields and no analyzer property.

Interesting thing is that haystack doesn’t throw this exception because of an internal silently_fail property in the ElasticSearchBackend class.

👤Siddharth Dagar

Source:stackexchange.com

Leave a comment Cancel reply