diff --git a/README.txt b/README.txt index 900faff..5989273 100644 --- a/README.txt +++ b/README.txt @@ -16,3 +16,5 @@ For installation instructions, see the file "INSTALL.txt" in this directory; for instructions on how to use this application, and on what it provides, see the file "overview.txt" in the "docs/" directory. + +change \ No newline at end of file diff --git a/fts/backends/base.py b/fts/backends/base.py index 69ee100..e3377bd 100644 --- a/fts/backends/base.py +++ b/fts/backends/base.py @@ -36,13 +36,12 @@ def __call__(self, query=None, **kwargs): def contribute_to_class(self, cls, name): # Instances need to get to us to update their indexes. search_managers = getattr(cls, '_search_managers', []) - search_managers.append(self) + if not isinstance(self.fields,type(None)): + search_managers.append(self) setattr(cls, '_search_managers', search_managers) super(BaseManager, self).contribute_to_class(cls, name) - if not self.fields: self.fields = self._find_text_fields() - if isinstance(self.fields, (list, tuple)): self._fields = {} for field in self.fields: @@ -66,6 +65,14 @@ def update_index(self, pk=None): def search(self, query, **kwargs): return self._search(query, **kwargs) + def _word_count(self, **kwargs): + raise NotImplementedError + + def word_count(self, **kwargs): + query = None + return self._word_count(**kwargs) + + def _find_text_fields(self): """ Return the names of all CharField and TextField fields defined for this manager's model. @@ -82,7 +89,7 @@ class BaseModel(models.Model): class Meta: abstract = True - @transaction.commit_on_success + #@transaction.commit_on_success def update_index(self): """ Update the index. @@ -91,7 +98,7 @@ def update_index(self): sm._update_index(pk=self.pk) @classmethod - @transaction.commit_on_success + #@transaction.commit_on_success def update_indexes(cls): """ Update the index. @@ -99,10 +106,17 @@ def update_indexes(cls): for sm in getattr(cls, '_search_managers', []): sm._update_index(None) - @transaction.commit_on_success + #@transaction.commit_on_success def save(self, *args, **kwargs): update_index = kwargs.pop('update_index', True) super(BaseModel, self).save(*args, **kwargs) if update_index and getattr(self, '_auto_reindex', True): for sm in getattr(self.__class__, '_search_managers', []): sm._update_index(pk=self.pk) + + + + + + + diff --git a/fts/backends/pgsql.py b/fts/backends/pgsql.py index adaf549..cecddd9 100644 --- a/fts/backends/pgsql.py +++ b/fts/backends/pgsql.py @@ -1,11 +1,36 @@ "Pgsql Fts backend" import django +from django.db.utils import DatabaseError DJANGO_VERSION = django.VERSION -from django.db import connection, transaction -from django.db.models.fields import FieldDoesNotExist +from django.db.models.fields import FieldDoesNotExist +from django.core.exceptions import FieldError from fts.backends.base import InvalidFtsBackendError from fts.backends.base import BaseClass, BaseModel, BaseManager +from django.conf import settings +from django.db import transaction +import sys + +if (DJANGO_VERSION[0] <= 1) and (DJANGO_VERSION[1] <=2): + NEW_DJANGO = False + from django.db import connection + this_backend = settings.DATABASE_ENGINE +else: + NEW_DJANGO = True + from django.db import connections + try: + fts_database = settings.FTS_DATABASE + except AttributeError: + from ...fts import settings as fts_settings + fts_database = fts_settings.FTS_DATABASE + connection = connections[fts_database] + this_backend = settings.DATABASES[fts_database]['ENGINE'] +try: + from south.modelsinspector import add_introspection_rules + add_introspection_rules([], ["^fts\.backends\.pgsql\.VectorField"]) +except: + pass + qn = connection.ops.quote_name @@ -29,6 +54,11 @@ 'tr' : 'turkish', } +def require_postgres(connection): + engine = connection.settings_dict['ENGINE'] + if 'psycopg2' not in engine and 'postgis' not in engine: + raise FieldError("Vector fields are currently implemented only for PostgreSQL/psycopg2") + class VectorField(models.Field): def __init__(self, *args, **kwargs): kwargs['null'] = True @@ -37,20 +67,16 @@ def __init__(self, *args, **kwargs): super(VectorField, self).__init__(*args, **kwargs) def db_type(self, connection=None): - return 'tsvector' + try: + require_postgres(connection) + return 'tsvector' + except: + return 'char(%s)' % 5 class SearchClass(BaseClass): def __init__(self, server, params): - from django.conf import settings - if (DJANGO_VERSION[0] <= 1) and (DJANGO_VERSION[1] <=2): - if not settings.DATABASE_ENGINE in ['postgresql', 'postgresql_psycopg2']: - raise InvalidFtsBackendError("PostgreSQL with tsearch2 support is needed to use the pgsql FTS backend") - else: - databases = [] - for database in settings.DATABASES.values(): - databases.append(database['ENGINE']) - if not databases in ['postgresql', 'postgresql_psycopg2','django.db.backends.postgresql_psycopg2']: - raise InvalidFtsBackendError("PostgreSQL with tsearch2 support is needed to use the pgsql FTS backend") + if not this_backend in ['postgresql', 'postgresql_psycopg2','django.db.backends.postgresql_psycopg2']: + raise InvalidFtsBackendError("PostgreSQL with tsearch2 support is needed to use the pgsql FTS backend") self.backend = 'pgsql' class SearchManager(BaseManager): @@ -83,9 +109,22 @@ def _vector_sql(self, field, weight): try: f = self.model._meta.get_field(field) return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(f.column), weight), []) +# print self.model.__dict__ +# t_size = getattr(self.model, f.column) +# if sys.getsizeof(t_size) < 1048575: +# return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(f.column), weight), []) +# else: +# a = t_size +# while sys.getsizeof(t_size) >= 1048575: +# a = a[:-1] +# setattr(self.model, a) +# return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(a), weight), []) + except FieldDoesNotExist: return ("setweight(to_tsvector('%s', %%s), '%s')" % (self.language, weight), [field]) + + #@transaction.commit_on_success def _update_index_update(self, pk=None): # Build a list of SQL clauses that generate tsvectors for each specified field. clauses = [] @@ -95,7 +134,6 @@ def _update_index_update(self, pk=None): clauses.append(v[0]) params.extend(v[1]) vector_sql = ' || '.join(clauses) - where = '' # If one or more pks are specified, tack a WHERE clause onto the SQL. if pk is not None: @@ -107,7 +145,10 @@ def _update_index_update(self, pk=None): sql = 'UPDATE %s SET %s = %s%s' % (qn(self.model._meta.db_table), qn(self.vector_field.column), vector_sql, where) cursor = connection.cursor() cursor.execute(sql, tuple(params)) - transaction.set_dirty() + if NEW_DJANGO: + transaction.commit_unless_managed(using=fts_database) + else: + transaction.set_dirty() def _update_index_walking(self, pk=None): if pk is not None: @@ -118,7 +159,7 @@ def _update_index_walking(self, pk=None): else: items = self.all() - IW = {} +# IW = {} for item in items: clauses = [] params = [] @@ -138,9 +179,12 @@ def _update_index_walking(self, pk=None): sql = 'UPDATE %s SET %s = %s WHERE %s = %d' % (qn(self.model._meta.db_table), qn(self.vector_field.column), vector_sql, qn(self.model._meta.pk.column), item.pk) cursor = connection.cursor() cursor.execute(sql, tuple(params)) - transaction.set_dirty() + if NEW_DJANGO: + transaction.commit_unless_managed(using=fts_database) + else: + transaction.set_dirty() - @transaction.commit_on_success + #@transaction.commit_on_success def _update_index(self, pk=None): index_walking = False for field, weight in self._fields.items(): @@ -151,7 +195,47 @@ def _update_index(self, pk=None): self._update_index_walking(pk) else: self._update_index_update(pk) - +# + def _word_count(self, **kwargs): + + word_length = kwargs.get('word_length') + if word_length == None: + word_length = '1' + table = kwargs.get('table') + if table == None: + element_text = 'element_text' + clone = kwargs.get('clone') + if clone != None: + pre_query = str(clone.query) + pre_query = pre_query.replace('`','"') + else: + qs = clone.get_query_set() + pre_query = str(qs.query) + pre_query = pre_query.replace('`','"') + + sql = "\ + with etext as ({pre_query}),\ + words as ( \ + select lower(regexp_split_to_table({table} , E'\\\\W+')) as word \ + from etext \ + ), \ + word_lex as ( select word, count(*) as cnt,\ + to_tsvector('english', COALESCE(word,'')) as t \ + from words \ + group by 1 order by %s desc ) \ + select * from word_lex WHERE \ + t != '' and word !~ '[0-9]+' and length(word) > {wl} ".format(table=element_text,pre_query=pre_query,wl=word_length) + limit = kwargs.get('limit') + order_by = kwargs.get('order_by') + if limit != None: + sql += 'limit {limit}'.format(limit=limit) + if order_by != None: + sql = sql %(order_by) + else: + sql = sql %('cnt') + + return custom_sql(sql) + def _search(self, query, **kwargs): """ Returns a queryset after having applied the full-text search query. If rank_field @@ -175,6 +259,11 @@ def _search(self, query, **kwargs): order = ['-%s' % rank_field] return qs.extra(select=select, where=[where], order_by=order) +def custom_sql(sql): + cursor = connection.cursor() + cursor.execute(sql) + rows = cursor.fetchall() + return rows class SearchableModel(BaseModel): class Meta: @@ -183,3 +272,7 @@ class Meta: search_index = VectorField() objects = SearchManager() + + + + diff --git a/fts/settings.py b/fts/settings.py index e2aa426..0892291 100644 --- a/fts/settings.py +++ b/fts/settings.py @@ -1,4 +1,6 @@ from django.conf import settings FTS_BACKEND = getattr(settings, 'FTS_BACKEND', 'simple://') -FTS_CONFIGURE_ALL_BACKENDS = getattr(settings, 'FTS_CONFIGURE_ALL_BACKENDS', True) +FTS_CONFIGURE_ALL_BACKENDS = getattr(settings, 'FTS_CONFIGURE_ALL_BACKENDS', False) + +FTS_DATABASE = getattr(settings, 'FTS_DATABASE', 'default')