From 1320422385c305f3e117cc2eda364b951c86f9e0 Mon Sep 17 00:00:00 2001 From: eire1130 Date: Fri, 8 Jun 2012 21:01:03 -0400 Subject: [PATCH 1/7] additional changes for multiple-db scenario --- README.txt | 2 ++ fts/backends/pgsql.py | 20 ++++++++++++++------ fts/settings.py | 2 ++ 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/README.txt b/README.txt index 900faff..5989273 100644 --- a/README.txt +++ b/README.txt @@ -16,3 +16,5 @@ For installation instructions, see the file "INSTALL.txt" in this directory; for instructions on how to use this application, and on what it provides, see the file "overview.txt" in the "docs/" directory. + +change \ No newline at end of file diff --git a/fts/backends/pgsql.py b/fts/backends/pgsql.py index adaf549..ac7a4c5 100644 --- a/fts/backends/pgsql.py +++ b/fts/backends/pgsql.py @@ -1,11 +1,23 @@ "Pgsql Fts backend" import django DJANGO_VERSION = django.VERSION -from django.db import connection, transaction +from django.db import connections, transaction from django.db.models.fields import FieldDoesNotExist from fts.backends.base import InvalidFtsBackendError from fts.backends.base import BaseClass, BaseModel, BaseManager +from django.conf import settings +try: + from south.modelsinspector import add_introspection_rules + add_introspection_rules([], ["^fts\.backends\.pgsql\.VectorField"]) +except: + pass + +try: + fts_database = settings.FTS_DATABASE +except AttributeError: + fts_database = 'default' +connection = connections[fts_database] qn = connection.ops.quote_name @@ -41,15 +53,11 @@ def db_type(self, connection=None): class SearchClass(BaseClass): def __init__(self, server, params): - from django.conf import settings if (DJANGO_VERSION[0] <= 1) and (DJANGO_VERSION[1] <=2): if not settings.DATABASE_ENGINE in ['postgresql', 'postgresql_psycopg2']: raise InvalidFtsBackendError("PostgreSQL with tsearch2 support is needed to use the pgsql FTS backend") else: - databases = [] - for database in settings.DATABASES.values(): - databases.append(database['ENGINE']) - if not databases in ['postgresql', 'postgresql_psycopg2','django.db.backends.postgresql_psycopg2']: + if not settings.DATABASES[fts_database]['ENGINE'] in ['postgresql', 'postgresql_psycopg2','django.db.backends.postgresql_psycopg2']: raise InvalidFtsBackendError("PostgreSQL with tsearch2 support is needed to use the pgsql FTS backend") self.backend = 'pgsql' diff --git a/fts/settings.py b/fts/settings.py index e2aa426..0156591 100644 --- a/fts/settings.py +++ b/fts/settings.py @@ -2,3 +2,5 @@ FTS_BACKEND = getattr(settings, 'FTS_BACKEND', 'simple://') FTS_CONFIGURE_ALL_BACKENDS = getattr(settings, 'FTS_CONFIGURE_ALL_BACKENDS', True) + +FTS_DATABASE = 'default' From 904d5cd1bfa1a04fc0534577a63ecfd830923eff Mon Sep 17 00:00:00 2001 From: eire1130 Date: Sat, 9 Jun 2012 08:52:37 -0400 Subject: [PATCH 2/7] more changes for django 1.3+ and multiple db support. --- fts/backends/pgsql.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/fts/backends/pgsql.py b/fts/backends/pgsql.py index ac7a4c5..9dae883 100644 --- a/fts/backends/pgsql.py +++ b/fts/backends/pgsql.py @@ -1,23 +1,30 @@ "Pgsql Fts backend" import django DJANGO_VERSION = django.VERSION -from django.db import connections, transaction + from django.db.models.fields import FieldDoesNotExist from fts.backends.base import InvalidFtsBackendError from fts.backends.base import BaseClass, BaseModel, BaseManager from django.conf import settings +from django.db import transaction +if (DJANGO_VERSION[0] <= 1) and (DJANGO_VERSION[1] <=2): + from django.db import connection + this_backend = settings.DATABASE_ENGINE +else: + from django.db import connections + try: + fts_database = settings.FTS_DATABASE + except AttributeError: + fts_database = 'default' + connection = connections[fts_database] + this_backend = settings.DATABASES[fts_database]['ENGINE'] try: from south.modelsinspector import add_introspection_rules add_introspection_rules([], ["^fts\.backends\.pgsql\.VectorField"]) except: pass -try: - fts_database = settings.FTS_DATABASE -except AttributeError: - fts_database = 'default' -connection = connections[fts_database] qn = connection.ops.quote_name @@ -53,12 +60,12 @@ def db_type(self, connection=None): class SearchClass(BaseClass): def __init__(self, server, params): - if (DJANGO_VERSION[0] <= 1) and (DJANGO_VERSION[1] <=2): - if not settings.DATABASE_ENGINE in ['postgresql', 'postgresql_psycopg2']: - raise InvalidFtsBackendError("PostgreSQL with tsearch2 support is needed to use the pgsql FTS backend") - else: - if not settings.DATABASES[fts_database]['ENGINE'] in ['postgresql', 'postgresql_psycopg2','django.db.backends.postgresql_psycopg2']: - raise InvalidFtsBackendError("PostgreSQL with tsearch2 support is needed to use the pgsql FTS backend") +# if (DJANGO_VERSION[0] <= 1) and (DJANGO_VERSION[1] <=2): + if not this_backend in ['postgresql', 'postgresql_psycopg2','django.db.backends.postgresql_psycopg2']: + raise InvalidFtsBackendError("PostgreSQL with tsearch2 support is needed to use the pgsql FTS backend") +# else: +# if not settings.DATABASES[fts_database]['ENGINE'] in ['postgresql', 'postgresql_psycopg2','django.db.backends.postgresql_psycopg2']: +# raise InvalidFtsBackendError("PostgreSQL with tsearch2 support is needed to use the pgsql FTS backend") self.backend = 'pgsql' class SearchManager(BaseManager): @@ -98,6 +105,7 @@ def _update_index_update(self, pk=None): # Build a list of SQL clauses that generate tsvectors for each specified field. clauses = [] params = [] +# print self._field for field, weight in self._fields.items(): v = self._vector_sql(field, weight) clauses.append(v[0]) From bcf6a468ba1149c3083988e1ed148b184c91c883 Mon Sep 17 00:00:00 2001 From: eire1130 Date: Sat, 9 Jun 2012 23:12:23 -0400 Subject: [PATCH 3/7] fixes the issue with 1.3+ saving and inserting. Also there was some issue with two managers being instantiated, the base.py change hopefully fixes that (hackish) --- fts/backends/base.py | 19 +++++++++++++------ fts/backends/pgsql.py | 28 ++++++++++++++++------------ fts/settings.py | 4 ++-- 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/fts/backends/base.py b/fts/backends/base.py index 69ee100..318627c 100644 --- a/fts/backends/base.py +++ b/fts/backends/base.py @@ -36,13 +36,12 @@ def __call__(self, query=None, **kwargs): def contribute_to_class(self, cls, name): # Instances need to get to us to update their indexes. search_managers = getattr(cls, '_search_managers', []) - search_managers.append(self) + if not isinstance(self.fields,type(None)): + search_managers.append(self) setattr(cls, '_search_managers', search_managers) super(BaseManager, self).contribute_to_class(cls, name) - if not self.fields: self.fields = self._find_text_fields() - if isinstance(self.fields, (list, tuple)): self._fields = {} for field in self.fields: @@ -82,7 +81,7 @@ class BaseModel(models.Model): class Meta: abstract = True - @transaction.commit_on_success + #@transaction.commit_on_success def update_index(self): """ Update the index. @@ -91,7 +90,7 @@ def update_index(self): sm._update_index(pk=self.pk) @classmethod - @transaction.commit_on_success + #@transaction.commit_on_success def update_indexes(cls): """ Update the index. @@ -99,10 +98,18 @@ def update_indexes(cls): for sm in getattr(cls, '_search_managers', []): sm._update_index(None) - @transaction.commit_on_success + #@transaction.commit_on_success def save(self, *args, **kwargs): update_index = kwargs.pop('update_index', True) super(BaseModel, self).save(*args, **kwargs) if update_index and getattr(self, '_auto_reindex', True): for sm in getattr(self.__class__, '_search_managers', []): sm._update_index(pk=self.pk) + + + + + + + + diff --git a/fts/backends/pgsql.py b/fts/backends/pgsql.py index 9dae883..213e404 100644 --- a/fts/backends/pgsql.py +++ b/fts/backends/pgsql.py @@ -9,14 +9,17 @@ from django.conf import settings from django.db import transaction if (DJANGO_VERSION[0] <= 1) and (DJANGO_VERSION[1] <=2): + NEW_DJANGO = False from django.db import connection this_backend = settings.DATABASE_ENGINE else: + NEW_DJANGO = True from django.db import connections try: fts_database = settings.FTS_DATABASE except AttributeError: - fts_database = 'default' + from ...fts import settings as fts_settings + fts_database = fts_settings.FTS_DATABASE connection = connections[fts_database] this_backend = settings.DATABASES[fts_database]['ENGINE'] try: @@ -60,12 +63,8 @@ def db_type(self, connection=None): class SearchClass(BaseClass): def __init__(self, server, params): -# if (DJANGO_VERSION[0] <= 1) and (DJANGO_VERSION[1] <=2): if not this_backend in ['postgresql', 'postgresql_psycopg2','django.db.backends.postgresql_psycopg2']: raise InvalidFtsBackendError("PostgreSQL with tsearch2 support is needed to use the pgsql FTS backend") -# else: -# if not settings.DATABASES[fts_database]['ENGINE'] in ['postgresql', 'postgresql_psycopg2','django.db.backends.postgresql_psycopg2']: -# raise InvalidFtsBackendError("PostgreSQL with tsearch2 support is needed to use the pgsql FTS backend") self.backend = 'pgsql' class SearchManager(BaseManager): @@ -84,7 +83,7 @@ def _vector_field(self): vectors = [f for f in self.model._meta.fields if isinstance(f, VectorField)] if len(vectors) != 1: - raise ValueError('There must be exactly 1 VectorField defined for the %s model.' % self.model._meta.object_name) + raise ValueError('There must be exactly one VectorField defined for the %s model.' % self.model._meta.object_name) self._vector_field_cache = vectors[0] @@ -100,18 +99,17 @@ def _vector_sql(self, field, weight): return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(f.column), weight), []) except FieldDoesNotExist: return ("setweight(to_tsvector('%s', %%s), '%s')" % (self.language, weight), [field]) - + + @transaction.commit_on_success def _update_index_update(self, pk=None): # Build a list of SQL clauses that generate tsvectors for each specified field. clauses = [] params = [] -# print self._field for field, weight in self._fields.items(): v = self._vector_sql(field, weight) clauses.append(v[0]) params.extend(v[1]) vector_sql = ' || '.join(clauses) - where = '' # If one or more pks are specified, tack a WHERE clause onto the SQL. if pk is not None: @@ -123,7 +121,10 @@ def _update_index_update(self, pk=None): sql = 'UPDATE %s SET %s = %s%s' % (qn(self.model._meta.db_table), qn(self.vector_field.column), vector_sql, where) cursor = connection.cursor() cursor.execute(sql, tuple(params)) - transaction.set_dirty() + if NEW_DJANGO: + transaction.commit_unless_managed(using=fts_database) + else: + transaction.set_dirty() def _update_index_walking(self, pk=None): if pk is not None: @@ -134,7 +135,7 @@ def _update_index_walking(self, pk=None): else: items = self.all() - IW = {} +# IW = {} for item in items: clauses = [] params = [] @@ -154,7 +155,10 @@ def _update_index_walking(self, pk=None): sql = 'UPDATE %s SET %s = %s WHERE %s = %d' % (qn(self.model._meta.db_table), qn(self.vector_field.column), vector_sql, qn(self.model._meta.pk.column), item.pk) cursor = connection.cursor() cursor.execute(sql, tuple(params)) - transaction.set_dirty() + if NEW_DJANGO: + transaction.commit_unless_managed(using=fts_database) + else: + transaction.set_dirty() @transaction.commit_on_success def _update_index(self, pk=None): diff --git a/fts/settings.py b/fts/settings.py index 0156591..0892291 100644 --- a/fts/settings.py +++ b/fts/settings.py @@ -1,6 +1,6 @@ from django.conf import settings FTS_BACKEND = getattr(settings, 'FTS_BACKEND', 'simple://') -FTS_CONFIGURE_ALL_BACKENDS = getattr(settings, 'FTS_CONFIGURE_ALL_BACKENDS', True) +FTS_CONFIGURE_ALL_BACKENDS = getattr(settings, 'FTS_CONFIGURE_ALL_BACKENDS', False) -FTS_DATABASE = 'default' +FTS_DATABASE = getattr(settings, 'FTS_DATABASE', 'default') From e210470ffb62c9c7444c8f6a18a0d2950c78cf4b Mon Sep 17 00:00:00 2001 From: eire1130 Date: Mon, 11 Jun 2012 18:11:49 -0400 Subject: [PATCH 4/7] VectorField will return a simple char if the connection to it is other than postgres. This is to keep south from freaking out during a multiple db scenario. --- fts/backends/pgsql.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/fts/backends/pgsql.py b/fts/backends/pgsql.py index 213e404..de7724a 100644 --- a/fts/backends/pgsql.py +++ b/fts/backends/pgsql.py @@ -3,7 +3,7 @@ DJANGO_VERSION = django.VERSION from django.db.models.fields import FieldDoesNotExist - +from django.core.exceptions import FieldError from fts.backends.base import InvalidFtsBackendError from fts.backends.base import BaseClass, BaseModel, BaseManager from django.conf import settings @@ -51,6 +51,11 @@ 'tr' : 'turkish', } +def require_postgres(connection): + engine = connection.settings_dict['ENGINE'] + if 'psycopg2' not in engine and 'postgis' not in engine: + raise FieldError("Vector fields are currently implemented only for PostgreSQL/psycopg2") + class VectorField(models.Field): def __init__(self, *args, **kwargs): kwargs['null'] = True @@ -59,7 +64,11 @@ def __init__(self, *args, **kwargs): super(VectorField, self).__init__(*args, **kwargs) def db_type(self, connection=None): - return 'tsvector' + try: + require_postgres(connection) + return 'tsvector' + except: + return 'char(%s)' % 5 class SearchClass(BaseClass): def __init__(self, server, params): From 774a176a62fa0721b028f5b4748cd8b4658b4845 Mon Sep 17 00:00:00 2001 From: eire1130 Date: Tue, 31 Jul 2012 17:02:39 -0400 Subject: [PATCH 5/7] added word_count --- fts/backends/base.py | 13 ++++++-- fts/backends/pgsql.py | 70 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 76 insertions(+), 7 deletions(-) diff --git a/fts/backends/base.py b/fts/backends/base.py index 318627c..e3377bd 100644 --- a/fts/backends/base.py +++ b/fts/backends/base.py @@ -65,6 +65,14 @@ def update_index(self, pk=None): def search(self, query, **kwargs): return self._search(query, **kwargs) + def _word_count(self, **kwargs): + raise NotImplementedError + + def word_count(self, **kwargs): + query = None + return self._word_count(**kwargs) + + def _find_text_fields(self): """ Return the names of all CharField and TextField fields defined for this manager's model. @@ -107,9 +115,8 @@ def save(self, *args, **kwargs): sm._update_index(pk=self.pk) - - - + + diff --git a/fts/backends/pgsql.py b/fts/backends/pgsql.py index de7724a..2ed894a 100644 --- a/fts/backends/pgsql.py +++ b/fts/backends/pgsql.py @@ -1,5 +1,6 @@ "Pgsql Fts backend" import django +from django.db.utils import DatabaseError DJANGO_VERSION = django.VERSION from django.db.models.fields import FieldDoesNotExist @@ -7,7 +8,9 @@ from fts.backends.base import InvalidFtsBackendError from fts.backends.base import BaseClass, BaseModel, BaseManager from django.conf import settings -from django.db import transaction +from django.db import transaction +import sys + if (DJANGO_VERSION[0] <= 1) and (DJANGO_VERSION[1] <=2): NEW_DJANGO = False from django.db import connection @@ -92,7 +95,7 @@ def _vector_field(self): vectors = [f for f in self.model._meta.fields if isinstance(f, VectorField)] if len(vectors) != 1: - raise ValueError('There must be exactly one VectorField defined for the %s model.' % self.model._meta.object_name) + raise ValueError('There must be exactly 1 VectorField defined for the %s model.' % self.model._meta.object_name) self._vector_field_cache = vectors[0] @@ -105,9 +108,19 @@ def _vector_sql(self, field, weight): """ try: f = self.model._meta.get_field(field) - return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(f.column), weight), []) + t_size = getattr(self.model, f.column) + if sys.getsizeof(t_size) < 1048575: + return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(f.column), weight), []) + else: + a = t_size + while sys.getsizeof(t_size) >= 1048575: + a = a[:-1] + setattr(self.model, a) + return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(a), weight), []) + except FieldDoesNotExist: return ("setweight(to_tsvector('%s', %%s), '%s')" % (self.language, weight), [field]) + @transaction.commit_on_success def _update_index_update(self, pk=None): @@ -180,7 +193,47 @@ def _update_index(self, pk=None): self._update_index_walking(pk) else: self._update_index_update(pk) - +# + def _word_count(self, **kwargs): + + word_length = kwargs.get('word_length') + if word_length == None: + word_length = '1' + table = kwargs.get('table') + if table == None: + element_text = 'element_text' + clone = kwargs.get('clone') + if clone: + pre_query = str(clone.query) + pre_query = pre_query.replace('`','"') + else: + qs = clone.get_query_set() + pre_query = str(qs.query) + pre_query = pre_query.replace('`','"') + + sql = "\ + with etext as ({pre_query}),\ + words as ( \ + select lower(regexp_split_to_table({table} , E'\\\\W+')) as word \ + from etext \ + ), \ + word_lex as ( select word, count(*) as cnt,\ + to_tsvector('english', COALESCE(word,'')) as t \ + from words \ + group by 1 order by %s desc ) \ + select * from word_lex WHERE \ + t != '' and word !~ '[0-9]+' and length(word) > {wl} ".format(table=table,pre_query=pre_query,wl=word_length) + limit = kwargs.get('limit') + order_by = kwargs.get('order_by') + if limit != None: + sql += 'limit {limit}'.format(limit=limit) + if order_by != None: + sql = sql %(order_by) + else: + sql = sql %('cnt') + + return custom_sql(sql) + def _search(self, query, **kwargs): """ Returns a queryset after having applied the full-text search query. If rank_field @@ -204,6 +257,11 @@ def _search(self, query, **kwargs): order = ['-%s' % rank_field] return qs.extra(select=select, where=[where], order_by=order) +def custom_sql(sql): + cursor = connection.cursor() + cursor.execute(sql) + rows = cursor.fetchall() + return rows class SearchableModel(BaseModel): class Meta: @@ -212,3 +270,7 @@ class Meta: search_index = VectorField() objects = SearchManager() + + + + From a62db93a40d7be214445e3b936cfcb5d529abaf7 Mon Sep 17 00:00:00 2001 From: eire1130 Date: Tue, 31 Jul 2012 17:39:11 -0400 Subject: [PATCH 6/7] small fix for table --- fts/backends/pgsql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fts/backends/pgsql.py b/fts/backends/pgsql.py index 2ed894a..475a439 100644 --- a/fts/backends/pgsql.py +++ b/fts/backends/pgsql.py @@ -222,7 +222,7 @@ def _word_count(self, **kwargs): from words \ group by 1 order by %s desc ) \ select * from word_lex WHERE \ - t != '' and word !~ '[0-9]+' and length(word) > {wl} ".format(table=table,pre_query=pre_query,wl=word_length) + t != '' and word !~ '[0-9]+' and length(word) > {wl} ".format(table=element_text,pre_query=pre_query,wl=word_length) limit = kwargs.get('limit') order_by = kwargs.get('order_by') if limit != None: From 31cbbe6bf92dfa3a4256f84501831c492f2d592e Mon Sep 17 00:00:00 2001 From: eire1130 Date: Wed, 1 Aug 2012 18:38:57 -0400 Subject: [PATCH 7/7] fix for backend --- fts/backends/pgsql.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/fts/backends/pgsql.py b/fts/backends/pgsql.py index 475a439..cecddd9 100644 --- a/fts/backends/pgsql.py +++ b/fts/backends/pgsql.py @@ -108,21 +108,23 @@ def _vector_sql(self, field, weight): """ try: f = self.model._meta.get_field(field) - t_size = getattr(self.model, f.column) - if sys.getsizeof(t_size) < 1048575: - return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(f.column), weight), []) - else: - a = t_size - while sys.getsizeof(t_size) >= 1048575: - a = a[:-1] - setattr(self.model, a) - return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(a), weight), []) + return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(f.column), weight), []) +# print self.model.__dict__ +# t_size = getattr(self.model, f.column) +# if sys.getsizeof(t_size) < 1048575: +# return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(f.column), weight), []) +# else: +# a = t_size +# while sys.getsizeof(t_size) >= 1048575: +# a = a[:-1] +# setattr(self.model, a) +# return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(a), weight), []) except FieldDoesNotExist: return ("setweight(to_tsvector('%s', %%s), '%s')" % (self.language, weight), [field]) - @transaction.commit_on_success + #@transaction.commit_on_success def _update_index_update(self, pk=None): # Build a list of SQL clauses that generate tsvectors for each specified field. clauses = [] @@ -182,7 +184,7 @@ def _update_index_walking(self, pk=None): else: transaction.set_dirty() - @transaction.commit_on_success + #@transaction.commit_on_success def _update_index(self, pk=None): index_walking = False for field, weight in self._fields.items(): @@ -203,7 +205,7 @@ def _word_count(self, **kwargs): if table == None: element_text = 'element_text' clone = kwargs.get('clone') - if clone: + if clone != None: pre_query = str(clone.query) pre_query = pre_query.replace('`','"') else: