Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@ For installation instructions, see the file "INSTALL.txt" in this
directory; for instructions on how to use this application, and on
what it provides, see the file "overview.txt" in the "docs/"
directory.

change
26 changes: 20 additions & 6 deletions fts/backends/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,12 @@ def __call__(self, query=None, **kwargs):
def contribute_to_class(self, cls, name):
# Instances need to get to us to update their indexes.
search_managers = getattr(cls, '_search_managers', [])
search_managers.append(self)
if not isinstance(self.fields,type(None)):
search_managers.append(self)
setattr(cls, '_search_managers', search_managers)
super(BaseManager, self).contribute_to_class(cls, name)

if not self.fields:
self.fields = self._find_text_fields()

if isinstance(self.fields, (list, tuple)):
self._fields = {}
for field in self.fields:
Expand All @@ -66,6 +65,14 @@ def update_index(self, pk=None):
def search(self, query, **kwargs):
return self._search(query, **kwargs)

def _word_count(self, **kwargs):
    """
    Backend hook behind word_count().

    This base implementation always raises NotImplementedError; a
    concrete search manager is expected to override it.
    """
    raise NotImplementedError()

def word_count(self, **kwargs):
    """
    Return the word-count result computed by the active backend.

    Every keyword argument is forwarded unchanged to the backend-specific
    ``_word_count()`` hook and that hook's return value is handed straight
    back to the caller.
    """
    return self._word_count(**kwargs)


def _find_text_fields(self):
"""
Return the names of all CharField and TextField fields defined for this manager's model.
Expand All @@ -82,7 +89,7 @@ class BaseModel(models.Model):
class Meta:
abstract = True

@transaction.commit_on_success
#@transaction.commit_on_success
def update_index(self):
"""
Update the index.
Expand All @@ -91,18 +98,25 @@ def update_index(self):
sm._update_index(pk=self.pk)

@classmethod
@transaction.commit_on_success
#@transaction.commit_on_success
def update_indexes(cls):
"""
Update the index.
"""
for sm in getattr(cls, '_search_managers', []):
sm._update_index(None)

@transaction.commit_on_success
#@transaction.commit_on_success
def save(self, *args, **kwargs):
update_index = kwargs.pop('update_index', True)
super(BaseModel, self).save(*args, **kwargs)
if update_index and getattr(self, '_auto_reindex', True):
for sm in getattr(self.__class__, '_search_managers', []):
sm._update_index(pk=self.pk)







131 changes: 112 additions & 19 deletions fts/backends/pgsql.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,36 @@
"Pgsql Fts backend"
import django
from django.db.utils import DatabaseError
DJANGO_VERSION = django.VERSION
from django.db import connection, transaction
from django.db.models.fields import FieldDoesNotExist

from django.db.models.fields import FieldDoesNotExist
from django.core.exceptions import FieldError
from fts.backends.base import InvalidFtsBackendError
from fts.backends.base import BaseClass, BaseModel, BaseManager
from django.conf import settings
from django.db import transaction
import sys

# Pick the database connection and engine name used by this backend.
# Django <= 1.2 is configured through the single DATABASE_ENGINE setting;
# later versions use the DATABASES dict, where the FTS_DATABASE alias
# selects the entry to run full-text SQL against.
# Comparing the (major, minor) tuple also classifies pre-1.0 releases such
# as (0, 96) correctly, which an element-wise test does not.
if DJANGO_VERSION[:2] <= (1, 2):
    NEW_DJANGO = False
    from django.db import connection
    this_backend = settings.DATABASE_ENGINE
else:
    NEW_DJANGO = True
    from django.db import connections
    try:
        fts_database = settings.FTS_DATABASE
    except AttributeError:
        # Fall back to the app's own settings module. Imported absolutely,
        # like the other ``fts.`` imports in this file: a ``...`` relative
        # import would climb above the top-level ``fts`` package.
        from fts import settings as fts_settings
        fts_database = fts_settings.FTS_DATABASE
    connection = connections[fts_database]
    this_backend = settings.DATABASES[fts_database]['ENGINE']
# South is an optional dependency: when it is installed, tell it how to
# introspect VectorField; when it is not, there is nothing to register.
try:
    from south.modelsinspector import add_introspection_rules
except ImportError:
    pass
else:
    # Raw string so the ``\.`` escapes reach the regex engine verbatim.
    add_introspection_rules([], [r"^fts\.backends\.pgsql\.VectorField"])


qn = connection.ops.quote_name

Expand All @@ -29,6 +54,11 @@
'tr' : 'turkish',
}

def require_postgres(connection):
    """
    Raise FieldError unless ``connection`` targets a PostgreSQL engine.

    ``connection`` must expose ``settings_dict['ENGINE']``. The engine name
    is accepted when it contains 'postgresql', 'psycopg2' or 'postgis';
    'postgresql' is included so that the plain 'postgresql' engine, which
    SearchClass also accepts, is not rejected here.
    """
    engine = connection.settings_dict['ENGINE']
    if not any(marker in engine for marker in ('postgresql', 'psycopg2', 'postgis')):
        raise FieldError("Vector fields are currently implemented only for PostgreSQL/psycopg2")

class VectorField(models.Field):
def __init__(self, *args, **kwargs):
kwargs['null'] = True
Expand All @@ -37,20 +67,16 @@ def __init__(self, *args, **kwargs):
super(VectorField, self).__init__(*args, **kwargs)

def db_type(self, connection=None):
    """
    Return the column type for this field on the given connection.

    Yields 'tsvector' when require_postgres() accepts the connection.
    Otherwise (including when ``connection`` is None or carries no usable
    engine setting) falls back to 'char(5)' so the model can still be
    created on other databases.
    """
    try:
        require_postgres(connection)
    except (FieldError, AttributeError, KeyError, TypeError):
        # FieldError: not PostgreSQL. The others cover a missing or
        # malformed connection / settings_dict.
        return 'char(5)'
    return 'tsvector'

class SearchClass(BaseClass):
    """
    Backend descriptor for the PostgreSQL full-text search backend.
    """

    def __init__(self, server, params):
        """
        Check that the configured database engine is PostgreSQL.

        ``server`` and ``params`` are accepted but not used by this
        constructor. The engine is taken from the module-level
        ``this_backend``, which is resolved once at import time.

        Raises InvalidFtsBackendError for any other engine.
        """
        # A single check against this_backend replaces the per-version
        # branches; the old multi-database branch compared a *list* of
        # engine names against strings and therefore always raised.
        supported = ('postgresql', 'postgresql_psycopg2', 'django.db.backends.postgresql_psycopg2')
        if this_backend not in supported:
            raise InvalidFtsBackendError("PostgreSQL with tsearch2 support is needed to use the pgsql FTS backend")
        self.backend = 'pgsql'

class SearchManager(BaseManager):
Expand Down Expand Up @@ -83,9 +109,22 @@ def _vector_sql(self, field, weight):
try:
f = self.model._meta.get_field(field)
return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(f.column), weight), [])
# print self.model.__dict__
# t_size = getattr(self.model, f.column)
# if sys.getsizeof(t_size) < 1048575:
# return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(f.column), weight), [])
# else:
# a = t_size
# while sys.getsizeof(t_size) >= 1048575:
# a = a[:-1]
# setattr(self.model, a)
# return ("setweight(to_tsvector('%s', coalesce(%s,'')), '%s')" % (self.language, qn(a), weight), [])

except FieldDoesNotExist:
return ("setweight(to_tsvector('%s', %%s), '%s')" % (self.language, weight), [field])


#@transaction.commit_on_success
def _update_index_update(self, pk=None):
# Build a list of SQL clauses that generate tsvectors for each specified field.
clauses = []
Expand All @@ -95,7 +134,6 @@ def _update_index_update(self, pk=None):
clauses.append(v[0])
params.extend(v[1])
vector_sql = ' || '.join(clauses)

where = ''
# If one or more pks are specified, tack a WHERE clause onto the SQL.
if pk is not None:
Expand All @@ -107,7 +145,10 @@ def _update_index_update(self, pk=None):
sql = 'UPDATE %s SET %s = %s%s' % (qn(self.model._meta.db_table), qn(self.vector_field.column), vector_sql, where)
cursor = connection.cursor()
cursor.execute(sql, tuple(params))
transaction.set_dirty()
if NEW_DJANGO:
transaction.commit_unless_managed(using=fts_database)
else:
transaction.set_dirty()

def _update_index_walking(self, pk=None):
if pk is not None:
Expand All @@ -118,7 +159,7 @@ def _update_index_walking(self, pk=None):
else:
items = self.all()

IW = {}
# IW = {}
for item in items:
clauses = []
params = []
Expand All @@ -138,9 +179,12 @@ def _update_index_walking(self, pk=None):
sql = 'UPDATE %s SET %s = %s WHERE %s = %d' % (qn(self.model._meta.db_table), qn(self.vector_field.column), vector_sql, qn(self.model._meta.pk.column), item.pk)
cursor = connection.cursor()
cursor.execute(sql, tuple(params))
transaction.set_dirty()
if NEW_DJANGO:
transaction.commit_unless_managed(using=fts_database)
else:
transaction.set_dirty()

@transaction.commit_on_success
#@transaction.commit_on_success
def _update_index(self, pk=None):
index_walking = False
for field, weight in self._fields.items():
Expand All @@ -151,7 +195,47 @@ def _update_index(self, pk=None):
self._update_index_walking(pk)
else:
self._update_index_update(pk)

#
def _word_count(self, **kwargs):
    """
    Count how often each word occurs in one text column of a queryset.

    Keyword arguments:
      word_length -- only words strictly longer than this are returned
                     (default '1').
      table       -- despite its name, the column expression that is split
                     into words (default 'element_text').
      clone       -- queryset whose SQL supplies the rows; defaults to
                     self.get_query_set().
      limit       -- optional maximum number of rows to return.
      order_by    -- column of the ``word_lex`` result to sort by,
                     descending (default 'cnt').

    Returns whatever custom_sql() returns for the generated statement; each
    row is (word, cnt, t) as selected from ``word_lex``.
    """
    word_length = kwargs.get('word_length')
    if word_length is None:
        word_length = '1'

    # Honour an explicit ``table`` instead of only handling its absence.
    column = kwargs.get('table')
    if column is None:
        column = 'element_text'

    # Without an explicit queryset, fall back to this manager's own.
    clone = kwargs.get('clone')
    if clone is None:
        clone = self.get_query_set()
    # Backtick-quoted identifiers are rewritten to double-quoted ones.
    pre_query = str(clone.query).replace('`', '"')

    order_by = kwargs.get('order_by')
    if order_by is None:
        order_by = 'cnt'

    # NOTE(review): column, order_by, word_length and limit are interpolated
    # into the SQL text as-is; callers must not pass untrusted values.
    # Everything goes through str.format (no ``%`` step), so a literal '%'
    # inside pre_query cannot break the substitution.
    sql = (
        "with etext as ({pre_query}), "
        "words as ( "
        "select lower(regexp_split_to_table({column} , E'\\\\W+')) as word "
        "from etext "
        "), "
        "word_lex as ( select word, count(*) as cnt, "
        "to_tsvector('english', COALESCE(word,'')) as t "
        "from words "
        "group by 1 order by {order_by} desc ) "
        "select * from word_lex WHERE "
        "t != '' and word !~ '[0-9]+' and length(word) > {wl} "
    ).format(pre_query=pre_query, column=column, order_by=order_by, wl=word_length)

    limit = kwargs.get('limit')
    if limit is not None:
        sql += 'limit {limit}'.format(limit=limit)

    return custom_sql(sql)

def _search(self, query, **kwargs):
"""
Returns a queryset after having applied the full-text search query. If rank_field
Expand All @@ -175,6 +259,11 @@ def _search(self, query, **kwargs):
order = ['-%s' % rank_field]

return qs.extra(select=select, where=[where], order_by=order)
def custom_sql(sql, params=None):
    """
    Execute ``sql`` on the module-level FTS connection and return all rows.

    ``params`` is optional; when given it is passed to cursor.execute() so
    values can be bound by the database driver instead of being formatted
    into the SQL text. When omitted, the statement is executed exactly as
    before.

    The cursor is always closed, even if execution or fetching fails.
    """
    cursor = connection.cursor()
    try:
        if params is None:
            cursor.execute(sql)
        else:
            cursor.execute(sql, params)
        return cursor.fetchall()
    finally:
        cursor.close()

class SearchableModel(BaseModel):
class Meta:
Expand All @@ -183,3 +272,7 @@ class Meta:
search_index = VectorField()

objects = SearchManager()




4 changes: 3 additions & 1 deletion fts/settings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from django.conf import settings

# Each option is read from the project's Django settings, with a default
# used when the project does not define it.

# Backend selector; defaults to 'simple://'.
FTS_BACKEND = getattr(settings, 'FTS_BACKEND', 'simple://')
FTS_CONFIGURE_ALL_BACKENDS = getattr(settings, 'FTS_CONFIGURE_ALL_BACKENDS', True)
# NOTE(review): this second assignment overrides the one directly above, so
# the effective default is False. The True line looks like a leftover from
# the previous revision -- confirm and remove it.
FTS_CONFIGURE_ALL_BACKENDS = getattr(settings, 'FTS_CONFIGURE_ALL_BACKENDS', False)

# Database alias for full-text search; the pgsql backend uses it as the key
# into ``connections`` and ``settings.DATABASES``. Defaults to 'default'.
FTS_DATABASE = getattr(settings, 'FTS_DATABASE', 'default')