Postgres Search

7 views
Skip to first unread message

Chip

unread,
Mar 24, 2010, 2:50:28 AM3/24/10
to snapboard-discuss
Hi, I'm Chip Black. I work for a backup company called SpiderOak, and
we've been using snapboard for our user support boards rather
successfully. Users demanded to be able to search snapboard, though,
so I modified it to use PostgreSQL's rather handy full-text search
capabilities. This does of course mean our solution only works when
you're using postgres on the backend, but I'm posting it in hopes that
it's useful to the community. It is a bit ugly in that it does direct
database queries instead of traipsing through django's delightful
models layer.

The change needs an index to be added to the snapboard_post table.
Technically this isn't strictly necessary, but without it search
queries are likely to be slow. It should look like this:

CREATE INDEX snapboard_post_search_idx ON snapboard_post USING
gin(to_tsvector('english'::regconfig, text));

Add this to TEMPLATE_DIRS in your settings.py:

os.path.join(PROJECT_DIR, 'search/templates'),

And 'search' to TEMPLATE_CONTEXT_PROCESSORS in the same. I'm pasting
the patch below because Google groups for some reason does not allow
files to be attached. Perhaps this can be done via email, but I don't
feel adventurous enough to try. Hopefully it comes through without
google doing something stupid like auto-wrapping.

Good luck, and thanks!

~chip

diff --git a/webpy/spideroak.com/forum/search/__init__.py b/webpy/
spideroak.com/forum/search/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/webpy/spideroak.com/forum/search/templates/search/
search.html b/webpy/spideroak.com/forum/search/templates/search/
search.html
new file mode 100644
index 0000000..39dfbe6
--- /dev/null
+++ b/webpy/spideroak.com/forum/search/templates/search/search.html
@@ -0,0 +1,53 @@
+{% extends "snapboard/base_forum.html" %}
+{% load i18n pagination_tags %}
+{% load extras %}
+
+{% block snapboard_main %}
+ <div class="index_title">
+ <div style="float: left">
+ <h2 style="font-size: 152%">Search Results: {{ q }}</h2>
+
+ {% ifequal page 1 %}
+ &lsaquo;&lsaquo; previous
+ {% else %}
+ <a href="{% url search_query %}?q={{ q|urlencode }}
&page={{ prevpage }}">&lsaquo;&lsaquo; previous</a>
+ {% endifequal %}
+
+ {% for p in pagelist %}
+ {% ifequal page p %}
+ {{ p }}
+ {% else %}
+ <a href="{% url search_query %}?q={{ q|urlencode }}
&page={{ p }}">{{ p }}</a>
+ {% endifequal %}
+ {% endfor %}
+
+ {% ifequal page pages %}
+ next &rsaquo;&rsaquo;
+ {% else %}
+ <a href="{% url search_query %}?q={{ q|urlencode }}
&page={{ nextpage }}">next &rsaquo;&rsaquo;</a>
+ {% endifequal %}
+ </div>
+
+ <div style="text-align: right">
+ Results {{ i_start }}-{{ i_end }} of {{ count }}<br />
+ </div>
+
+ <div style="clear: both"></div>
+ </div>
+
+ {% if results %}
+ {% for post in results %}
+ <div class="thread">
+ <h3><a href="{% url snapboard_locate_post post_id=post.id
%}/">{{ post.title }}</a></h3>
+ {% autoescape off %}
+ <p>{{ post.preview }}</p>
+ {% endautoescape %}
+ </div>
+ {% endfor %}
+ {% else %}
+ <p>{% trans "No results." %}</p>
+ {% endif %}
+{% endblock %}
+<!--
+vim: ai ts=4 sts=4 et sw=4
+-->
diff --git a/webpy/spideroak.com/forum/search/urls.py b/webpy/
spideroak.com/forum/search/urls.py
new file mode 100644
index 0000000..e4058bd
--- /dev/null
+++ b/webpy/spideroak.com/forum/search/urls.py
@@ -0,0 +1,7 @@
+from django.conf.urls.defaults import *
+
+from views import query
+
+urlpatterns = patterns('',
+ (r'^$', query, {}, 'search_query'),
+)
diff --git a/webpy/spideroak.com/forum/search/views.py b/webpy/
spideroak.com/forum/search/views.py
new file mode 100644
index 0000000..de40149
--- /dev/null
+++ b/webpy/spideroak.com/forum/search/views.py
@@ -0,0 +1,75 @@
+import sys
+from pyPgSQL import PgSQL
+import re
+from django.core.urlresolvers import reverse
+from django.http import HttpResponse, HttpResponseNotFound
+from django.shortcuts import render_to_response
+from django.template import RequestContext
+
+import django.db
+from snapboard.views import extra_processors
+from snapboard.models import Thread
+
+search_query = """SELECT id, thread_id, ts_headline(text, query,
'MinWords=20, MaxWords=50') AS preview FROM (
+ SELECT id, thread_id, text, query,
+ ts_rank_cd(to_tsvector('english', text), query, 16) AS
rank
+ FROM snapboard_post, plainto_tsquery('english', %s) query
+ WHERE revision_id IS NULL AND to_tsvector('english', text) @@
query
+ ORDER BY rank DESC LIMIT 10 OFFSET %s
+) AS foo;
+"""
+
+count_query = """SELECT count(*) FROM snapboard_post,
plainto_tsquery('english', %s) query
+ WHERE revision_id IS NULL AND to_tsvector('english', text) @@ query
+"""
+
+def query(request):
+ if not 'q' in request.GET:
+ return HttpResponseNotFound()
+
+ words = re.split(r'\s+', request.GET['q'])
+
+ try:
+ page = int(request.GET['page'])
+ except KeyError:
+ page = 1
+
+ query = ' & '.join(words)
+
+ c = django.db.connection.cursor()
+ c.execute(count_query, [query])
+ results = c.fetchall()
+ count = results[0][0]
+ start = page * 10 - 10
+ if page * 10 - 1 > count - 1:
+ end = count - 1
+ else:
+ end = page * 10 - 1
+
+ c.execute(search_query, [query, start])
+ results = c.fetchall()
+
+ rdict = {
+ 'results': [],
+ 'q': ' '.join(words),
+ 'i_start': start + 1,
+ 'i_end': end + 1,
+ 'count': count,
+ 'page': page,
+ 'nextpage': page + 1,
+ 'prevpage': page - 1,
+ 'pages': (count + 9) / 10,
+ }
+ rdict['pagelist'] = range(1, rdict['pages'] + 1)
+
+ for r in results:
+ t = Thread.objects.get(id=r[1])
+ rdict['results'].append({
+ 'id': r[0],
+ 'title': t.subject,
+ 'preview': r[2],
+ })
+
+ c.close()
+
+ return render_to_response('search/search.html', rdict,
context_instance=RequestContext(request, processors=extra_processors))
diff --git a/webpy/spideroak.com/forum/snapboard/models.py b/webpy/
spideroak.com/forum/snapboard/models.py
index 01ab7d2..5a816d9 100644
--- a/webpy/spideroak.com/forum/snapboard/models.py
+++ b/webpy/spideroak.com/forum/snapboard/models.py
@@ -274,7 +274,7 @@ class Thread(models.Model):
if not getattr(user, 'is_staff', False):
qs = qs.exclude(censor=True)
if before:
- qs.filter(date__lt=before.date)
+ qs = qs.filter(date__lt=before.date)
return qs.count()

class Post(models.Model):
diff --git a/webpy/spideroak.com/forum/snapboard/templates/snapboard/
include/side_panel.html b/webpy/spideroak.com/forum/snapboard/
templates/snapboard/include/side_panel.html
index 635f290..f1ca820 100644
--- a/webpy/spideroak.com/forum/snapboard/templates/snapboard/include/
side_panel.html
+++ b/webpy/spideroak.com/forum/snapboard/templates/snapboard/include/
side_panel.html
@@ -1,5 +1,12 @@
{% load i18n %}
<div class="forummenu-first">
+ <form method="GET" action="/forum/search/">
+ <input type="text" name="q" size="20">
+ <input type="submit" value="Search">
+ </form>
+</div>
+
+<div class="forummenu">
{% if user.is_authenticated %}
<p>
{# TODO: The next line used to be in a blocktrans before using
the url tag #}
@@ -46,8 +53,8 @@
</div>

{% if user.is_staff %}
- <br />
- <div class="forummenu">
+<br />
+<div class="forummenu">
<p><b>{% trans "Admin Shortcuts" %}</b></p>
<br />
<ul>
@@ -57,20 +64,20 @@
<li>&#187; <a href="/forum/admin/snapboard/thread/?
csticky__exact=1">{% trans "Category Sticky" %}</a></li>
<li>&#187; <a href="/forum/admin/snapboard/thread/?
gsticky__exact=1">{% trans "Global Sticky" %}</a></li>
</ul>
- </div>
+</div>
{% endif %}

{% if user.is_authenticated %}
- <!-- br />
- <div class="forummenu">
+<!-- br />
+<div class="forummenu">
<ul>
<li>&#187; <a href="#">Watched Topics<b></a> <br /> <a
href="#">(2 new!)</b></a></li>
<li>&#187; <a href="#">Private Posts<b></a> <br /> <a
href="#">(4 new!)</b></a></li>
</ul>
- </div -->
+</div -->

- <br />
- <div class="forummenu">
+<br />
+<div class="forummenu">
<p><b><a href="{% url snapboard_edit_settings %}">{% trans
"Forum Preferences" %}</a></b></p>
- </div>
+</div>
{% endif %}
diff --git a/webpy/spideroak.com/forum/urls.py b/webpy/spideroak.com/
forum/urls.py
index 4783f23..21cecfa 100644
--- a/webpy/spideroak.com/forum/urls.py
+++ b/webpy/spideroak.com/forum/urls.py
@@ -7,6 +7,7 @@ urlpatterns = patterns('',
(r'^forum/admin/(.*)', admin.site.root),
(r'^forum/accounts/', include('forum.registration.urls')),
(r'^forum/auth/', include('forum.spideroak_auth.urls')),
+ (r'^forum/search/', include('forum.search.urls')),
(r'^forum/', include('forum.snapboard.urls')),
(r'^forum/notices/', 'django.views.generic.simple.redirect_to',
{'url': '/forum/'}, 'notification_notices'),
)

Reply all
Reply to author
Forward
0 new messages