Mercurial > public > src > rhodecode
diff pylons_app/lib/indexers/daemon.py @ 474:28f19fa562df
updated config files,
Implemented content index extensions with whoosh,
fixed analyzer to match more words
author | Marcin Kuzminski <marcin@python-works.com> |
---|---|
date | Sat, 28 Aug 2010 14:53:32 +0200 |
parents | 9b67cebe6609 |
children | c59c4d4323e7 |
line wrap: on
line diff
```diff
--- a/pylons_app/lib/indexers/daemon.py Fri Aug 27 22:28:50 2010 +0200
+++ b/pylons_app/lib/indexers/daemon.py Sat Aug 28 14:53:32 2010 +0200
@@ -38,7 +38,7 @@
 from pylons_app.model.hg_model import HgModel
 from whoosh.index import create_in, open_dir
 from shutil import rmtree
-from pylons_app.lib.indexers import ANALYZER, EXCLUDE_EXTENSIONS, IDX_LOCATION, \
+from pylons_app.lib.indexers import ANALYZER, INDEX_EXTENSIONS, IDX_LOCATION, \
     SCHEMA, IDX_NAME
 
 import logging
@@ -70,8 +70,10 @@
 
     def add_doc(self, writer, path, repo):
         """Adding doc to writer"""
-        #we don't won't to read excluded file extensions just index them
-        if path.split('/')[-1].split('.')[-1].lower() not in EXCLUDE_EXTENSIONS:
+        ext = unicode(path.split('/')[-1].split('.')[-1].lower())
+        #we just index the content of choosen files
+        if ext in INDEX_EXTENSIONS:
+            log.debug(' >> %s [WITH CONTENT]' % path)
             fobj = open(path, 'rb')
             content = fobj.read()
             fobj.close()
@@ -81,15 +83,20 @@
                 #incase we have a decode error just represent as byte string
                 u_content = unicode(str(content).encode('string_escape'))
         else:
-            u_content = u''
+            log.debug(' >> %s' % path)
+            #just index file name without it's content
+            u_content = u''
+
         writer.add_document(owner=unicode(repo.contact),
                             repository=u"%s" % repo.name,
                             path=u"%s" % path,
                             content=u_content,
-                            modtime=os.path.getmtime(path))
+                            modtime=os.path.getmtime(path),
+                            extension=ext)
 
     def build_index(self):
         if os.path.exists(IDX_LOCATION):
+            log.debug('removing previos index')
             rmtree(IDX_LOCATION)
 
         if not os.path.exists(IDX_LOCATION):
@@ -102,7 +109,6 @@
             log.debug('building index @ %s' % repo.path)
 
             for idx_path in self.get_paths(repo.path):
-                log.debug(' >> %s' % idx_path)
                 self.add_doc(writer, idx_path, repo)
         writer.commit(merge=True)
 
@@ -170,11 +176,12 @@
             self.update_index()
 
 if __name__ == "__main__":
-    repo_location = '/home/marcink/python_workspace_dirty/*'
-
+    repo_location = '/home/marcink/hg_repos/*'
+    full_index = True # False means looking just for changes
     try:
         l = DaemonLock()
-        WhooshIndexingDaemon(repo_location=repo_location).run(full_index=True)
+        WhooshIndexingDaemon(repo_location=repo_location)\
+            .run(full_index=full_index)
         l.release()
     except LockHeld:
         sys.exit(1)
```